{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 4502300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.552717499944473e-11, "loss": 4.7837, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.1105434999888946e-10, "loss": 4.7449, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.665815249983342e-10, "loss": 4.762, "step": 1500 }, { "epoch": 0.02, "learning_rate": 2.221086999977789e-10, "loss": 4.7893, "step": 2000 }, { "epoch": 0.03, "learning_rate": 2.7763587499722363e-10, "loss": 4.7779, "step": 2500 }, { "epoch": 0.03, "learning_rate": 3.331630499966684e-10, "loss": 4.7454, "step": 3000 }, { "epoch": 0.04, "learning_rate": 3.886902249961131e-10, "loss": 4.7598, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.442173999955578e-10, "loss": 4.7754, "step": 4000 }, { "epoch": 0.05, "learning_rate": 4.997445749950025e-10, "loss": 4.7607, "step": 4500 }, { "epoch": 0.06, "learning_rate": 5.552717499944473e-10, "loss": 4.7713, "step": 5000 }, { "epoch": 0.06, "learning_rate": 6.10798924993892e-10, "loss": 4.7322, "step": 5500 }, { "epoch": 0.07, "learning_rate": 6.663260999933368e-10, "loss": 4.7939, "step": 6000 }, { "epoch": 0.07, "learning_rate": 7.218532749927815e-10, "loss": 4.7548, "step": 6500 }, { "epoch": 0.08, "learning_rate": 7.773804499922262e-10, "loss": 4.772, "step": 7000 }, { "epoch": 0.08, "learning_rate": 8.32907624991671e-10, "loss": 4.7405, "step": 7500 }, { "epoch": 0.09, "learning_rate": 8.884347999911156e-10, "loss": 4.7392, "step": 8000 }, { "epoch": 0.09, "learning_rate": 9.439619749905603e-10, "loss": 4.7745, "step": 8500 }, { "epoch": 0.1, "learning_rate": 9.99489149990005e-10, "loss": 4.7632, "step": 9000 }, { "epoch": 0.11, "learning_rate": 1.0550163249894497e-09, "loss": 4.7306, "step": 9500 }, { "epoch": 0.11, "learning_rate": 1.1105434999888945e-09, "loss": 4.7724, "step": 10000 }, { "epoch": 0.12, "learning_rate": 1.1660706749883393e-09, "loss": 4.7545, "step": 10500 }, { "epoch": 0.12, "learning_rate": 1.221597849987784e-09, "loss": 4.7522, "step": 11000 }, { "epoch": 0.13, "learning_rate": 1.2771250249872287e-09, "loss": 4.744, "step": 11500 }, { "epoch": 0.13, "learning_rate": 1.3326521999866735e-09, "loss": 4.7259, "step": 12000 }, { "epoch": 0.14, "learning_rate": 1.3881793749861181e-09, "loss": 4.7489, "step": 12500 }, { "epoch": 0.14, "learning_rate": 1.443706549985563e-09, "loss": 4.7674, "step": 13000 }, { "epoch": 0.15, "learning_rate": 1.4992337249850075e-09, "loss": 4.7464, "step": 13500 }, { "epoch": 0.16, "learning_rate": 1.5547608999844523e-09, "loss": 4.7427, "step": 14000 }, { "epoch": 0.16, "learning_rate": 1.6102880749838971e-09, "loss": 4.7386, "step": 14500 }, { "epoch": 0.17, "learning_rate": 1.665815249983342e-09, "loss": 4.7435, "step": 15000 }, { "epoch": 0.17, "learning_rate": 1.7213424249827865e-09, "loss": 4.7479, "step": 15500 }, { "epoch": 0.18, "learning_rate": 1.7768695999822313e-09, "loss": 4.7619, "step": 16000 }, { "epoch": 0.18, "learning_rate": 1.832396774981676e-09, "loss": 4.7526, "step": 16500 }, { "epoch": 0.19, "learning_rate": 1.8879239499811207e-09, "loss": 4.7487, "step": 17000 }, { "epoch": 0.19, "learning_rate": 1.9434511249805653e-09, "loss": 4.7383, "step": 17500 }, { "epoch": 0.2, "learning_rate": 1.99897829998001e-09, "loss": 4.7341, "step": 18000 }, { "epoch": 0.21, "learning_rate": 2.054505474979455e-09, "loss": 4.7364, "step": 18500 }, { "epoch": 0.21, "learning_rate": 2.1100326499788995e-09, "loss": 4.7539, "step": 19000 }, { "epoch": 0.22, "learning_rate": 2.1655598249783445e-09, "loss": 4.7276, "step": 19500 }, { "epoch": 0.22, "learning_rate": 2.221086999977789e-09, "loss": 4.7091, "step": 20000 }, { "epoch": 0.23, "learning_rate": 2.2766141749772337e-09, "loss": 4.7189, "step": 20500 }, { "epoch": 0.23, "learning_rate": 2.3321413499766787e-09, "loss": 4.727, "step": 21000 }, { "epoch": 0.24, "learning_rate": 2.3876685249761233e-09, "loss": 4.7139, "step": 21500 }, { "epoch": 0.24, "learning_rate": 2.443195699975568e-09, "loss": 4.7044, "step": 22000 }, { "epoch": 0.25, "learning_rate": 2.4987228749750124e-09, "loss": 4.7405, "step": 22500 }, { "epoch": 0.26, "learning_rate": 2.5542500499744574e-09, "loss": 4.7065, "step": 23000 }, { "epoch": 0.26, "learning_rate": 2.609777224973902e-09, "loss": 4.7193, "step": 23500 }, { "epoch": 0.27, "learning_rate": 2.665304399973347e-09, "loss": 4.6842, "step": 24000 }, { "epoch": 0.27, "learning_rate": 2.7208315749727912e-09, "loss": 4.7172, "step": 24500 }, { "epoch": 0.28, "learning_rate": 2.7763587499722362e-09, "loss": 4.6902, "step": 25000 }, { "epoch": 0.28, "learning_rate": 2.831885924971681e-09, "loss": 4.6976, "step": 25500 }, { "epoch": 0.29, "learning_rate": 2.887413099971126e-09, "loss": 4.6845, "step": 26000 }, { "epoch": 0.29, "learning_rate": 2.9429402749705704e-09, "loss": 4.6812, "step": 26500 }, { "epoch": 0.3, "learning_rate": 2.998467449970015e-09, "loss": 4.6999, "step": 27000 }, { "epoch": 0.31, "learning_rate": 3.05399462496946e-09, "loss": 4.7025, "step": 27500 }, { "epoch": 0.31, "learning_rate": 3.1095217999689046e-09, "loss": 4.6931, "step": 28000 }, { "epoch": 0.32, "learning_rate": 3.165048974968349e-09, "loss": 4.6684, "step": 28500 }, { "epoch": 0.32, "learning_rate": 3.2205761499677942e-09, "loss": 4.6757, "step": 29000 }, { "epoch": 0.33, "learning_rate": 3.276103324967239e-09, "loss": 4.652, "step": 29500 }, { "epoch": 0.33, "learning_rate": 3.331630499966684e-09, "loss": 4.6497, "step": 30000 }, { "epoch": 0.34, "learning_rate": 3.3871576749661284e-09, "loss": 4.665, "step": 30500 }, { "epoch": 0.34, "learning_rate": 3.442684849965573e-09, "loss": 4.6444, "step": 31000 }, { "epoch": 0.35, "learning_rate": 3.498212024965018e-09, "loss": 4.6672, "step": 31500 }, { "epoch": 0.36, "learning_rate": 3.5537391999644626e-09, "loss": 4.6469, "step": 32000 }, { "epoch": 0.36, "learning_rate": 3.6092663749639068e-09, "loss": 4.6385, "step": 32500 }, { "epoch": 0.37, "learning_rate": 3.664793549963352e-09, "loss": 4.6352, "step": 33000 }, { "epoch": 0.37, "learning_rate": 3.7203207249627964e-09, "loss": 4.6436, "step": 33500 }, { "epoch": 0.38, "learning_rate": 3.775847899962241e-09, "loss": 4.6293, "step": 34000 }, { "epoch": 0.38, "learning_rate": 3.831375074961686e-09, "loss": 4.6468, "step": 34500 }, { "epoch": 0.39, "learning_rate": 3.8869022499611306e-09, "loss": 4.6267, "step": 35000 }, { "epoch": 0.39, "learning_rate": 3.9424294249605756e-09, "loss": 4.6408, "step": 35500 }, { "epoch": 0.4, "learning_rate": 3.99795659996002e-09, "loss": 4.629, "step": 36000 }, { "epoch": 0.41, "learning_rate": 4.0534837749594656e-09, "loss": 4.6255, "step": 36500 }, { "epoch": 0.41, "learning_rate": 4.10901094995891e-09, "loss": 4.6302, "step": 37000 }, { "epoch": 0.42, "learning_rate": 4.164538124958354e-09, "loss": 4.5945, "step": 37500 }, { "epoch": 0.42, "learning_rate": 4.220065299957799e-09, "loss": 4.6059, "step": 38000 }, { "epoch": 0.43, "learning_rate": 4.275592474957244e-09, "loss": 4.5908, "step": 38500 }, { "epoch": 0.43, "learning_rate": 4.331119649956689e-09, "loss": 4.5667, "step": 39000 }, { "epoch": 0.44, "learning_rate": 4.386646824956133e-09, "loss": 4.5727, "step": 39500 }, { "epoch": 0.44, "learning_rate": 4.442173999955578e-09, "loss": 4.5981, "step": 40000 }, { "epoch": 0.45, "learning_rate": 4.497701174955023e-09, "loss": 4.5854, "step": 40500 }, { "epoch": 0.46, "learning_rate": 4.553228349954467e-09, "loss": 4.583, "step": 41000 }, { "epoch": 0.46, "learning_rate": 4.608755524953912e-09, "loss": 4.5917, "step": 41500 }, { "epoch": 0.47, "learning_rate": 4.664282699953357e-09, "loss": 4.5816, "step": 42000 }, { "epoch": 0.47, "learning_rate": 4.7198098749528015e-09, "loss": 4.5827, "step": 42500 }, { "epoch": 0.48, "learning_rate": 4.7753370499522465e-09, "loss": 4.5674, "step": 43000 }, { "epoch": 0.48, "learning_rate": 4.830864224951691e-09, "loss": 4.5866, "step": 43500 }, { "epoch": 0.49, "learning_rate": 4.886391399951136e-09, "loss": 4.5659, "step": 44000 }, { "epoch": 0.49, "learning_rate": 4.941918574950581e-09, "loss": 4.553, "step": 44500 }, { "epoch": 0.5, "learning_rate": 4.997445749950025e-09, "loss": 4.5593, "step": 45000 }, { "epoch": 0.51, "learning_rate": 5.05297292494947e-09, "loss": 4.5629, "step": 45500 }, { "epoch": 0.51, "learning_rate": 5.108500099948915e-09, "loss": 4.5519, "step": 46000 }, { "epoch": 0.52, "learning_rate": 5.164027274948359e-09, "loss": 4.5401, "step": 46500 }, { "epoch": 0.52, "learning_rate": 5.219554449947804e-09, "loss": 4.5342, "step": 47000 }, { "epoch": 0.53, "learning_rate": 5.275081624947249e-09, "loss": 4.5381, "step": 47500 }, { "epoch": 0.53, "learning_rate": 5.330608799946694e-09, "loss": 4.5256, "step": 48000 }, { "epoch": 0.54, "learning_rate": 5.386135974946138e-09, "loss": 4.5267, "step": 48500 }, { "epoch": 0.54, "learning_rate": 5.4416631499455825e-09, "loss": 4.543, "step": 49000 }, { "epoch": 0.55, "learning_rate": 5.497190324945028e-09, "loss": 4.5319, "step": 49500 }, { "epoch": 0.56, "learning_rate": 5.5527174999444725e-09, "loss": 4.5299, "step": 50000 }, { "epoch": 0.56, "learning_rate": 5.6082446749439175e-09, "loss": 4.5117, "step": 50500 }, { "epoch": 0.57, "learning_rate": 5.663771849943362e-09, "loss": 4.5291, "step": 51000 }, { "epoch": 0.57, "learning_rate": 5.719299024942807e-09, "loss": 4.5038, "step": 51500 }, { "epoch": 0.58, "learning_rate": 5.774826199942252e-09, "loss": 4.5173, "step": 52000 }, { "epoch": 0.58, "learning_rate": 5.830353374941696e-09, "loss": 4.5036, "step": 52500 }, { "epoch": 0.59, "learning_rate": 5.885880549941141e-09, "loss": 4.5334, "step": 53000 }, { "epoch": 0.59, "learning_rate": 5.941407724940586e-09, "loss": 4.4966, "step": 53500 }, { "epoch": 0.6, "learning_rate": 5.99693489994003e-09, "loss": 4.4683, "step": 54000 }, { "epoch": 0.61, "learning_rate": 6.052462074939475e-09, "loss": 4.5084, "step": 54500 }, { "epoch": 0.61, "learning_rate": 6.10798924993892e-09, "loss": 4.4961, "step": 55000 }, { "epoch": 0.62, "learning_rate": 6.163516424938364e-09, "loss": 4.511, "step": 55500 }, { "epoch": 0.62, "learning_rate": 6.219043599937809e-09, "loss": 4.4621, "step": 56000 }, { "epoch": 0.63, "learning_rate": 6.274570774937254e-09, "loss": 4.488, "step": 56500 }, { "epoch": 0.63, "learning_rate": 6.330097949936698e-09, "loss": 4.4761, "step": 57000 }, { "epoch": 0.64, "learning_rate": 6.3856251249361434e-09, "loss": 4.487, "step": 57500 }, { "epoch": 0.64, "learning_rate": 6.4411522999355884e-09, "loss": 4.4958, "step": 58000 }, { "epoch": 0.65, "learning_rate": 6.496679474935033e-09, "loss": 4.4825, "step": 58500 }, { "epoch": 0.66, "learning_rate": 6.552206649934478e-09, "loss": 4.4856, "step": 59000 }, { "epoch": 0.66, "learning_rate": 6.607733824933923e-09, "loss": 4.4724, "step": 59500 }, { "epoch": 0.67, "learning_rate": 6.663260999933368e-09, "loss": 4.4476, "step": 60000 }, { "epoch": 0.67, "learning_rate": 6.718788174932812e-09, "loss": 4.4582, "step": 60500 }, { "epoch": 0.68, "learning_rate": 6.774315349932257e-09, "loss": 4.4804, "step": 61000 }, { "epoch": 0.68, "learning_rate": 6.829842524931702e-09, "loss": 4.4752, "step": 61500 }, { "epoch": 0.69, "learning_rate": 6.885369699931146e-09, "loss": 4.4499, "step": 62000 }, { "epoch": 0.69, "learning_rate": 6.940896874930591e-09, "loss": 4.4592, "step": 62500 }, { "epoch": 0.7, "learning_rate": 6.996424049930036e-09, "loss": 4.4546, "step": 63000 }, { "epoch": 0.71, "learning_rate": 7.051951224929479e-09, "loss": 4.4364, "step": 63500 }, { "epoch": 0.71, "learning_rate": 7.107478399928925e-09, "loss": 4.4465, "step": 64000 }, { "epoch": 0.72, "learning_rate": 7.16300557492837e-09, "loss": 4.4404, "step": 64500 }, { "epoch": 0.72, "learning_rate": 7.2185327499278135e-09, "loss": 4.4519, "step": 65000 }, { "epoch": 0.73, "learning_rate": 7.2740599249272585e-09, "loss": 4.4402, "step": 65500 }, { "epoch": 0.73, "learning_rate": 7.329587099926704e-09, "loss": 4.4382, "step": 66000 }, { "epoch": 0.74, "learning_rate": 7.385114274926149e-09, "loss": 4.4386, "step": 66500 }, { "epoch": 0.74, "learning_rate": 7.440641449925593e-09, "loss": 4.4537, "step": 67000 }, { "epoch": 0.75, "learning_rate": 7.496168624925039e-09, "loss": 4.4171, "step": 67500 }, { "epoch": 0.76, "learning_rate": 7.551695799924483e-09, "loss": 4.425, "step": 68000 }, { "epoch": 0.76, "learning_rate": 7.607222974923927e-09, "loss": 4.4289, "step": 68500 }, { "epoch": 0.77, "learning_rate": 7.662750149923373e-09, "loss": 4.4365, "step": 69000 }, { "epoch": 0.77, "learning_rate": 7.718277324922817e-09, "loss": 4.4359, "step": 69500 }, { "epoch": 0.78, "learning_rate": 7.773804499922261e-09, "loss": 4.4229, "step": 70000 }, { "epoch": 0.78, "learning_rate": 7.829331674921707e-09, "loss": 4.4428, "step": 70500 }, { "epoch": 0.79, "learning_rate": 7.884858849921151e-09, "loss": 4.4071, "step": 71000 }, { "epoch": 0.79, "learning_rate": 7.940386024920597e-09, "loss": 4.419, "step": 71500 }, { "epoch": 0.8, "learning_rate": 7.99591319992004e-09, "loss": 4.4022, "step": 72000 }, { "epoch": 0.81, "learning_rate": 8.051440374919485e-09, "loss": 4.4226, "step": 72500 }, { "epoch": 0.81, "learning_rate": 8.106967549918931e-09, "loss": 4.4109, "step": 73000 }, { "epoch": 0.82, "learning_rate": 8.162494724918374e-09, "loss": 4.413, "step": 73500 }, { "epoch": 0.82, "learning_rate": 8.21802189991782e-09, "loss": 4.4194, "step": 74000 }, { "epoch": 0.83, "learning_rate": 8.273549074917265e-09, "loss": 4.3956, "step": 74500 }, { "epoch": 0.83, "learning_rate": 8.329076249916708e-09, "loss": 4.4074, "step": 75000 }, { "epoch": 0.84, "learning_rate": 8.384603424916154e-09, "loss": 4.4119, "step": 75500 }, { "epoch": 0.84, "learning_rate": 8.440130599915598e-09, "loss": 4.3935, "step": 76000 }, { "epoch": 0.85, "learning_rate": 8.495657774915042e-09, "loss": 4.4161, "step": 76500 }, { "epoch": 0.86, "learning_rate": 8.551184949914488e-09, "loss": 4.3867, "step": 77000 }, { "epoch": 0.86, "learning_rate": 8.606712124913932e-09, "loss": 4.425, "step": 77500 }, { "epoch": 0.87, "learning_rate": 8.662239299913378e-09, "loss": 4.3904, "step": 78000 }, { "epoch": 0.87, "learning_rate": 8.717766474912822e-09, "loss": 4.3925, "step": 78500 }, { "epoch": 0.88, "learning_rate": 8.773293649912266e-09, "loss": 4.3798, "step": 79000 }, { "epoch": 0.88, "learning_rate": 8.828820824911712e-09, "loss": 4.3841, "step": 79500 }, { "epoch": 0.89, "learning_rate": 8.884347999911156e-09, "loss": 4.3653, "step": 80000 }, { "epoch": 0.89, "learning_rate": 8.9398751749106e-09, "loss": 4.3873, "step": 80500 }, { "epoch": 0.9, "learning_rate": 8.995402349910046e-09, "loss": 4.3724, "step": 81000 }, { "epoch": 0.91, "learning_rate": 9.05092952490949e-09, "loss": 4.3679, "step": 81500 }, { "epoch": 0.91, "learning_rate": 9.106456699908935e-09, "loss": 4.3533, "step": 82000 }, { "epoch": 0.92, "learning_rate": 9.16198387490838e-09, "loss": 4.3635, "step": 82500 }, { "epoch": 0.92, "learning_rate": 9.217511049907825e-09, "loss": 4.361, "step": 83000 }, { "epoch": 0.93, "learning_rate": 9.273038224907269e-09, "loss": 4.3852, "step": 83500 }, { "epoch": 0.93, "learning_rate": 9.328565399906715e-09, "loss": 4.386, "step": 84000 }, { "epoch": 0.94, "learning_rate": 9.384092574906159e-09, "loss": 4.3636, "step": 84500 }, { "epoch": 0.94, "learning_rate": 9.439619749905603e-09, "loss": 4.3734, "step": 85000 }, { "epoch": 0.95, "learning_rate": 9.495146924905049e-09, "loss": 4.3527, "step": 85500 }, { "epoch": 0.96, "learning_rate": 9.550674099904493e-09, "loss": 4.3573, "step": 86000 }, { "epoch": 0.96, "learning_rate": 9.606201274903937e-09, "loss": 4.3753, "step": 86500 }, { "epoch": 0.97, "learning_rate": 9.661728449903381e-09, "loss": 4.3663, "step": 87000 }, { "epoch": 0.97, "learning_rate": 9.717255624902827e-09, "loss": 4.3617, "step": 87500 }, { "epoch": 0.98, "learning_rate": 9.772782799902271e-09, "loss": 4.3662, "step": 88000 }, { "epoch": 0.98, "learning_rate": 9.828309974901716e-09, "loss": 4.3652, "step": 88500 }, { "epoch": 0.99, "learning_rate": 9.883837149901161e-09, "loss": 4.3617, "step": 89000 }, { "epoch": 0.99, "learning_rate": 9.939364324900607e-09, "loss": 4.351, "step": 89500 }, { "epoch": 1.0, "learning_rate": 9.99489149990005e-09, "loss": 4.3559, "step": 90000 }, { "epoch": 1.0, "eval_loss": 4.290574073791504, "eval_runtime": 6.3081, "eval_samples_per_second": 246.35, "step": 90046 }, { "epoch": 1.01, "learning_rate": 1.0050418674899496e-08, "loss": 4.357, "step": 90500 }, { "epoch": 1.01, "learning_rate": 1.010594584989894e-08, "loss": 4.324, "step": 91000 }, { "epoch": 1.02, "learning_rate": 1.0161473024898384e-08, "loss": 4.34, "step": 91500 }, { "epoch": 1.02, "learning_rate": 1.021700019989783e-08, "loss": 4.3296, "step": 92000 }, { "epoch": 1.03, "learning_rate": 1.0272527374897274e-08, "loss": 4.3347, "step": 92500 }, { "epoch": 1.03, "learning_rate": 1.0328054549896718e-08, "loss": 4.328, "step": 93000 }, { "epoch": 1.04, "learning_rate": 1.0383581724896164e-08, "loss": 4.337, "step": 93500 }, { "epoch": 1.04, "learning_rate": 1.0439108899895608e-08, "loss": 4.3266, "step": 94000 }, { "epoch": 1.05, "learning_rate": 1.0494636074895054e-08, "loss": 4.3219, "step": 94500 }, { "epoch": 1.06, "learning_rate": 1.0550163249894498e-08, "loss": 4.3348, "step": 95000 }, { "epoch": 1.06, "learning_rate": 1.0605690424893942e-08, "loss": 4.3462, "step": 95500 }, { "epoch": 1.07, "learning_rate": 1.0661217599893388e-08, "loss": 4.3301, "step": 96000 }, { "epoch": 1.07, "learning_rate": 1.0716744774892832e-08, "loss": 4.3296, "step": 96500 }, { "epoch": 1.08, "learning_rate": 1.0772271949892277e-08, "loss": 4.3241, "step": 97000 }, { "epoch": 1.08, "learning_rate": 1.0827799124891722e-08, "loss": 4.3226, "step": 97500 }, { "epoch": 1.09, "learning_rate": 1.0883326299891165e-08, "loss": 4.3247, "step": 98000 }, { "epoch": 1.09, "learning_rate": 1.093885347489061e-08, "loss": 4.3166, "step": 98500 }, { "epoch": 1.1, "learning_rate": 1.0994380649890057e-08, "loss": 4.3325, "step": 99000 }, { "epoch": 1.1, "learning_rate": 1.1049907824889499e-08, "loss": 4.3388, "step": 99500 }, { "epoch": 1.11, "learning_rate": 1.1105434999888945e-08, "loss": 4.3162, "step": 100000 }, { "epoch": 1.12, "learning_rate": 1.116096217488839e-08, "loss": 4.3019, "step": 100500 }, { "epoch": 1.12, "learning_rate": 1.1216489349887835e-08, "loss": 4.3087, "step": 101000 }, { "epoch": 1.13, "learning_rate": 1.1272016524887279e-08, "loss": 4.3023, "step": 101500 }, { "epoch": 1.13, "learning_rate": 1.1327543699886723e-08, "loss": 4.3206, "step": 102000 }, { "epoch": 1.14, "learning_rate": 1.1383070874886169e-08, "loss": 4.2908, "step": 102500 }, { "epoch": 1.14, "learning_rate": 1.1438598049885613e-08, "loss": 4.3311, "step": 103000 }, { "epoch": 1.15, "learning_rate": 1.1494125224885057e-08, "loss": 4.3057, "step": 103500 }, { "epoch": 1.15, "learning_rate": 1.1549652399884503e-08, "loss": 4.305, "step": 104000 }, { "epoch": 1.16, "learning_rate": 1.1605179574883947e-08, "loss": 4.2982, "step": 104500 }, { "epoch": 1.17, "learning_rate": 1.1660706749883392e-08, "loss": 4.2979, "step": 105000 }, { "epoch": 1.17, "learning_rate": 1.1716233924882838e-08, "loss": 4.2965, "step": 105500 }, { "epoch": 1.18, "learning_rate": 1.1771761099882282e-08, "loss": 4.3031, "step": 106000 }, { "epoch": 1.18, "learning_rate": 1.1827288274881726e-08, "loss": 4.2963, "step": 106500 }, { "epoch": 1.19, "learning_rate": 1.1882815449881172e-08, "loss": 4.2962, "step": 107000 }, { "epoch": 1.19, "learning_rate": 1.1938342624880616e-08, "loss": 4.3185, "step": 107500 }, { "epoch": 1.2, "learning_rate": 1.199386979988006e-08, "loss": 4.281, "step": 108000 }, { "epoch": 1.2, "learning_rate": 1.2049396974879506e-08, "loss": 4.2945, "step": 108500 }, { "epoch": 1.21, "learning_rate": 1.210492414987895e-08, "loss": 4.2983, "step": 109000 }, { "epoch": 1.22, "learning_rate": 1.2160451324878394e-08, "loss": 4.3044, "step": 109500 }, { "epoch": 1.22, "learning_rate": 1.221597849987784e-08, "loss": 4.2755, "step": 110000 }, { "epoch": 1.23, "learning_rate": 1.2271505674877284e-08, "loss": 4.2836, "step": 110500 }, { "epoch": 1.23, "learning_rate": 1.2327032849876728e-08, "loss": 4.2827, "step": 111000 }, { "epoch": 1.24, "learning_rate": 1.2382560024876174e-08, "loss": 4.2843, "step": 111500 }, { "epoch": 1.24, "learning_rate": 1.2438087199875618e-08, "loss": 4.2762, "step": 112000 }, { "epoch": 1.25, "learning_rate": 1.2493614374875064e-08, "loss": 4.2847, "step": 112500 }, { "epoch": 1.25, "learning_rate": 1.2549141549874508e-08, "loss": 4.2741, "step": 113000 }, { "epoch": 1.26, "learning_rate": 1.2604668724873951e-08, "loss": 4.2848, "step": 113500 }, { "epoch": 1.27, "learning_rate": 1.2660195899873397e-08, "loss": 4.257, "step": 114000 }, { "epoch": 1.27, "learning_rate": 1.2715723074872841e-08, "loss": 4.2695, "step": 114500 }, { "epoch": 1.28, "learning_rate": 1.2771250249872287e-08, "loss": 4.2666, "step": 115000 }, { "epoch": 1.28, "learning_rate": 1.2826777424871733e-08, "loss": 4.2889, "step": 115500 }, { "epoch": 1.29, "learning_rate": 1.2882304599871177e-08, "loss": 4.2677, "step": 116000 }, { "epoch": 1.29, "learning_rate": 1.2937831774870623e-08, "loss": 4.2612, "step": 116500 }, { "epoch": 1.3, "learning_rate": 1.2993358949870065e-08, "loss": 4.2597, "step": 117000 }, { "epoch": 1.3, "learning_rate": 1.304888612486951e-08, "loss": 4.2624, "step": 117500 }, { "epoch": 1.31, "learning_rate": 1.3104413299868955e-08, "loss": 4.2755, "step": 118000 }, { "epoch": 1.32, "learning_rate": 1.31599404748684e-08, "loss": 4.2701, "step": 118500 }, { "epoch": 1.32, "learning_rate": 1.3215467649867845e-08, "loss": 4.2737, "step": 119000 }, { "epoch": 1.33, "learning_rate": 1.3270994824867291e-08, "loss": 4.2509, "step": 119500 }, { "epoch": 1.33, "learning_rate": 1.3326521999866735e-08, "loss": 4.2591, "step": 120000 }, { "epoch": 1.34, "learning_rate": 1.3382049174866178e-08, "loss": 4.2735, "step": 120500 }, { "epoch": 1.34, "learning_rate": 1.3437576349865624e-08, "loss": 4.2559, "step": 121000 }, { "epoch": 1.35, "learning_rate": 1.3493103524865068e-08, "loss": 4.2355, "step": 121500 }, { "epoch": 1.35, "learning_rate": 1.3548630699864514e-08, "loss": 4.2611, "step": 122000 }, { "epoch": 1.36, "learning_rate": 1.3604157874863958e-08, "loss": 4.2562, "step": 122500 }, { "epoch": 1.37, "learning_rate": 1.3659685049863404e-08, "loss": 4.2578, "step": 123000 }, { "epoch": 1.37, "learning_rate": 1.3715212224862846e-08, "loss": 4.2806, "step": 123500 }, { "epoch": 1.38, "learning_rate": 1.3770739399862292e-08, "loss": 4.2339, "step": 124000 }, { "epoch": 1.38, "learning_rate": 1.3826266574861736e-08, "loss": 4.2419, "step": 124500 }, { "epoch": 1.39, "learning_rate": 1.3881793749861182e-08, "loss": 4.2429, "step": 125000 }, { "epoch": 1.39, "learning_rate": 1.3937320924860626e-08, "loss": 4.2233, "step": 125500 }, { "epoch": 1.4, "learning_rate": 1.3992848099860072e-08, "loss": 4.2532, "step": 126000 }, { "epoch": 1.4, "learning_rate": 1.4048375274859516e-08, "loss": 4.2413, "step": 126500 }, { "epoch": 1.41, "learning_rate": 1.4103902449858959e-08, "loss": 4.2578, "step": 127000 }, { "epoch": 1.42, "learning_rate": 1.4159429624858405e-08, "loss": 4.2444, "step": 127500 }, { "epoch": 1.42, "learning_rate": 1.421495679985785e-08, "loss": 4.2482, "step": 128000 }, { "epoch": 1.43, "learning_rate": 1.4270483974857295e-08, "loss": 4.2213, "step": 128500 }, { "epoch": 1.43, "learning_rate": 1.432601114985674e-08, "loss": 4.2461, "step": 129000 }, { "epoch": 1.44, "learning_rate": 1.4381538324856185e-08, "loss": 4.2304, "step": 129500 }, { "epoch": 1.44, "learning_rate": 1.4437065499855627e-08, "loss": 4.2312, "step": 130000 }, { "epoch": 1.45, "learning_rate": 1.4492592674855073e-08, "loss": 4.2371, "step": 130500 }, { "epoch": 1.45, "learning_rate": 1.4548119849854517e-08, "loss": 4.225, "step": 131000 }, { "epoch": 1.46, "learning_rate": 1.4603647024853963e-08, "loss": 4.2405, "step": 131500 }, { "epoch": 1.47, "learning_rate": 1.4659174199853409e-08, "loss": 4.2358, "step": 132000 }, { "epoch": 1.47, "learning_rate": 1.4714701374852853e-08, "loss": 4.2158, "step": 132500 }, { "epoch": 1.48, "learning_rate": 1.4770228549852299e-08, "loss": 4.2217, "step": 133000 }, { "epoch": 1.48, "learning_rate": 1.4825755724851741e-08, "loss": 4.2346, "step": 133500 }, { "epoch": 1.49, "learning_rate": 1.4881282899851185e-08, "loss": 4.2364, "step": 134000 }, { "epoch": 1.49, "learning_rate": 1.493681007485063e-08, "loss": 4.2242, "step": 134500 }, { "epoch": 1.5, "learning_rate": 1.4992337249850077e-08, "loss": 4.2359, "step": 135000 }, { "epoch": 1.5, "learning_rate": 1.504786442484952e-08, "loss": 4.2528, "step": 135500 }, { "epoch": 1.51, "learning_rate": 1.5103391599848965e-08, "loss": 4.2199, "step": 136000 }, { "epoch": 1.52, "learning_rate": 1.515891877484841e-08, "loss": 4.2272, "step": 136500 }, { "epoch": 1.52, "learning_rate": 1.5214445949847854e-08, "loss": 4.217, "step": 137000 }, { "epoch": 1.53, "learning_rate": 1.5269973124847298e-08, "loss": 4.2113, "step": 137500 }, { "epoch": 1.53, "learning_rate": 1.5325500299846746e-08, "loss": 4.2396, "step": 138000 }, { "epoch": 1.54, "learning_rate": 1.538102747484619e-08, "loss": 4.2435, "step": 138500 }, { "epoch": 1.54, "learning_rate": 1.5436554649845634e-08, "loss": 4.2247, "step": 139000 }, { "epoch": 1.55, "learning_rate": 1.5492081824845078e-08, "loss": 4.2265, "step": 139500 }, { "epoch": 1.55, "learning_rate": 1.5547608999844522e-08, "loss": 4.2093, "step": 140000 }, { "epoch": 1.56, "learning_rate": 1.5603136174843966e-08, "loss": 4.2299, "step": 140500 }, { "epoch": 1.57, "learning_rate": 1.5658663349843414e-08, "loss": 4.2198, "step": 141000 }, { "epoch": 1.57, "learning_rate": 1.5714190524842858e-08, "loss": 4.1949, "step": 141500 }, { "epoch": 1.58, "learning_rate": 1.5769717699842302e-08, "loss": 4.2192, "step": 142000 }, { "epoch": 1.58, "learning_rate": 1.5825244874841746e-08, "loss": 4.2192, "step": 142500 }, { "epoch": 1.59, "learning_rate": 1.5880772049841194e-08, "loss": 4.2136, "step": 143000 }, { "epoch": 1.59, "learning_rate": 1.5936299224840635e-08, "loss": 4.2102, "step": 143500 }, { "epoch": 1.6, "learning_rate": 1.599182639984008e-08, "loss": 4.2302, "step": 144000 }, { "epoch": 1.6, "learning_rate": 1.6047353574839526e-08, "loss": 4.2161, "step": 144500 }, { "epoch": 1.61, "learning_rate": 1.610288074983897e-08, "loss": 4.2064, "step": 145000 }, { "epoch": 1.62, "learning_rate": 1.6158407924838415e-08, "loss": 4.217, "step": 145500 }, { "epoch": 1.62, "learning_rate": 1.6213935099837862e-08, "loss": 4.2203, "step": 146000 }, { "epoch": 1.63, "learning_rate": 1.6269462274837303e-08, "loss": 4.1943, "step": 146500 }, { "epoch": 1.63, "learning_rate": 1.6324989449836747e-08, "loss": 4.2022, "step": 147000 }, { "epoch": 1.64, "learning_rate": 1.6380516624836195e-08, "loss": 4.2151, "step": 147500 }, { "epoch": 1.64, "learning_rate": 1.643604379983564e-08, "loss": 4.2117, "step": 148000 }, { "epoch": 1.65, "learning_rate": 1.6491570974835083e-08, "loss": 4.2076, "step": 148500 }, { "epoch": 1.65, "learning_rate": 1.654709814983453e-08, "loss": 4.1816, "step": 149000 }, { "epoch": 1.66, "learning_rate": 1.6602625324833975e-08, "loss": 4.2007, "step": 149500 }, { "epoch": 1.67, "learning_rate": 1.6658152499833416e-08, "loss": 4.1998, "step": 150000 }, { "epoch": 1.67, "learning_rate": 1.6713679674832863e-08, "loss": 4.1838, "step": 150500 }, { "epoch": 1.68, "learning_rate": 1.6769206849832307e-08, "loss": 4.2061, "step": 151000 }, { "epoch": 1.68, "learning_rate": 1.682473402483175e-08, "loss": 4.1925, "step": 151500 }, { "epoch": 1.69, "learning_rate": 1.6880261199831196e-08, "loss": 4.2042, "step": 152000 }, { "epoch": 1.69, "learning_rate": 1.6935788374830643e-08, "loss": 4.1919, "step": 152500 }, { "epoch": 1.7, "learning_rate": 1.6991315549830084e-08, "loss": 4.1963, "step": 153000 }, { "epoch": 1.7, "learning_rate": 1.7046842724829528e-08, "loss": 4.2059, "step": 153500 }, { "epoch": 1.71, "learning_rate": 1.7102369899828976e-08, "loss": 4.1924, "step": 154000 }, { "epoch": 1.72, "learning_rate": 1.715789707482842e-08, "loss": 4.1929, "step": 154500 }, { "epoch": 1.72, "learning_rate": 1.7213424249827864e-08, "loss": 4.1895, "step": 155000 }, { "epoch": 1.73, "learning_rate": 1.726895142482731e-08, "loss": 4.1929, "step": 155500 }, { "epoch": 1.73, "learning_rate": 1.7324478599826756e-08, "loss": 4.195, "step": 156000 }, { "epoch": 1.74, "learning_rate": 1.7380005774826197e-08, "loss": 4.1959, "step": 156500 }, { "epoch": 1.74, "learning_rate": 1.7435532949825644e-08, "loss": 4.1842, "step": 157000 }, { "epoch": 1.75, "learning_rate": 1.7491060124825088e-08, "loss": 4.1645, "step": 157500 }, { "epoch": 1.75, "learning_rate": 1.7546587299824533e-08, "loss": 4.1997, "step": 158000 }, { "epoch": 1.76, "learning_rate": 1.760211447482398e-08, "loss": 4.1824, "step": 158500 }, { "epoch": 1.77, "learning_rate": 1.7657641649823424e-08, "loss": 4.2048, "step": 159000 }, { "epoch": 1.77, "learning_rate": 1.771316882482287e-08, "loss": 4.1896, "step": 159500 }, { "epoch": 1.78, "learning_rate": 1.7768695999822313e-08, "loss": 4.168, "step": 160000 }, { "epoch": 1.78, "learning_rate": 1.7824223174821757e-08, "loss": 4.1701, "step": 160500 }, { "epoch": 1.79, "learning_rate": 1.78797503498212e-08, "loss": 4.1858, "step": 161000 }, { "epoch": 1.79, "learning_rate": 1.7935277524820645e-08, "loss": 4.162, "step": 161500 }, { "epoch": 1.8, "learning_rate": 1.7990804699820093e-08, "loss": 4.1677, "step": 162000 }, { "epoch": 1.8, "learning_rate": 1.8046331874819537e-08, "loss": 4.1765, "step": 162500 }, { "epoch": 1.81, "learning_rate": 1.810185904981898e-08, "loss": 4.1811, "step": 163000 }, { "epoch": 1.82, "learning_rate": 1.8157386224818425e-08, "loss": 4.184, "step": 163500 }, { "epoch": 1.82, "learning_rate": 1.821291339981787e-08, "loss": 4.1738, "step": 164000 }, { "epoch": 1.83, "learning_rate": 1.8268440574817313e-08, "loss": 4.1816, "step": 164500 }, { "epoch": 1.83, "learning_rate": 1.832396774981676e-08, "loss": 4.1815, "step": 165000 }, { "epoch": 1.84, "learning_rate": 1.8379494924816205e-08, "loss": 4.1761, "step": 165500 }, { "epoch": 1.84, "learning_rate": 1.843502209981565e-08, "loss": 4.1894, "step": 166000 }, { "epoch": 1.85, "learning_rate": 1.8490549274815093e-08, "loss": 4.1747, "step": 166500 }, { "epoch": 1.85, "learning_rate": 1.8546076449814538e-08, "loss": 4.1831, "step": 167000 }, { "epoch": 1.86, "learning_rate": 1.8601603624813982e-08, "loss": 4.1821, "step": 167500 }, { "epoch": 1.87, "learning_rate": 1.865713079981343e-08, "loss": 4.1538, "step": 168000 }, { "epoch": 1.87, "learning_rate": 1.8712657974812874e-08, "loss": 4.171, "step": 168500 }, { "epoch": 1.88, "learning_rate": 1.8768185149812318e-08, "loss": 4.1832, "step": 169000 }, { "epoch": 1.88, "learning_rate": 1.8823712324811762e-08, "loss": 4.1354, "step": 169500 }, { "epoch": 1.89, "learning_rate": 1.8879239499811206e-08, "loss": 4.1606, "step": 170000 }, { "epoch": 1.89, "learning_rate": 1.893476667481065e-08, "loss": 4.1688, "step": 170500 }, { "epoch": 1.9, "learning_rate": 1.8990293849810098e-08, "loss": 4.192, "step": 171000 }, { "epoch": 1.9, "learning_rate": 1.9045821024809542e-08, "loss": 4.1674, "step": 171500 }, { "epoch": 1.91, "learning_rate": 1.9101348199808986e-08, "loss": 4.175, "step": 172000 }, { "epoch": 1.92, "learning_rate": 1.915687537480843e-08, "loss": 4.1618, "step": 172500 }, { "epoch": 1.92, "learning_rate": 1.9212402549807874e-08, "loss": 4.1692, "step": 173000 }, { "epoch": 1.93, "learning_rate": 1.926792972480732e-08, "loss": 4.1469, "step": 173500 }, { "epoch": 1.93, "learning_rate": 1.9323456899806763e-08, "loss": 4.1824, "step": 174000 }, { "epoch": 1.94, "learning_rate": 1.937898407480621e-08, "loss": 4.138, "step": 174500 }, { "epoch": 1.94, "learning_rate": 1.9434511249805654e-08, "loss": 4.1668, "step": 175000 }, { "epoch": 1.95, "learning_rate": 1.94900384248051e-08, "loss": 4.1715, "step": 175500 }, { "epoch": 1.95, "learning_rate": 1.9545565599804543e-08, "loss": 4.1606, "step": 176000 }, { "epoch": 1.96, "learning_rate": 1.9601092774803987e-08, "loss": 4.1726, "step": 176500 }, { "epoch": 1.97, "learning_rate": 1.965661994980343e-08, "loss": 4.1387, "step": 177000 }, { "epoch": 1.97, "learning_rate": 1.971214712480288e-08, "loss": 4.1411, "step": 177500 }, { "epoch": 1.98, "learning_rate": 1.9767674299802323e-08, "loss": 4.1571, "step": 178000 }, { "epoch": 1.98, "learning_rate": 1.9823201474801767e-08, "loss": 4.1629, "step": 178500 }, { "epoch": 1.99, "learning_rate": 1.9878728649801214e-08, "loss": 4.1518, "step": 179000 }, { "epoch": 1.99, "learning_rate": 1.9934255824800655e-08, "loss": 4.175, "step": 179500 }, { "epoch": 2.0, "learning_rate": 1.99897829998001e-08, "loss": 4.1433, "step": 180000 }, { "epoch": 2.0, "eval_loss": 4.123116493225098, "eval_runtime": 6.3261, "eval_samples_per_second": 245.648, "step": 180092 }, { "epoch": 2.0, "learning_rate": 2.0045310174799547e-08, "loss": 4.156, "step": 180500 }, { "epoch": 2.01, "learning_rate": 2.010083734979899e-08, "loss": 4.1567, "step": 181000 }, { "epoch": 2.02, "learning_rate": 2.0156364524798435e-08, "loss": 4.1371, "step": 181500 }, { "epoch": 2.02, "learning_rate": 2.021189169979788e-08, "loss": 4.1543, "step": 182000 }, { "epoch": 2.03, "learning_rate": 2.0267418874797327e-08, "loss": 4.1629, "step": 182500 }, { "epoch": 2.03, "learning_rate": 2.0322946049796768e-08, "loss": 4.1472, "step": 183000 }, { "epoch": 2.04, "learning_rate": 2.0378473224796215e-08, "loss": 4.149, "step": 183500 }, { "epoch": 2.04, "learning_rate": 2.043400039979566e-08, "loss": 4.1544, "step": 184000 }, { "epoch": 2.05, "learning_rate": 2.0489527574795104e-08, "loss": 4.1534, "step": 184500 }, { "epoch": 2.05, "learning_rate": 2.0545054749794548e-08, "loss": 4.1411, "step": 185000 }, { "epoch": 2.06, "learning_rate": 2.0600581924793995e-08, "loss": 4.1362, "step": 185500 }, { "epoch": 2.07, "learning_rate": 2.0656109099793436e-08, "loss": 4.1591, "step": 186000 }, { "epoch": 2.07, "learning_rate": 2.071163627479288e-08, "loss": 4.1516, "step": 186500 }, { "epoch": 2.08, "learning_rate": 2.0767163449792328e-08, "loss": 4.1445, "step": 187000 }, { "epoch": 2.08, "learning_rate": 2.0822690624791772e-08, "loss": 4.1516, "step": 187500 }, { "epoch": 2.09, "learning_rate": 2.0878217799791216e-08, "loss": 4.1523, "step": 188000 }, { "epoch": 2.09, "learning_rate": 2.0933744974790664e-08, "loss": 4.1315, "step": 188500 }, { "epoch": 2.1, "learning_rate": 2.0989272149790108e-08, "loss": 4.1512, "step": 189000 }, { "epoch": 2.1, "learning_rate": 2.104479932478955e-08, "loss": 4.1461, "step": 189500 }, { "epoch": 2.11, "learning_rate": 2.1100326499788996e-08, "loss": 4.1427, "step": 190000 }, { "epoch": 2.12, "learning_rate": 2.115585367478844e-08, "loss": 4.1519, "step": 190500 }, { "epoch": 2.12, "learning_rate": 2.1211380849787885e-08, "loss": 4.1469, "step": 191000 }, { "epoch": 2.13, "learning_rate": 2.1266908024787332e-08, "loss": 4.1438, "step": 191500 }, { "epoch": 2.13, "learning_rate": 2.1322435199786776e-08, "loss": 4.1581, "step": 192000 }, { "epoch": 2.14, "learning_rate": 2.1377962374786217e-08, "loss": 4.1525, "step": 192500 }, { "epoch": 2.14, "learning_rate": 2.1433489549785665e-08, "loss": 4.1381, "step": 193000 }, { "epoch": 2.15, "learning_rate": 2.148901672478511e-08, "loss": 4.1397, "step": 193500 }, { "epoch": 2.15, "learning_rate": 2.1544543899784553e-08, "loss": 4.1365, "step": 194000 }, { "epoch": 2.16, "learning_rate": 2.1600071074783997e-08, "loss": 4.1297, "step": 194500 }, { "epoch": 2.17, "learning_rate": 2.1655598249783445e-08, "loss": 4.1296, "step": 195000 }, { "epoch": 2.17, "learning_rate": 2.171112542478289e-08, "loss": 4.1545, "step": 195500 }, { "epoch": 2.18, "learning_rate": 2.176665259978233e-08, "loss": 4.1479, "step": 196000 }, { "epoch": 2.18, "learning_rate": 2.1822179774781777e-08, "loss": 4.1311, "step": 196500 }, { "epoch": 2.19, "learning_rate": 2.187770694978122e-08, "loss": 4.1384, "step": 197000 }, { "epoch": 2.19, "learning_rate": 2.1933234124780666e-08, "loss": 4.1299, "step": 197500 }, { "epoch": 2.2, "learning_rate": 2.1988761299780113e-08, "loss": 4.1386, "step": 198000 }, { "epoch": 2.2, "learning_rate": 2.2044288474779557e-08, "loss": 4.1577, "step": 198500 }, { "epoch": 2.21, "learning_rate": 2.2099815649778998e-08, "loss": 4.1175, "step": 199000 }, { "epoch": 2.22, "learning_rate": 2.2155342824778446e-08, "loss": 4.1354, "step": 199500 }, { "epoch": 2.22, "learning_rate": 2.221086999977789e-08, "loss": 4.1239, "step": 200000 }, { "epoch": 2.23, "learning_rate": 2.2266397174777334e-08, "loss": 4.1558, "step": 200500 }, { "epoch": 2.23, "learning_rate": 2.232192434977678e-08, "loss": 4.1277, "step": 201000 }, { "epoch": 2.24, "learning_rate": 2.2377451524776226e-08, "loss": 4.1317, "step": 201500 }, { "epoch": 2.24, "learning_rate": 2.243297869977567e-08, "loss": 4.1267, "step": 202000 }, { "epoch": 2.25, "learning_rate": 2.2488505874775114e-08, "loss": 4.1315, "step": 202500 }, { "epoch": 2.25, "learning_rate": 2.2544033049774558e-08, "loss": 4.1399, "step": 203000 }, { "epoch": 2.26, "learning_rate": 2.2599560224774002e-08, "loss": 4.1285, "step": 203500 }, { "epoch": 2.27, "learning_rate": 2.2655087399773447e-08, "loss": 4.1298, "step": 204000 }, { "epoch": 2.27, "learning_rate": 2.2710614574772894e-08, "loss": 4.1137, "step": 204500 }, { "epoch": 2.28, "learning_rate": 2.2766141749772338e-08, "loss": 4.1352, "step": 205000 }, { "epoch": 2.28, "learning_rate": 2.2821668924771782e-08, "loss": 4.1176, "step": 205500 }, { "epoch": 2.29, "learning_rate": 2.2877196099771227e-08, "loss": 4.1378, "step": 206000 }, { "epoch": 2.29, "learning_rate": 2.293272327477067e-08, "loss": 4.1138, "step": 206500 }, { "epoch": 2.3, "learning_rate": 2.2988250449770115e-08, "loss": 4.1411, "step": 207000 }, { "epoch": 2.3, "learning_rate": 2.3043777624769562e-08, "loss": 4.1264, "step": 207500 }, { "epoch": 2.31, "learning_rate": 2.3099304799769007e-08, "loss": 4.1252, "step": 208000 }, { "epoch": 2.32, "learning_rate": 2.315483197476845e-08, "loss": 4.1338, "step": 208500 }, { "epoch": 2.32, "learning_rate": 2.3210359149767895e-08, "loss": 4.1296, "step": 209000 }, { "epoch": 2.33, "learning_rate": 2.326588632476734e-08, "loss": 4.1215, "step": 209500 }, { "epoch": 2.33, "learning_rate": 2.3321413499766783e-08, "loss": 4.1278, "step": 210000 }, { "epoch": 2.34, "learning_rate": 2.337694067476623e-08, "loss": 4.136, "step": 210500 }, { "epoch": 2.34, "learning_rate": 2.3432467849765675e-08, "loss": 4.0958, "step": 211000 }, { "epoch": 2.35, "learning_rate": 2.348799502476512e-08, "loss": 4.1178, "step": 211500 }, { "epoch": 2.35, "learning_rate": 2.3543522199764563e-08, "loss": 4.1008, "step": 212000 }, { "epoch": 2.36, "learning_rate": 2.3599049374764008e-08, "loss": 4.095, "step": 212500 }, { "epoch": 2.37, "learning_rate": 2.3654576549763452e-08, "loss": 4.1155, "step": 213000 }, { "epoch": 2.37, "learning_rate": 2.37101037247629e-08, "loss": 4.1213, "step": 213500 }, { "epoch": 2.38, "learning_rate": 2.3765630899762343e-08, "loss": 4.096, "step": 214000 }, { "epoch": 2.38, "learning_rate": 2.3821158074761788e-08, "loss": 4.1187, "step": 214500 }, { "epoch": 2.39, "learning_rate": 2.3876685249761232e-08, "loss": 4.1168, "step": 215000 }, { "epoch": 2.39, "learning_rate": 2.3932212424760676e-08, "loss": 4.1148, "step": 215500 }, { "epoch": 2.4, "learning_rate": 2.398773959976012e-08, "loss": 4.1237, "step": 216000 }, { "epoch": 2.4, "learning_rate": 2.4043266774759564e-08, "loss": 4.111, "step": 216500 }, { "epoch": 2.41, "learning_rate": 2.4098793949759012e-08, "loss": 4.1143, "step": 217000 }, { "epoch": 2.42, "learning_rate": 2.4154321124758456e-08, "loss": 4.1118, "step": 217500 }, { "epoch": 2.42, "learning_rate": 2.42098482997579e-08, "loss": 4.1146, "step": 218000 }, { "epoch": 2.43, "learning_rate": 2.4265375474757348e-08, "loss": 4.1138, "step": 218500 }, { "epoch": 2.43, "learning_rate": 2.432090264975679e-08, "loss": 4.0978, "step": 219000 }, { "epoch": 2.44, "learning_rate": 2.4376429824756233e-08, "loss": 4.1065, "step": 219500 }, { "epoch": 2.44, "learning_rate": 2.443195699975568e-08, "loss": 4.1024, "step": 220000 }, { "epoch": 2.45, "learning_rate": 2.4487484174755124e-08, "loss": 4.1114, "step": 220500 }, { "epoch": 2.45, "learning_rate": 2.454301134975457e-08, "loss": 4.1118, "step": 221000 }, { "epoch": 2.46, "learning_rate": 2.4598538524754016e-08, "loss": 4.1131, "step": 221500 }, { "epoch": 2.47, "learning_rate": 2.4654065699753457e-08, "loss": 4.0993, "step": 222000 }, { "epoch": 2.47, "learning_rate": 2.47095928747529e-08, "loss": 4.1242, "step": 222500 }, { "epoch": 2.48, "learning_rate": 2.476512004975235e-08, "loss": 4.1001, "step": 223000 }, { "epoch": 2.48, "learning_rate": 2.4820647224751793e-08, "loss": 4.1123, "step": 223500 }, { "epoch": 2.49, "learning_rate": 2.4876174399751237e-08, "loss": 4.096, "step": 224000 }, { "epoch": 2.49, "learning_rate": 2.493170157475068e-08, "loss": 4.0955, "step": 224500 }, { "epoch": 2.5, "learning_rate": 2.498722874975013e-08, "loss": 4.1065, "step": 225000 }, { "epoch": 2.5, "learning_rate": 2.504275592474957e-08, "loss": 4.0978, "step": 225500 }, { "epoch": 2.51, "learning_rate": 2.5098283099749017e-08, "loss": 4.117, "step": 226000 }, { "epoch": 2.52, "learning_rate": 2.515381027474846e-08, "loss": 4.1116, "step": 226500 }, { "epoch": 2.52, "learning_rate": 2.5209337449747902e-08, "loss": 4.1183, "step": 227000 }, { "epoch": 2.53, "learning_rate": 2.526486462474735e-08, "loss": 4.092, "step": 227500 }, { "epoch": 2.53, "learning_rate": 2.5320391799746794e-08, "loss": 4.0974, "step": 228000 }, { "epoch": 2.54, "learning_rate": 2.537591897474624e-08, "loss": 4.1012, "step": 228500 }, { "epoch": 2.54, "learning_rate": 2.5431446149745682e-08, "loss": 4.1221, "step": 229000 }, { "epoch": 2.55, "learning_rate": 2.5486973324745133e-08, "loss": 4.1168, "step": 229500 }, { "epoch": 2.55, "learning_rate": 2.5542500499744574e-08, "loss": 4.1236, "step": 230000 }, { "epoch": 2.56, "learning_rate": 2.5598027674744015e-08, "loss": 4.107, "step": 230500 }, { "epoch": 2.57, "learning_rate": 2.5653554849743465e-08, "loss": 4.0949, "step": 231000 }, { "epoch": 2.57, "learning_rate": 2.5709082024742906e-08, "loss": 4.1096, "step": 231500 }, { "epoch": 2.58, "learning_rate": 2.5764609199742354e-08, "loss": 4.1036, "step": 232000 }, { "epoch": 2.58, "learning_rate": 2.5820136374741798e-08, "loss": 4.0994, "step": 232500 }, { "epoch": 2.59, "learning_rate": 2.5875663549741245e-08, "loss": 4.1042, "step": 233000 }, { "epoch": 2.59, "learning_rate": 2.5931190724740686e-08, "loss": 4.0824, "step": 233500 }, { "epoch": 2.6, "learning_rate": 2.598671789974013e-08, "loss": 4.1048, "step": 234000 }, { "epoch": 2.6, "learning_rate": 2.6042245074739578e-08, "loss": 4.0772, "step": 234500 }, { "epoch": 2.61, "learning_rate": 2.609777224973902e-08, "loss": 4.0978, "step": 235000 }, { "epoch": 2.62, "learning_rate": 2.6153299424738466e-08, "loss": 4.0774, "step": 235500 }, { "epoch": 2.62, "learning_rate": 2.620882659973791e-08, "loss": 4.1111, "step": 236000 }, { "epoch": 2.63, "learning_rate": 2.6264353774737358e-08, "loss": 4.0863, "step": 236500 }, { "epoch": 2.63, "learning_rate": 2.63198809497368e-08, "loss": 4.1038, "step": 237000 }, { "epoch": 2.64, "learning_rate": 2.6375408124736243e-08, "loss": 4.0634, "step": 237500 }, { "epoch": 2.64, "learning_rate": 2.643093529973569e-08, "loss": 4.0941, "step": 238000 }, { "epoch": 2.65, "learning_rate": 2.648646247473513e-08, "loss": 4.1001, "step": 238500 }, { "epoch": 2.65, "learning_rate": 2.6541989649734582e-08, "loss": 4.1073, "step": 239000 }, { "epoch": 2.66, "learning_rate": 2.6597516824734023e-08, "loss": 4.0594, "step": 239500 }, { "epoch": 2.67, "learning_rate": 2.665304399973347e-08, "loss": 4.084, "step": 240000 }, { "epoch": 2.67, "learning_rate": 2.6708571174732915e-08, "loss": 4.0941, "step": 240500 }, { "epoch": 2.68, "learning_rate": 2.6764098349732356e-08, "loss": 4.1004, "step": 241000 }, { "epoch": 2.68, "learning_rate": 2.6819625524731803e-08, "loss": 4.0788, "step": 241500 }, { "epoch": 2.69, "learning_rate": 2.6875152699731247e-08, "loss": 4.1075, "step": 242000 }, { "epoch": 2.69, "learning_rate": 2.6930679874730695e-08, "loss": 4.1066, "step": 242500 }, { "epoch": 2.7, "learning_rate": 2.6986207049730136e-08, "loss": 4.096, "step": 243000 }, { "epoch": 2.7, "learning_rate": 2.704173422472958e-08, "loss": 4.1113, "step": 243500 }, { "epoch": 2.71, "learning_rate": 2.7097261399729027e-08, "loss": 4.1107, "step": 244000 }, { "epoch": 2.72, "learning_rate": 2.7152788574728468e-08, "loss": 4.0997, "step": 244500 }, { "epoch": 2.72, "learning_rate": 2.7208315749727916e-08, "loss": 4.0944, "step": 245000 }, { "epoch": 2.73, "learning_rate": 2.726384292472736e-08, "loss": 4.0742, "step": 245500 }, { "epoch": 2.73, "learning_rate": 2.7319370099726807e-08, "loss": 4.0917, "step": 246000 }, { "epoch": 2.74, "learning_rate": 2.7374897274726248e-08, "loss": 4.0972, "step": 246500 }, { "epoch": 2.74, "learning_rate": 2.7430424449725692e-08, "loss": 4.0874, "step": 247000 }, { "epoch": 2.75, "learning_rate": 2.748595162472514e-08, "loss": 4.0942, "step": 247500 }, { "epoch": 2.75, "learning_rate": 2.7541478799724584e-08, "loss": 4.0869, "step": 248000 }, { "epoch": 2.76, "learning_rate": 2.759700597472403e-08, "loss": 4.079, "step": 248500 }, { "epoch": 2.77, "learning_rate": 2.7652533149723472e-08, "loss": 4.0971, "step": 249000 }, { "epoch": 2.77, "learning_rate": 2.770806032472292e-08, "loss": 4.0913, "step": 249500 }, { "epoch": 2.78, "learning_rate": 2.7763587499722364e-08, "loss": 4.085, "step": 250000 }, { "epoch": 2.78, "learning_rate": 2.7819114674721805e-08, "loss": 4.0759, "step": 250500 }, { "epoch": 2.79, "learning_rate": 2.7874641849721252e-08, "loss": 4.0712, "step": 251000 }, { "epoch": 2.79, "learning_rate": 2.7930169024720697e-08, "loss": 4.1092, "step": 251500 }, { "epoch": 2.8, "learning_rate": 2.7985696199720144e-08, "loss": 4.0749, "step": 252000 }, { "epoch": 2.8, "learning_rate": 2.8041223374719585e-08, "loss": 4.0832, "step": 252500 }, { "epoch": 2.81, "learning_rate": 2.8096750549719032e-08, "loss": 4.0867, "step": 253000 }, { "epoch": 2.82, "learning_rate": 2.8152277724718477e-08, "loss": 4.0784, "step": 253500 }, { "epoch": 2.82, "learning_rate": 2.8207804899717917e-08, "loss": 4.0646, "step": 254000 }, { "epoch": 2.83, "learning_rate": 2.8263332074717365e-08, "loss": 4.0779, "step": 254500 }, { "epoch": 2.83, "learning_rate": 2.831885924971681e-08, "loss": 4.0666, "step": 255000 }, { "epoch": 2.84, "learning_rate": 2.8374386424716257e-08, "loss": 4.0903, "step": 255500 }, { "epoch": 2.84, "learning_rate": 2.84299135997157e-08, "loss": 4.0937, "step": 256000 }, { "epoch": 2.85, "learning_rate": 2.8485440774715148e-08, "loss": 4.0922, "step": 256500 }, { "epoch": 2.85, "learning_rate": 2.854096794971459e-08, "loss": 4.0747, "step": 257000 }, { "epoch": 2.86, "learning_rate": 2.8596495124714033e-08, "loss": 4.0704, "step": 257500 }, { "epoch": 2.87, "learning_rate": 2.865202229971348e-08, "loss": 4.0716, "step": 258000 }, { "epoch": 2.87, "learning_rate": 2.870754947471292e-08, "loss": 4.0747, "step": 258500 }, { "epoch": 2.88, "learning_rate": 2.876307664971237e-08, "loss": 4.0942, "step": 259000 }, { "epoch": 2.88, "learning_rate": 2.8818603824711813e-08, "loss": 4.1021, "step": 259500 }, { "epoch": 2.89, "learning_rate": 2.8874130999711254e-08, "loss": 4.0918, "step": 260000 }, { "epoch": 2.89, "learning_rate": 2.89296581747107e-08, "loss": 4.0813, "step": 260500 }, { "epoch": 2.9, "learning_rate": 2.8985185349710146e-08, "loss": 4.0744, "step": 261000 }, { "epoch": 2.9, "learning_rate": 2.9040712524709593e-08, "loss": 4.0882, "step": 261500 }, { "epoch": 2.91, "learning_rate": 2.9096239699709034e-08, "loss": 4.0887, "step": 262000 }, { "epoch": 2.92, "learning_rate": 2.9151766874708482e-08, "loss": 4.0747, "step": 262500 }, { "epoch": 2.92, "learning_rate": 2.9207294049707926e-08, "loss": 4.0707, "step": 263000 }, { "epoch": 2.93, "learning_rate": 2.9262821224707367e-08, "loss": 4.0642, "step": 263500 }, { "epoch": 2.93, "learning_rate": 2.9318348399706818e-08, "loss": 4.0646, "step": 264000 }, { "epoch": 2.94, "learning_rate": 2.937387557470626e-08, "loss": 4.075, "step": 264500 }, { "epoch": 2.94, "learning_rate": 2.9429402749705706e-08, "loss": 4.0733, "step": 265000 }, { "epoch": 2.95, "learning_rate": 2.948492992470515e-08, "loss": 4.0734, "step": 265500 }, { "epoch": 2.95, "learning_rate": 2.9540457099704598e-08, "loss": 4.0871, "step": 266000 }, { "epoch": 2.96, "learning_rate": 2.959598427470404e-08, "loss": 4.0676, "step": 266500 }, { "epoch": 2.97, "learning_rate": 2.9651511449703483e-08, "loss": 4.0773, "step": 267000 }, { "epoch": 2.97, "learning_rate": 2.970703862470293e-08, "loss": 4.0733, "step": 267500 }, { "epoch": 2.98, "learning_rate": 2.976256579970237e-08, "loss": 4.0874, "step": 268000 }, { "epoch": 2.98, "learning_rate": 2.981809297470182e-08, "loss": 4.0765, "step": 268500 }, { "epoch": 2.99, "learning_rate": 2.987362014970126e-08, "loss": 4.0605, "step": 269000 }, { "epoch": 2.99, "learning_rate": 2.992914732470071e-08, "loss": 4.0506, "step": 269500 }, { "epoch": 3.0, "learning_rate": 2.9984674499700154e-08, "loss": 4.0806, "step": 270000 }, { "epoch": 3.0, "eval_loss": 4.056514263153076, "eval_runtime": 6.3026, "eval_samples_per_second": 246.567, "step": 270138 }, { "epoch": 3.0, "learning_rate": 3.004020167469959e-08, "loss": 4.0646, "step": 270500 }, { "epoch": 3.01, "learning_rate": 3.009572884969904e-08, "loss": 4.0577, "step": 271000 }, { "epoch": 3.02, "learning_rate": 3.015125602469849e-08, "loss": 4.0721, "step": 271500 }, { "epoch": 3.02, "learning_rate": 3.020678319969793e-08, "loss": 4.0837, "step": 272000 }, { "epoch": 3.03, "learning_rate": 3.0262310374697375e-08, "loss": 4.0935, "step": 272500 }, { "epoch": 3.03, "learning_rate": 3.031783754969682e-08, "loss": 4.0564, "step": 273000 }, { "epoch": 3.04, "learning_rate": 3.0373364724696264e-08, "loss": 4.0866, "step": 273500 }, { "epoch": 3.04, "learning_rate": 3.042889189969571e-08, "loss": 4.0721, "step": 274000 }, { "epoch": 3.05, "learning_rate": 3.048441907469516e-08, "loss": 4.0722, "step": 274500 }, { "epoch": 3.05, "learning_rate": 3.0539946249694596e-08, "loss": 4.0812, "step": 275000 }, { "epoch": 3.06, "learning_rate": 3.059547342469405e-08, "loss": 4.0439, "step": 275500 }, { "epoch": 3.07, "learning_rate": 3.065100059969349e-08, "loss": 4.0733, "step": 276000 }, { "epoch": 3.07, "learning_rate": 3.070652777469293e-08, "loss": 4.0709, "step": 276500 }, { "epoch": 3.08, "learning_rate": 3.076205494969238e-08, "loss": 4.0633, "step": 277000 }, { "epoch": 3.08, "learning_rate": 3.0817582124691824e-08, "loss": 4.0608, "step": 277500 }, { "epoch": 3.09, "learning_rate": 3.087310929969127e-08, "loss": 4.0561, "step": 278000 }, { "epoch": 3.09, "learning_rate": 3.092863647469071e-08, "loss": 4.0637, "step": 278500 }, { "epoch": 3.1, "learning_rate": 3.0984163649690156e-08, "loss": 4.0655, "step": 279000 }, { "epoch": 3.1, "learning_rate": 3.10396908246896e-08, "loss": 4.0861, "step": 279500 }, { "epoch": 3.11, "learning_rate": 3.1095217999689044e-08, "loss": 4.0609, "step": 280000 }, { "epoch": 3.12, "learning_rate": 3.115074517468849e-08, "loss": 4.065, "step": 280500 }, { "epoch": 3.12, "learning_rate": 3.120627234968793e-08, "loss": 4.0679, "step": 281000 }, { "epoch": 3.13, "learning_rate": 3.1261799524687384e-08, "loss": 4.05, "step": 281500 }, { "epoch": 3.13, "learning_rate": 3.131732669968683e-08, "loss": 4.0283, "step": 282000 }, { "epoch": 3.14, "learning_rate": 3.137285387468627e-08, "loss": 4.0595, "step": 282500 }, { "epoch": 3.14, "learning_rate": 3.1428381049685716e-08, "loss": 4.0778, "step": 283000 }, { "epoch": 3.15, "learning_rate": 3.148390822468516e-08, "loss": 4.0679, "step": 283500 }, { "epoch": 3.15, "learning_rate": 3.1539435399684605e-08, "loss": 4.0653, "step": 284000 }, { "epoch": 3.16, "learning_rate": 3.159496257468405e-08, "loss": 4.0538, "step": 284500 }, { "epoch": 3.17, "learning_rate": 3.165048974968349e-08, "loss": 4.0479, "step": 285000 }, { "epoch": 3.17, "learning_rate": 3.170601692468294e-08, "loss": 4.0469, "step": 285500 }, { "epoch": 3.18, "learning_rate": 3.176154409968239e-08, "loss": 4.0756, "step": 286000 }, { "epoch": 3.18, "learning_rate": 3.1817071274681825e-08, "loss": 4.0609, "step": 286500 }, { "epoch": 3.19, "learning_rate": 3.187259844968127e-08, "loss": 4.0247, "step": 287000 }, { "epoch": 3.19, "learning_rate": 3.192812562468072e-08, "loss": 4.074, "step": 287500 }, { "epoch": 3.2, "learning_rate": 3.198365279968016e-08, "loss": 4.0597, "step": 288000 }, { "epoch": 3.2, "learning_rate": 3.203917997467961e-08, "loss": 4.0569, "step": 288500 }, { "epoch": 3.21, "learning_rate": 3.209470714967905e-08, "loss": 4.0393, "step": 289000 }, { "epoch": 3.22, "learning_rate": 3.215023432467849e-08, "loss": 4.0618, "step": 289500 }, { "epoch": 3.22, "learning_rate": 3.220576149967794e-08, "loss": 4.0426, "step": 290000 }, { "epoch": 3.23, "learning_rate": 3.2261288674677385e-08, "loss": 4.0593, "step": 290500 }, { "epoch": 3.23, "learning_rate": 3.231681584967683e-08, "loss": 4.0546, "step": 291000 }, { "epoch": 3.24, "learning_rate": 3.2372343024676274e-08, "loss": 4.0527, "step": 291500 }, { "epoch": 3.24, "learning_rate": 3.2427870199675725e-08, "loss": 4.0523, "step": 292000 }, { "epoch": 3.25, "learning_rate": 3.248339737467516e-08, "loss": 4.0676, "step": 292500 }, { "epoch": 3.25, "learning_rate": 3.2538924549674606e-08, "loss": 4.0683, "step": 293000 }, { "epoch": 3.26, "learning_rate": 3.259445172467406e-08, "loss": 4.0577, "step": 293500 }, { "epoch": 3.26, "learning_rate": 3.2649978899673495e-08, "loss": 4.0606, "step": 294000 }, { "epoch": 3.27, "learning_rate": 3.2705506074672945e-08, "loss": 4.0633, "step": 294500 }, { "epoch": 3.28, "learning_rate": 3.276103324967239e-08, "loss": 4.0385, "step": 295000 }, { "epoch": 3.28, "learning_rate": 3.2816560424671834e-08, "loss": 4.036, "step": 295500 }, { "epoch": 3.29, "learning_rate": 3.287208759967128e-08, "loss": 4.0401, "step": 296000 }, { "epoch": 3.29, "learning_rate": 3.292761477467072e-08, "loss": 4.0433, "step": 296500 }, { "epoch": 3.3, "learning_rate": 3.2983141949670166e-08, "loss": 4.0394, "step": 297000 }, { "epoch": 3.3, "learning_rate": 3.303866912466961e-08, "loss": 4.0434, "step": 297500 }, { "epoch": 3.31, "learning_rate": 3.309419629966906e-08, "loss": 4.0629, "step": 298000 }, { "epoch": 3.31, "learning_rate": 3.31497234746685e-08, "loss": 4.0306, "step": 298500 }, { "epoch": 3.32, "learning_rate": 3.320525064966795e-08, "loss": 4.0449, "step": 299000 }, { "epoch": 3.33, "learning_rate": 3.3260777824667394e-08, "loss": 4.0438, "step": 299500 }, { "epoch": 3.33, "learning_rate": 3.331630499966683e-08, "loss": 4.0442, "step": 300000 }, { "epoch": 3.34, "learning_rate": 3.337183217466628e-08, "loss": 4.0438, "step": 300500 }, { "epoch": 3.34, "learning_rate": 3.3427359349665726e-08, "loss": 4.0358, "step": 301000 }, { "epoch": 3.35, "learning_rate": 3.348288652466517e-08, "loss": 4.0654, "step": 301500 }, { "epoch": 3.35, "learning_rate": 3.3538413699664615e-08, "loss": 4.0596, "step": 302000 }, { "epoch": 3.36, "learning_rate": 3.359394087466406e-08, "loss": 4.0342, "step": 302500 }, { "epoch": 3.36, "learning_rate": 3.36494680496635e-08, "loss": 4.0457, "step": 303000 }, { "epoch": 3.37, "learning_rate": 3.370499522466295e-08, "loss": 4.0387, "step": 303500 }, { "epoch": 3.38, "learning_rate": 3.376052239966239e-08, "loss": 4.0336, "step": 304000 }, { "epoch": 3.38, "learning_rate": 3.3816049574661836e-08, "loss": 4.0226, "step": 304500 }, { "epoch": 3.39, "learning_rate": 3.3871576749661286e-08, "loss": 4.0373, "step": 305000 }, { "epoch": 3.39, "learning_rate": 3.3927103924660724e-08, "loss": 4.0409, "step": 305500 }, { "epoch": 3.4, "learning_rate": 3.398263109966017e-08, "loss": 4.0429, "step": 306000 }, { "epoch": 3.4, "learning_rate": 3.403815827465962e-08, "loss": 4.0652, "step": 306500 }, { "epoch": 3.41, "learning_rate": 3.4093685449659057e-08, "loss": 4.0422, "step": 307000 }, { "epoch": 3.41, "learning_rate": 3.414921262465851e-08, "loss": 4.0615, "step": 307500 }, { "epoch": 3.42, "learning_rate": 3.420473979965795e-08, "loss": 4.0351, "step": 308000 }, { "epoch": 3.43, "learning_rate": 3.4260266974657396e-08, "loss": 4.0478, "step": 308500 }, { "epoch": 3.43, "learning_rate": 3.431579414965684e-08, "loss": 4.0489, "step": 309000 }, { "epoch": 3.44, "learning_rate": 3.4371321324656284e-08, "loss": 4.0405, "step": 309500 }, { "epoch": 3.44, "learning_rate": 3.442684849965573e-08, "loss": 4.0276, "step": 310000 }, { "epoch": 3.45, "learning_rate": 3.448237567465517e-08, "loss": 4.0416, "step": 310500 }, { "epoch": 3.45, "learning_rate": 3.453790284965462e-08, "loss": 4.0589, "step": 311000 }, { "epoch": 3.46, "learning_rate": 3.459343002465406e-08, "loss": 4.0261, "step": 311500 }, { "epoch": 3.46, "learning_rate": 3.464895719965351e-08, "loss": 4.0363, "step": 312000 }, { "epoch": 3.47, "learning_rate": 3.4704484374652956e-08, "loss": 4.0582, "step": 312500 }, { "epoch": 3.48, "learning_rate": 3.4760011549652393e-08, "loss": 4.0281, "step": 313000 }, { "epoch": 3.48, "learning_rate": 3.4815538724651844e-08, "loss": 4.0409, "step": 313500 }, { "epoch": 3.49, "learning_rate": 3.487106589965129e-08, "loss": 4.0402, "step": 314000 }, { "epoch": 3.49, "learning_rate": 3.492659307465073e-08, "loss": 4.0204, "step": 314500 }, { "epoch": 3.5, "learning_rate": 3.4982120249650177e-08, "loss": 4.0187, "step": 315000 }, { "epoch": 3.5, "learning_rate": 3.503764742464963e-08, "loss": 4.0387, "step": 315500 }, { "epoch": 3.51, "learning_rate": 3.5093174599649065e-08, "loss": 4.028, "step": 316000 }, { "epoch": 3.51, "learning_rate": 3.514870177464851e-08, "loss": 4.0498, "step": 316500 }, { "epoch": 3.52, "learning_rate": 3.520422894964796e-08, "loss": 4.035, "step": 317000 }, { "epoch": 3.53, "learning_rate": 3.52597561246474e-08, "loss": 4.0359, "step": 317500 }, { "epoch": 3.53, "learning_rate": 3.531528329964685e-08, "loss": 4.0442, "step": 318000 }, { "epoch": 3.54, "learning_rate": 3.537081047464629e-08, "loss": 4.032, "step": 318500 }, { "epoch": 3.54, "learning_rate": 3.542633764964574e-08, "loss": 4.0291, "step": 319000 }, { "epoch": 3.55, "learning_rate": 3.548186482464518e-08, "loss": 4.0364, "step": 319500 }, { "epoch": 3.55, "learning_rate": 3.5537391999644625e-08, "loss": 4.0294, "step": 320000 }, { "epoch": 3.56, "learning_rate": 3.559291917464407e-08, "loss": 4.0276, "step": 320500 }, { "epoch": 3.56, "learning_rate": 3.5648446349643513e-08, "loss": 4.0239, "step": 321000 }, { "epoch": 3.57, "learning_rate": 3.570397352464296e-08, "loss": 4.0317, "step": 321500 }, { "epoch": 3.58, "learning_rate": 3.57595006996424e-08, "loss": 4.0278, "step": 322000 }, { "epoch": 3.58, "learning_rate": 3.5815027874641846e-08, "loss": 4.0462, "step": 322500 }, { "epoch": 3.59, "learning_rate": 3.587055504964129e-08, "loss": 4.0241, "step": 323000 }, { "epoch": 3.59, "learning_rate": 3.5926082224640734e-08, "loss": 4.0157, "step": 323500 }, { "epoch": 3.6, "learning_rate": 3.5981609399640185e-08, "loss": 4.0438, "step": 324000 }, { "epoch": 3.6, "learning_rate": 3.603713657463963e-08, "loss": 4.0424, "step": 324500 }, { "epoch": 3.61, "learning_rate": 3.6092663749639073e-08, "loss": 4.0295, "step": 325000 }, { "epoch": 3.61, "learning_rate": 3.614819092463852e-08, "loss": 4.0269, "step": 325500 }, { "epoch": 3.62, "learning_rate": 3.620371809963796e-08, "loss": 4.0431, "step": 326000 }, { "epoch": 3.63, "learning_rate": 3.6259245274637406e-08, "loss": 4.0322, "step": 326500 }, { "epoch": 3.63, "learning_rate": 3.631477244963685e-08, "loss": 4.0323, "step": 327000 }, { "epoch": 3.64, "learning_rate": 3.6370299624636294e-08, "loss": 4.0452, "step": 327500 }, { "epoch": 3.64, "learning_rate": 3.642582679963574e-08, "loss": 4.0394, "step": 328000 }, { "epoch": 3.65, "learning_rate": 3.648135397463519e-08, "loss": 4.0444, "step": 328500 }, { "epoch": 3.65, "learning_rate": 3.653688114963463e-08, "loss": 4.0201, "step": 329000 }, { "epoch": 3.66, "learning_rate": 3.659240832463407e-08, "loss": 4.0265, "step": 329500 }, { "epoch": 3.66, "learning_rate": 3.664793549963352e-08, "loss": 4.0365, "step": 330000 }, { "epoch": 3.67, "learning_rate": 3.670346267463296e-08, "loss": 4.0136, "step": 330500 }, { "epoch": 3.68, "learning_rate": 3.675898984963241e-08, "loss": 4.0301, "step": 331000 }, { "epoch": 3.68, "learning_rate": 3.6814517024631854e-08, "loss": 4.0269, "step": 331500 }, { "epoch": 3.69, "learning_rate": 3.68700441996313e-08, "loss": 4.0471, "step": 332000 }, { "epoch": 3.69, "learning_rate": 3.692557137463074e-08, "loss": 4.0178, "step": 332500 }, { "epoch": 3.7, "learning_rate": 3.698109854963019e-08, "loss": 4.0266, "step": 333000 }, { "epoch": 3.7, "learning_rate": 3.703662572462963e-08, "loss": 4.0309, "step": 333500 }, { "epoch": 3.71, "learning_rate": 3.7092152899629075e-08, "loss": 4.0293, "step": 334000 }, { "epoch": 3.71, "learning_rate": 3.7147680074628526e-08, "loss": 4.0329, "step": 334500 }, { "epoch": 3.72, "learning_rate": 3.7203207249627964e-08, "loss": 4.0213, "step": 335000 }, { "epoch": 3.73, "learning_rate": 3.725873442462741e-08, "loss": 4.0302, "step": 335500 }, { "epoch": 3.73, "learning_rate": 3.731426159962686e-08, "loss": 4.0358, "step": 336000 }, { "epoch": 3.74, "learning_rate": 3.7369788774626296e-08, "loss": 4.008, "step": 336500 }, { "epoch": 3.74, "learning_rate": 3.742531594962575e-08, "loss": 4.0094, "step": 337000 }, { "epoch": 3.75, "learning_rate": 3.748084312462519e-08, "loss": 4.0164, "step": 337500 }, { "epoch": 3.75, "learning_rate": 3.7536370299624635e-08, "loss": 4.0193, "step": 338000 }, { "epoch": 3.76, "learning_rate": 3.759189747462408e-08, "loss": 4.0287, "step": 338500 }, { "epoch": 3.76, "learning_rate": 3.7647424649623524e-08, "loss": 4.023, "step": 339000 }, { "epoch": 3.77, "learning_rate": 3.770295182462297e-08, "loss": 4.0225, "step": 339500 }, { "epoch": 3.78, "learning_rate": 3.775847899962241e-08, "loss": 4.0399, "step": 340000 }, { "epoch": 3.78, "learning_rate": 3.781400617462186e-08, "loss": 4.0306, "step": 340500 }, { "epoch": 3.79, "learning_rate": 3.78695333496213e-08, "loss": 4.022, "step": 341000 }, { "epoch": 3.79, "learning_rate": 3.792506052462075e-08, "loss": 4.0275, "step": 341500 }, { "epoch": 3.8, "learning_rate": 3.7980587699620195e-08, "loss": 4.0376, "step": 342000 }, { "epoch": 3.8, "learning_rate": 3.803611487461963e-08, "loss": 4.0126, "step": 342500 }, { "epoch": 3.81, "learning_rate": 3.8091642049619084e-08, "loss": 4.0314, "step": 343000 }, { "epoch": 3.81, "learning_rate": 3.814716922461853e-08, "loss": 4.0159, "step": 343500 }, { "epoch": 3.82, "learning_rate": 3.820269639961797e-08, "loss": 4.0281, "step": 344000 }, { "epoch": 3.83, "learning_rate": 3.8258223574617416e-08, "loss": 4.0251, "step": 344500 }, { "epoch": 3.83, "learning_rate": 3.831375074961686e-08, "loss": 4.0242, "step": 345000 }, { "epoch": 3.84, "learning_rate": 3.8369277924616305e-08, "loss": 4.0365, "step": 345500 }, { "epoch": 3.84, "learning_rate": 3.842480509961575e-08, "loss": 4.0134, "step": 346000 }, { "epoch": 3.85, "learning_rate": 3.848033227461519e-08, "loss": 4.0163, "step": 346500 }, { "epoch": 3.85, "learning_rate": 3.853585944961464e-08, "loss": 4.0371, "step": 347000 }, { "epoch": 3.86, "learning_rate": 3.859138662461409e-08, "loss": 4.0047, "step": 347500 }, { "epoch": 3.86, "learning_rate": 3.8646913799613526e-08, "loss": 4.016, "step": 348000 }, { "epoch": 3.87, "learning_rate": 3.8702440974612976e-08, "loss": 4.0389, "step": 348500 }, { "epoch": 3.88, "learning_rate": 3.875796814961242e-08, "loss": 4.0015, "step": 349000 }, { "epoch": 3.88, "learning_rate": 3.881349532461186e-08, "loss": 4.0132, "step": 349500 }, { "epoch": 3.89, "learning_rate": 3.886902249961131e-08, "loss": 4.0028, "step": 350000 }, { "epoch": 3.89, "learning_rate": 3.892454967461075e-08, "loss": 4.0079, "step": 350500 }, { "epoch": 3.9, "learning_rate": 3.89800768496102e-08, "loss": 4.0428, "step": 351000 }, { "epoch": 3.9, "learning_rate": 3.903560402460964e-08, "loss": 4.0088, "step": 351500 }, { "epoch": 3.91, "learning_rate": 3.9091131199609086e-08, "loss": 4.0158, "step": 352000 }, { "epoch": 3.91, "learning_rate": 3.914665837460853e-08, "loss": 4.0181, "step": 352500 }, { "epoch": 3.92, "learning_rate": 3.9202185549607974e-08, "loss": 4.0193, "step": 353000 }, { "epoch": 3.93, "learning_rate": 3.9257712724607425e-08, "loss": 4.0056, "step": 353500 }, { "epoch": 3.93, "learning_rate": 3.931323989960686e-08, "loss": 4.0166, "step": 354000 }, { "epoch": 3.94, "learning_rate": 3.936876707460631e-08, "loss": 4.0256, "step": 354500 }, { "epoch": 3.94, "learning_rate": 3.942429424960576e-08, "loss": 4.014, "step": 355000 }, { "epoch": 3.95, "learning_rate": 3.9479821424605195e-08, "loss": 4.0526, "step": 355500 }, { "epoch": 3.95, "learning_rate": 3.9535348599604646e-08, "loss": 4.0243, "step": 356000 }, { "epoch": 3.96, "learning_rate": 3.959087577460409e-08, "loss": 4.0224, "step": 356500 }, { "epoch": 3.96, "learning_rate": 3.9646402949603534e-08, "loss": 4.0319, "step": 357000 }, { "epoch": 3.97, "learning_rate": 3.970193012460298e-08, "loss": 4.0028, "step": 357500 }, { "epoch": 3.98, "learning_rate": 3.975745729960243e-08, "loss": 3.9964, "step": 358000 }, { "epoch": 3.98, "learning_rate": 3.9812984474601867e-08, "loss": 4.0005, "step": 358500 }, { "epoch": 3.99, "learning_rate": 3.986851164960131e-08, "loss": 4.0167, "step": 359000 }, { "epoch": 3.99, "learning_rate": 3.992403882460076e-08, "loss": 4.0178, "step": 359500 }, { "epoch": 4.0, "learning_rate": 3.99795659996002e-08, "loss": 4.0169, "step": 360000 }, { "epoch": 4.0, "eval_loss": 4.012733459472656, "eval_runtime": 6.3084, "eval_samples_per_second": 246.337, "step": 360184 }, { "epoch": 4.0, "learning_rate": 4.003509317459965e-08, "loss": 4.0111, "step": 360500 }, { "epoch": 4.01, "learning_rate": 4.0090620349599094e-08, "loss": 4.0056, "step": 361000 }, { "epoch": 4.01, "learning_rate": 4.014614752459854e-08, "loss": 4.0092, "step": 361500 }, { "epoch": 4.02, "learning_rate": 4.020167469959798e-08, "loss": 4.0285, "step": 362000 }, { "epoch": 4.03, "learning_rate": 4.0257201874597427e-08, "loss": 4.0084, "step": 362500 }, { "epoch": 4.03, "learning_rate": 4.031272904959687e-08, "loss": 4.0263, "step": 363000 }, { "epoch": 4.04, "learning_rate": 4.0368256224596315e-08, "loss": 4.0244, "step": 363500 }, { "epoch": 4.04, "learning_rate": 4.042378339959576e-08, "loss": 3.9977, "step": 364000 }, { "epoch": 4.05, "learning_rate": 4.04793105745952e-08, "loss": 3.9967, "step": 364500 }, { "epoch": 4.05, "learning_rate": 4.0534837749594654e-08, "loss": 4.0248, "step": 365000 }, { "epoch": 4.06, "learning_rate": 4.059036492459409e-08, "loss": 4.0071, "step": 365500 }, { "epoch": 4.06, "learning_rate": 4.0645892099593536e-08, "loss": 4.016, "step": 366000 }, { "epoch": 4.07, "learning_rate": 4.0701419274592987e-08, "loss": 3.9927, "step": 366500 }, { "epoch": 4.08, "learning_rate": 4.075694644959243e-08, "loss": 4.0108, "step": 367000 }, { "epoch": 4.08, "learning_rate": 4.0812473624591875e-08, "loss": 4.0183, "step": 367500 }, { "epoch": 4.09, "learning_rate": 4.086800079959132e-08, "loss": 4.0027, "step": 368000 }, { "epoch": 4.09, "learning_rate": 4.0923527974590763e-08, "loss": 4.0056, "step": 368500 }, { "epoch": 4.1, "learning_rate": 4.097905514959021e-08, "loss": 4.0082, "step": 369000 }, { "epoch": 4.1, "learning_rate": 4.103458232458965e-08, "loss": 4.0038, "step": 369500 }, { "epoch": 4.11, "learning_rate": 4.1090109499589096e-08, "loss": 3.9956, "step": 370000 }, { "epoch": 4.11, "learning_rate": 4.114563667458854e-08, "loss": 4.0033, "step": 370500 }, { "epoch": 4.12, "learning_rate": 4.120116384958799e-08, "loss": 3.9961, "step": 371000 }, { "epoch": 4.13, "learning_rate": 4.125669102458743e-08, "loss": 4.0267, "step": 371500 }, { "epoch": 4.13, "learning_rate": 4.131221819958687e-08, "loss": 3.9941, "step": 372000 }, { "epoch": 4.14, "learning_rate": 4.1367745374586323e-08, "loss": 4.0191, "step": 372500 }, { "epoch": 4.14, "learning_rate": 4.142327254958576e-08, "loss": 4.0141, "step": 373000 }, { "epoch": 4.15, "learning_rate": 4.147879972458521e-08, "loss": 4.0063, "step": 373500 }, { "epoch": 4.15, "learning_rate": 4.1534326899584656e-08, "loss": 4.0067, "step": 374000 }, { "epoch": 4.16, "learning_rate": 4.15898540745841e-08, "loss": 4.0085, "step": 374500 }, { "epoch": 4.16, "learning_rate": 4.1645381249583544e-08, "loss": 4.0114, "step": 375000 }, { "epoch": 4.17, "learning_rate": 4.170090842458299e-08, "loss": 3.9937, "step": 375500 }, { "epoch": 4.18, "learning_rate": 4.175643559958243e-08, "loss": 4.001, "step": 376000 }, { "epoch": 4.18, "learning_rate": 4.181196277458188e-08, "loss": 4.0154, "step": 376500 }, { "epoch": 4.19, "learning_rate": 4.186748994958133e-08, "loss": 3.9987, "step": 377000 }, { "epoch": 4.19, "learning_rate": 4.1923017124580765e-08, "loss": 4.0165, "step": 377500 }, { "epoch": 4.2, "learning_rate": 4.1978544299580216e-08, "loss": 3.9932, "step": 378000 }, { "epoch": 4.2, "learning_rate": 4.203407147457966e-08, "loss": 4.0054, "step": 378500 }, { "epoch": 4.21, "learning_rate": 4.20895986495791e-08, "loss": 4.0084, "step": 379000 }, { "epoch": 4.21, "learning_rate": 4.214512582457855e-08, "loss": 4.0094, "step": 379500 }, { "epoch": 4.22, "learning_rate": 4.220065299957799e-08, "loss": 3.9905, "step": 380000 }, { "epoch": 4.23, "learning_rate": 4.225618017457744e-08, "loss": 4.0, "step": 380500 }, { "epoch": 4.23, "learning_rate": 4.231170734957688e-08, "loss": 4.0141, "step": 381000 }, { "epoch": 4.24, "learning_rate": 4.2367234524576325e-08, "loss": 4.0034, "step": 381500 }, { "epoch": 4.24, "learning_rate": 4.242276169957577e-08, "loss": 4.0126, "step": 382000 }, { "epoch": 4.25, "learning_rate": 4.2478288874575214e-08, "loss": 3.9771, "step": 382500 }, { "epoch": 4.25, "learning_rate": 4.2533816049574664e-08, "loss": 4.0017, "step": 383000 }, { "epoch": 4.26, "learning_rate": 4.25893432245741e-08, "loss": 4.0028, "step": 383500 }, { "epoch": 4.26, "learning_rate": 4.264487039957355e-08, "loss": 3.9836, "step": 384000 }, { "epoch": 4.27, "learning_rate": 4.2700397574573e-08, "loss": 3.987, "step": 384500 }, { "epoch": 4.28, "learning_rate": 4.2755924749572434e-08, "loss": 4.0078, "step": 385000 }, { "epoch": 4.28, "learning_rate": 4.2811451924571885e-08, "loss": 3.9905, "step": 385500 }, { "epoch": 4.29, "learning_rate": 4.286697909957133e-08, "loss": 3.9939, "step": 386000 }, { "epoch": 4.29, "learning_rate": 4.2922506274570774e-08, "loss": 4.0043, "step": 386500 }, { "epoch": 4.3, "learning_rate": 4.297803344957022e-08, "loss": 3.9899, "step": 387000 }, { "epoch": 4.3, "learning_rate": 4.303356062456966e-08, "loss": 3.9869, "step": 387500 }, { "epoch": 4.31, "learning_rate": 4.3089087799569106e-08, "loss": 3.9914, "step": 388000 }, { "epoch": 4.31, "learning_rate": 4.314461497456855e-08, "loss": 4.0073, "step": 388500 }, { "epoch": 4.32, "learning_rate": 4.3200142149567995e-08, "loss": 4.0109, "step": 389000 }, { "epoch": 4.33, "learning_rate": 4.325566932456744e-08, "loss": 3.9705, "step": 389500 }, { "epoch": 4.33, "learning_rate": 4.331119649956689e-08, "loss": 4.0004, "step": 390000 }, { "epoch": 4.34, "learning_rate": 4.336672367456633e-08, "loss": 4.0117, "step": 390500 }, { "epoch": 4.34, "learning_rate": 4.342225084956578e-08, "loss": 3.9868, "step": 391000 }, { "epoch": 4.35, "learning_rate": 4.347777802456522e-08, "loss": 3.9959, "step": 391500 }, { "epoch": 4.35, "learning_rate": 4.353330519956466e-08, "loss": 4.006, "step": 392000 }, { "epoch": 4.36, "learning_rate": 4.358883237456411e-08, "loss": 3.9973, "step": 392500 }, { "epoch": 4.36, "learning_rate": 4.3644359549563555e-08, "loss": 3.9842, "step": 393000 }, { "epoch": 4.37, "learning_rate": 4.3699886724563e-08, "loss": 3.993, "step": 393500 }, { "epoch": 4.38, "learning_rate": 4.375541389956244e-08, "loss": 3.9979, "step": 394000 }, { "epoch": 4.38, "learning_rate": 4.3810941074561894e-08, "loss": 3.9982, "step": 394500 }, { "epoch": 4.39, "learning_rate": 4.386646824956133e-08, "loss": 4.0057, "step": 395000 }, { "epoch": 4.39, "learning_rate": 4.3921995424560775e-08, "loss": 3.9749, "step": 395500 }, { "epoch": 4.4, "learning_rate": 4.3977522599560226e-08, "loss": 4.0134, "step": 396000 }, { "epoch": 4.4, "learning_rate": 4.4033049774559664e-08, "loss": 3.9945, "step": 396500 }, { "epoch": 4.41, "learning_rate": 4.4088576949559115e-08, "loss": 3.9779, "step": 397000 }, { "epoch": 4.41, "learning_rate": 4.414410412455856e-08, "loss": 3.9846, "step": 397500 }, { "epoch": 4.42, "learning_rate": 4.4199631299557996e-08, "loss": 4.002, "step": 398000 }, { "epoch": 4.43, "learning_rate": 4.425515847455745e-08, "loss": 3.9968, "step": 398500 }, { "epoch": 4.43, "learning_rate": 4.431068564955689e-08, "loss": 4.0025, "step": 399000 }, { "epoch": 4.44, "learning_rate": 4.4366212824556336e-08, "loss": 3.9933, "step": 399500 }, { "epoch": 4.44, "learning_rate": 4.442173999955578e-08, "loss": 3.9982, "step": 400000 }, { "epoch": 4.45, "learning_rate": 4.447726717455523e-08, "loss": 3.9919, "step": 400500 }, { "epoch": 4.45, "learning_rate": 4.453279434955467e-08, "loss": 3.9767, "step": 401000 }, { "epoch": 4.46, "learning_rate": 4.458832152455411e-08, "loss": 4.0057, "step": 401500 }, { "epoch": 4.46, "learning_rate": 4.464384869955356e-08, "loss": 4.0115, "step": 402000 }, { "epoch": 4.47, "learning_rate": 4.4699375874553e-08, "loss": 3.9792, "step": 402500 }, { "epoch": 4.48, "learning_rate": 4.475490304955245e-08, "loss": 3.9911, "step": 403000 }, { "epoch": 4.48, "learning_rate": 4.4810430224551896e-08, "loss": 3.9975, "step": 403500 }, { "epoch": 4.49, "learning_rate": 4.486595739955134e-08, "loss": 3.9752, "step": 404000 }, { "epoch": 4.49, "learning_rate": 4.4921484574550784e-08, "loss": 3.9821, "step": 404500 }, { "epoch": 4.5, "learning_rate": 4.497701174955023e-08, "loss": 3.986, "step": 405000 }, { "epoch": 4.5, "learning_rate": 4.503253892454967e-08, "loss": 4.0016, "step": 405500 }, { "epoch": 4.51, "learning_rate": 4.5088066099549116e-08, "loss": 3.9924, "step": 406000 }, { "epoch": 4.51, "learning_rate": 4.514359327454856e-08, "loss": 3.9965, "step": 406500 }, { "epoch": 4.52, "learning_rate": 4.5199120449548005e-08, "loss": 3.9853, "step": 407000 }, { "epoch": 4.53, "learning_rate": 4.5254647624547456e-08, "loss": 3.9895, "step": 407500 }, { "epoch": 4.53, "learning_rate": 4.531017479954689e-08, "loss": 3.9945, "step": 408000 }, { "epoch": 4.54, "learning_rate": 4.536570197454634e-08, "loss": 3.9956, "step": 408500 }, { "epoch": 4.54, "learning_rate": 4.542122914954579e-08, "loss": 3.9892, "step": 409000 }, { "epoch": 4.55, "learning_rate": 4.547675632454523e-08, "loss": 3.9985, "step": 409500 }, { "epoch": 4.55, "learning_rate": 4.5532283499544677e-08, "loss": 4.0087, "step": 410000 }, { "epoch": 4.56, "learning_rate": 4.558781067454412e-08, "loss": 3.9899, "step": 410500 }, { "epoch": 4.56, "learning_rate": 4.5643337849543565e-08, "loss": 4.0053, "step": 411000 }, { "epoch": 4.57, "learning_rate": 4.569886502454301e-08, "loss": 3.973, "step": 411500 }, { "epoch": 4.58, "learning_rate": 4.575439219954245e-08, "loss": 3.9917, "step": 412000 }, { "epoch": 4.58, "learning_rate": 4.58099193745419e-08, "loss": 3.9872, "step": 412500 }, { "epoch": 4.59, "learning_rate": 4.586544654954134e-08, "loss": 3.9766, "step": 413000 }, { "epoch": 4.59, "learning_rate": 4.592097372454079e-08, "loss": 3.9831, "step": 413500 }, { "epoch": 4.6, "learning_rate": 4.597650089954023e-08, "loss": 3.9769, "step": 414000 }, { "epoch": 4.6, "learning_rate": 4.6032028074539674e-08, "loss": 3.987, "step": 414500 }, { "epoch": 4.61, "learning_rate": 4.6087555249539125e-08, "loss": 3.9891, "step": 415000 }, { "epoch": 4.61, "learning_rate": 4.614308242453856e-08, "loss": 3.9911, "step": 415500 }, { "epoch": 4.62, "learning_rate": 4.619860959953801e-08, "loss": 3.9969, "step": 416000 }, { "epoch": 4.63, "learning_rate": 4.625413677453746e-08, "loss": 4.0026, "step": 416500 }, { "epoch": 4.63, "learning_rate": 4.63096639495369e-08, "loss": 3.9894, "step": 417000 }, { "epoch": 4.64, "learning_rate": 4.6365191124536346e-08, "loss": 3.9728, "step": 417500 }, { "epoch": 4.64, "learning_rate": 4.642071829953579e-08, "loss": 3.9784, "step": 418000 }, { "epoch": 4.65, "learning_rate": 4.6476245474535234e-08, "loss": 3.9917, "step": 418500 }, { "epoch": 4.65, "learning_rate": 4.653177264953468e-08, "loss": 3.9886, "step": 419000 }, { "epoch": 4.66, "learning_rate": 4.658729982453413e-08, "loss": 3.9813, "step": 419500 }, { "epoch": 4.66, "learning_rate": 4.664282699953357e-08, "loss": 3.9863, "step": 420000 }, { "epoch": 4.67, "learning_rate": 4.669835417453302e-08, "loss": 4.0092, "step": 420500 }, { "epoch": 4.68, "learning_rate": 4.675388134953246e-08, "loss": 3.9895, "step": 421000 }, { "epoch": 4.68, "learning_rate": 4.68094085245319e-08, "loss": 3.9985, "step": 421500 }, { "epoch": 4.69, "learning_rate": 4.686493569953135e-08, "loss": 3.9822, "step": 422000 }, { "epoch": 4.69, "learning_rate": 4.6920462874530794e-08, "loss": 3.9833, "step": 422500 }, { "epoch": 4.7, "learning_rate": 4.697599004953024e-08, "loss": 4.0023, "step": 423000 }, { "epoch": 4.7, "learning_rate": 4.703151722452968e-08, "loss": 3.9726, "step": 423500 }, { "epoch": 4.71, "learning_rate": 4.708704439952913e-08, "loss": 3.956, "step": 424000 }, { "epoch": 4.71, "learning_rate": 4.714257157452857e-08, "loss": 3.9939, "step": 424500 }, { "epoch": 4.72, "learning_rate": 4.7198098749528015e-08, "loss": 3.988, "step": 425000 }, { "epoch": 4.73, "learning_rate": 4.7253625924527466e-08, "loss": 3.9827, "step": 425500 }, { "epoch": 4.73, "learning_rate": 4.7309153099526903e-08, "loss": 3.9837, "step": 426000 }, { "epoch": 4.74, "learning_rate": 4.7364680274526354e-08, "loss": 3.9843, "step": 426500 }, { "epoch": 4.74, "learning_rate": 4.74202074495258e-08, "loss": 3.9812, "step": 427000 }, { "epoch": 4.75, "learning_rate": 4.747573462452524e-08, "loss": 3.9722, "step": 427500 }, { "epoch": 4.75, "learning_rate": 4.753126179952469e-08, "loss": 3.9839, "step": 428000 }, { "epoch": 4.76, "learning_rate": 4.758678897452413e-08, "loss": 3.9772, "step": 428500 }, { "epoch": 4.76, "learning_rate": 4.7642316149523575e-08, "loss": 4.0006, "step": 429000 }, { "epoch": 4.77, "learning_rate": 4.769784332452302e-08, "loss": 3.9973, "step": 429500 }, { "epoch": 4.78, "learning_rate": 4.7753370499522464e-08, "loss": 3.9879, "step": 430000 }, { "epoch": 4.78, "learning_rate": 4.780889767452191e-08, "loss": 3.9749, "step": 430500 }, { "epoch": 4.79, "learning_rate": 4.786442484952135e-08, "loss": 3.9939, "step": 431000 }, { "epoch": 4.79, "learning_rate": 4.7919952024520796e-08, "loss": 3.9714, "step": 431500 }, { "epoch": 4.8, "learning_rate": 4.797547919952024e-08, "loss": 3.9883, "step": 432000 }, { "epoch": 4.8, "learning_rate": 4.803100637451969e-08, "loss": 3.9879, "step": 432500 }, { "epoch": 4.81, "learning_rate": 4.808653354951913e-08, "loss": 3.9729, "step": 433000 }, { "epoch": 4.81, "learning_rate": 4.814206072451858e-08, "loss": 3.9623, "step": 433500 }, { "epoch": 4.82, "learning_rate": 4.8197587899518024e-08, "loss": 3.9784, "step": 434000 }, { "epoch": 4.83, "learning_rate": 4.825311507451746e-08, "loss": 3.9872, "step": 434500 }, { "epoch": 4.83, "learning_rate": 4.830864224951691e-08, "loss": 3.984, "step": 435000 }, { "epoch": 4.84, "learning_rate": 4.8364169424516356e-08, "loss": 3.9745, "step": 435500 }, { "epoch": 4.84, "learning_rate": 4.84196965995158e-08, "loss": 3.9719, "step": 436000 }, { "epoch": 4.85, "learning_rate": 4.8475223774515244e-08, "loss": 3.9711, "step": 436500 }, { "epoch": 4.85, "learning_rate": 4.8530750949514695e-08, "loss": 3.9703, "step": 437000 }, { "epoch": 4.86, "learning_rate": 4.858627812451413e-08, "loss": 3.9586, "step": 437500 }, { "epoch": 4.86, "learning_rate": 4.864180529951358e-08, "loss": 3.9699, "step": 438000 }, { "epoch": 4.87, "learning_rate": 4.869733247451303e-08, "loss": 3.9747, "step": 438500 }, { "epoch": 4.88, "learning_rate": 4.8752859649512465e-08, "loss": 3.9673, "step": 439000 }, { "epoch": 4.88, "learning_rate": 4.8808386824511916e-08, "loss": 3.9681, "step": 439500 }, { "epoch": 4.89, "learning_rate": 4.886391399951136e-08, "loss": 3.96, "step": 440000 }, { "epoch": 4.89, "learning_rate": 4.8919441174510804e-08, "loss": 3.9928, "step": 440500 }, { "epoch": 4.9, "learning_rate": 4.897496834951025e-08, "loss": 3.9976, "step": 441000 }, { "epoch": 4.9, "learning_rate": 4.903049552450969e-08, "loss": 3.9696, "step": 441500 }, { "epoch": 4.91, "learning_rate": 4.908602269950914e-08, "loss": 3.9719, "step": 442000 }, { "epoch": 4.91, "learning_rate": 4.914154987450858e-08, "loss": 3.9623, "step": 442500 }, { "epoch": 4.92, "learning_rate": 4.919707704950803e-08, "loss": 3.9835, "step": 443000 }, { "epoch": 4.93, "learning_rate": 4.925260422450747e-08, "loss": 3.9696, "step": 443500 }, { "epoch": 4.93, "learning_rate": 4.9308131399506914e-08, "loss": 3.9761, "step": 444000 }, { "epoch": 4.94, "learning_rate": 4.9363658574506365e-08, "loss": 3.9787, "step": 444500 }, { "epoch": 4.94, "learning_rate": 4.94191857495058e-08, "loss": 3.9933, "step": 445000 }, { "epoch": 4.95, "learning_rate": 4.947471292450525e-08, "loss": 3.9899, "step": 445500 }, { "epoch": 4.95, "learning_rate": 4.95302400995047e-08, "loss": 3.9763, "step": 446000 }, { "epoch": 4.96, "learning_rate": 4.958576727450414e-08, "loss": 3.9708, "step": 446500 }, { "epoch": 4.96, "learning_rate": 4.9641294449503585e-08, "loss": 3.9889, "step": 447000 }, { "epoch": 4.97, "learning_rate": 4.969682162450303e-08, "loss": 3.9769, "step": 447500 }, { "epoch": 4.98, "learning_rate": 4.9752348799502474e-08, "loss": 3.9738, "step": 448000 }, { "epoch": 4.98, "learning_rate": 4.980787597450192e-08, "loss": 3.9591, "step": 448500 }, { "epoch": 4.99, "learning_rate": 4.986340314950136e-08, "loss": 3.9796, "step": 449000 }, { "epoch": 4.99, "learning_rate": 4.9918930324500806e-08, "loss": 3.9772, "step": 449500 }, { "epoch": 5.0, "learning_rate": 4.997445749950026e-08, "loss": 3.9604, "step": 450000 }, { "epoch": 5.0, "eval_loss": 3.979917526245117, "eval_runtime": 6.3032, "eval_samples_per_second": 246.54, "step": 450230 }, { "epoch": 5.0, "learning_rate": 5.00299846744997e-08, "loss": 3.9679, "step": 450500 }, { "epoch": 5.01, "learning_rate": 5.008551184949914e-08, "loss": 3.9582, "step": 451000 }, { "epoch": 5.01, "learning_rate": 5.014103902449859e-08, "loss": 3.9734, "step": 451500 }, { "epoch": 5.02, "learning_rate": 5.0196566199498034e-08, "loss": 3.9825, "step": 452000 }, { "epoch": 5.03, "learning_rate": 5.025209337449747e-08, "loss": 3.9878, "step": 452500 }, { "epoch": 5.03, "learning_rate": 5.030762054949692e-08, "loss": 3.969, "step": 453000 }, { "epoch": 5.04, "learning_rate": 5.0363147724496366e-08, "loss": 3.9616, "step": 453500 }, { "epoch": 5.04, "learning_rate": 5.0418674899495804e-08, "loss": 3.9926, "step": 454000 }, { "epoch": 5.05, "learning_rate": 5.0474202074495255e-08, "loss": 3.9751, "step": 454500 }, { "epoch": 5.05, "learning_rate": 5.05297292494947e-08, "loss": 3.9791, "step": 455000 }, { "epoch": 5.06, "learning_rate": 5.058525642449415e-08, "loss": 3.9649, "step": 455500 }, { "epoch": 5.06, "learning_rate": 5.064078359949359e-08, "loss": 3.9741, "step": 456000 }, { "epoch": 5.07, "learning_rate": 5.069631077449303e-08, "loss": 3.9608, "step": 456500 }, { "epoch": 5.08, "learning_rate": 5.075183794949248e-08, "loss": 3.9766, "step": 457000 }, { "epoch": 5.08, "learning_rate": 5.080736512449192e-08, "loss": 3.9694, "step": 457500 }, { "epoch": 5.09, "learning_rate": 5.0862892299491364e-08, "loss": 3.9779, "step": 458000 }, { "epoch": 5.09, "learning_rate": 5.0918419474490815e-08, "loss": 3.9594, "step": 458500 }, { "epoch": 5.1, "learning_rate": 5.0973946649490266e-08, "loss": 3.9589, "step": 459000 }, { "epoch": 5.1, "learning_rate": 5.1029473824489697e-08, "loss": 3.9799, "step": 459500 }, { "epoch": 5.11, "learning_rate": 5.108500099948915e-08, "loss": 3.9652, "step": 460000 }, { "epoch": 5.11, "learning_rate": 5.11405281744886e-08, "loss": 3.9576, "step": 460500 }, { "epoch": 5.12, "learning_rate": 5.119605534948803e-08, "loss": 3.9753, "step": 461000 }, { "epoch": 5.13, "learning_rate": 5.125158252448748e-08, "loss": 3.9681, "step": 461500 }, { "epoch": 5.13, "learning_rate": 5.130710969948693e-08, "loss": 3.9559, "step": 462000 }, { "epoch": 5.14, "learning_rate": 5.1362636874486375e-08, "loss": 3.9708, "step": 462500 }, { "epoch": 5.14, "learning_rate": 5.141816404948581e-08, "loss": 3.953, "step": 463000 }, { "epoch": 5.15, "learning_rate": 5.147369122448526e-08, "loss": 3.9638, "step": 463500 }, { "epoch": 5.15, "learning_rate": 5.152921839948471e-08, "loss": 3.9588, "step": 464000 }, { "epoch": 5.16, "learning_rate": 5.1584745574484145e-08, "loss": 3.9676, "step": 464500 }, { "epoch": 5.16, "learning_rate": 5.1640272749483596e-08, "loss": 3.9801, "step": 465000 }, { "epoch": 5.17, "learning_rate": 5.169579992448304e-08, "loss": 3.9834, "step": 465500 }, { "epoch": 5.18, "learning_rate": 5.175132709948249e-08, "loss": 3.9709, "step": 466000 }, { "epoch": 5.18, "learning_rate": 5.180685427448193e-08, "loss": 3.9634, "step": 466500 }, { "epoch": 5.19, "learning_rate": 5.186238144948137e-08, "loss": 3.9807, "step": 467000 }, { "epoch": 5.19, "learning_rate": 5.191790862448082e-08, "loss": 3.9807, "step": 467500 }, { "epoch": 5.2, "learning_rate": 5.197343579948026e-08, "loss": 3.9588, "step": 468000 }, { "epoch": 5.2, "learning_rate": 5.2028962974479705e-08, "loss": 3.9785, "step": 468500 }, { "epoch": 5.21, "learning_rate": 5.2084490149479156e-08, "loss": 3.9755, "step": 469000 }, { "epoch": 5.21, "learning_rate": 5.21400173244786e-08, "loss": 3.9691, "step": 469500 }, { "epoch": 5.22, "learning_rate": 5.219554449947804e-08, "loss": 3.9581, "step": 470000 }, { "epoch": 5.23, "learning_rate": 5.225107167447749e-08, "loss": 3.9749, "step": 470500 }, { "epoch": 5.23, "learning_rate": 5.230659884947693e-08, "loss": 3.9744, "step": 471000 }, { "epoch": 5.24, "learning_rate": 5.236212602447637e-08, "loss": 3.9533, "step": 471500 }, { "epoch": 5.24, "learning_rate": 5.241765319947582e-08, "loss": 3.9659, "step": 472000 }, { "epoch": 5.25, "learning_rate": 5.2473180374475265e-08, "loss": 3.9525, "step": 472500 }, { "epoch": 5.25, "learning_rate": 5.2528707549474716e-08, "loss": 3.9506, "step": 473000 }, { "epoch": 5.26, "learning_rate": 5.2584234724474153e-08, "loss": 3.9397, "step": 473500 }, { "epoch": 5.26, "learning_rate": 5.26397618994736e-08, "loss": 3.9579, "step": 474000 }, { "epoch": 5.27, "learning_rate": 5.269528907447305e-08, "loss": 3.9607, "step": 474500 }, { "epoch": 5.28, "learning_rate": 5.2750816249472486e-08, "loss": 3.9663, "step": 475000 }, { "epoch": 5.28, "learning_rate": 5.280634342447193e-08, "loss": 3.9651, "step": 475500 }, { "epoch": 5.29, "learning_rate": 5.286187059947138e-08, "loss": 3.953, "step": 476000 }, { "epoch": 5.29, "learning_rate": 5.291739777447083e-08, "loss": 3.9471, "step": 476500 }, { "epoch": 5.3, "learning_rate": 5.297292494947026e-08, "loss": 3.9738, "step": 477000 }, { "epoch": 5.3, "learning_rate": 5.3028452124469713e-08, "loss": 3.9717, "step": 477500 }, { "epoch": 5.31, "learning_rate": 5.3083979299469164e-08, "loss": 3.9548, "step": 478000 }, { "epoch": 5.31, "learning_rate": 5.3139506474468595e-08, "loss": 3.9373, "step": 478500 }, { "epoch": 5.32, "learning_rate": 5.3195033649468046e-08, "loss": 3.9567, "step": 479000 }, { "epoch": 5.33, "learning_rate": 5.32505608244675e-08, "loss": 3.9676, "step": 479500 }, { "epoch": 5.33, "learning_rate": 5.330608799946694e-08, "loss": 3.9717, "step": 480000 }, { "epoch": 5.34, "learning_rate": 5.336161517446638e-08, "loss": 3.957, "step": 480500 }, { "epoch": 5.34, "learning_rate": 5.341714234946583e-08, "loss": 3.9736, "step": 481000 }, { "epoch": 5.35, "learning_rate": 5.3472669524465273e-08, "loss": 3.9484, "step": 481500 }, { "epoch": 5.35, "learning_rate": 5.352819669946471e-08, "loss": 3.9439, "step": 482000 }, { "epoch": 5.36, "learning_rate": 5.358372387446416e-08, "loss": 3.9685, "step": 482500 }, { "epoch": 5.36, "learning_rate": 5.3639251049463606e-08, "loss": 3.9692, "step": 483000 }, { "epoch": 5.37, "learning_rate": 5.3694778224463044e-08, "loss": 3.9468, "step": 483500 }, { "epoch": 5.38, "learning_rate": 5.3750305399462494e-08, "loss": 3.947, "step": 484000 }, { "epoch": 5.38, "learning_rate": 5.380583257446194e-08, "loss": 3.9577, "step": 484500 }, { "epoch": 5.39, "learning_rate": 5.386135974946139e-08, "loss": 3.9634, "step": 485000 }, { "epoch": 5.39, "learning_rate": 5.391688692446083e-08, "loss": 3.9416, "step": 485500 }, { "epoch": 5.4, "learning_rate": 5.397241409946027e-08, "loss": 3.962, "step": 486000 }, { "epoch": 5.4, "learning_rate": 5.402794127445972e-08, "loss": 3.9677, "step": 486500 }, { "epoch": 5.41, "learning_rate": 5.408346844945916e-08, "loss": 3.9538, "step": 487000 }, { "epoch": 5.41, "learning_rate": 5.4138995624458604e-08, "loss": 3.9689, "step": 487500 }, { "epoch": 5.42, "learning_rate": 5.4194522799458054e-08, "loss": 3.9649, "step": 488000 }, { "epoch": 5.43, "learning_rate": 5.42500499744575e-08, "loss": 3.956, "step": 488500 }, { "epoch": 5.43, "learning_rate": 5.4305577149456936e-08, "loss": 3.964, "step": 489000 }, { "epoch": 5.44, "learning_rate": 5.436110432445639e-08, "loss": 3.9513, "step": 489500 }, { "epoch": 5.44, "learning_rate": 5.441663149945583e-08, "loss": 3.9641, "step": 490000 }, { "epoch": 5.45, "learning_rate": 5.447215867445527e-08, "loss": 3.9624, "step": 490500 }, { "epoch": 5.45, "learning_rate": 5.452768584945472e-08, "loss": 3.9448, "step": 491000 }, { "epoch": 5.46, "learning_rate": 5.4583213024454164e-08, "loss": 3.9784, "step": 491500 }, { "epoch": 5.46, "learning_rate": 5.4638740199453614e-08, "loss": 3.9588, "step": 492000 }, { "epoch": 5.47, "learning_rate": 5.469426737445305e-08, "loss": 3.9761, "step": 492500 }, { "epoch": 5.47, "learning_rate": 5.4749794549452496e-08, "loss": 3.9593, "step": 493000 }, { "epoch": 5.48, "learning_rate": 5.480532172445195e-08, "loss": 3.9539, "step": 493500 }, { "epoch": 5.49, "learning_rate": 5.4860848899451385e-08, "loss": 3.9435, "step": 494000 }, { "epoch": 5.49, "learning_rate": 5.491637607445083e-08, "loss": 3.9597, "step": 494500 }, { "epoch": 5.5, "learning_rate": 5.497190324945028e-08, "loss": 3.962, "step": 495000 }, { "epoch": 5.5, "learning_rate": 5.502743042444973e-08, "loss": 3.9746, "step": 495500 }, { "epoch": 5.51, "learning_rate": 5.508295759944917e-08, "loss": 3.9431, "step": 496000 }, { "epoch": 5.51, "learning_rate": 5.513848477444861e-08, "loss": 3.948, "step": 496500 }, { "epoch": 5.52, "learning_rate": 5.519401194944806e-08, "loss": 3.9546, "step": 497000 }, { "epoch": 5.52, "learning_rate": 5.52495391244475e-08, "loss": 3.9528, "step": 497500 }, { "epoch": 5.53, "learning_rate": 5.5305066299446945e-08, "loss": 3.9582, "step": 498000 }, { "epoch": 5.54, "learning_rate": 5.5360593474446395e-08, "loss": 3.9466, "step": 498500 }, { "epoch": 5.54, "learning_rate": 5.541612064944584e-08, "loss": 3.9822, "step": 499000 }, { "epoch": 5.55, "learning_rate": 5.547164782444528e-08, "loss": 3.9714, "step": 499500 }, { "epoch": 5.55, "learning_rate": 5.552717499944473e-08, "loss": 3.9581, "step": 500000 }, { "epoch": 5.56, "learning_rate": 5.558270217444417e-08, "loss": 3.9612, "step": 500500 }, { "epoch": 5.56, "learning_rate": 5.563822934944361e-08, "loss": 3.9731, "step": 501000 }, { "epoch": 5.57, "learning_rate": 5.569375652444306e-08, "loss": 3.9718, "step": 501500 }, { "epoch": 5.57, "learning_rate": 5.5749283699442505e-08, "loss": 3.9534, "step": 502000 }, { "epoch": 5.58, "learning_rate": 5.5804810874441955e-08, "loss": 3.9642, "step": 502500 }, { "epoch": 5.59, "learning_rate": 5.586033804944139e-08, "loss": 3.9509, "step": 503000 }, { "epoch": 5.59, "learning_rate": 5.591586522444084e-08, "loss": 3.9404, "step": 503500 }, { "epoch": 5.6, "learning_rate": 5.597139239944029e-08, "loss": 3.941, "step": 504000 }, { "epoch": 5.6, "learning_rate": 5.6026919574439726e-08, "loss": 3.9572, "step": 504500 }, { "epoch": 5.61, "learning_rate": 5.608244674943917e-08, "loss": 3.9336, "step": 505000 }, { "epoch": 5.61, "learning_rate": 5.613797392443862e-08, "loss": 3.9493, "step": 505500 }, { "epoch": 5.62, "learning_rate": 5.6193501099438065e-08, "loss": 3.9511, "step": 506000 }, { "epoch": 5.62, "learning_rate": 5.62490282744375e-08, "loss": 3.937, "step": 506500 }, { "epoch": 5.63, "learning_rate": 5.630455544943695e-08, "loss": 3.9533, "step": 507000 }, { "epoch": 5.64, "learning_rate": 5.63600826244364e-08, "loss": 3.9673, "step": 507500 }, { "epoch": 5.64, "learning_rate": 5.6415609799435835e-08, "loss": 3.9642, "step": 508000 }, { "epoch": 5.65, "learning_rate": 5.6471136974435286e-08, "loss": 3.9343, "step": 508500 }, { "epoch": 5.65, "learning_rate": 5.652666414943473e-08, "loss": 3.9659, "step": 509000 }, { "epoch": 5.66, "learning_rate": 5.658219132443418e-08, "loss": 3.9633, "step": 509500 }, { "epoch": 5.66, "learning_rate": 5.663771849943362e-08, "loss": 3.957, "step": 510000 }, { "epoch": 5.67, "learning_rate": 5.669324567443306e-08, "loss": 3.9357, "step": 510500 }, { "epoch": 5.67, "learning_rate": 5.674877284943251e-08, "loss": 3.9515, "step": 511000 }, { "epoch": 5.68, "learning_rate": 5.680430002443195e-08, "loss": 3.9693, "step": 511500 }, { "epoch": 5.69, "learning_rate": 5.68598271994314e-08, "loss": 3.9498, "step": 512000 }, { "epoch": 5.69, "learning_rate": 5.6915354374430846e-08, "loss": 3.9483, "step": 512500 }, { "epoch": 5.7, "learning_rate": 5.6970881549430296e-08, "loss": 3.96, "step": 513000 }, { "epoch": 5.7, "learning_rate": 5.7026408724429734e-08, "loss": 3.9513, "step": 513500 }, { "epoch": 5.71, "learning_rate": 5.708193589942918e-08, "loss": 3.94, "step": 514000 }, { "epoch": 5.71, "learning_rate": 5.713746307442863e-08, "loss": 3.9433, "step": 514500 }, { "epoch": 5.72, "learning_rate": 5.7192990249428067e-08, "loss": 3.938, "step": 515000 }, { "epoch": 5.72, "learning_rate": 5.724851742442751e-08, "loss": 3.944, "step": 515500 }, { "epoch": 5.73, "learning_rate": 5.730404459942696e-08, "loss": 3.9501, "step": 516000 }, { "epoch": 5.74, "learning_rate": 5.73595717744264e-08, "loss": 3.954, "step": 516500 }, { "epoch": 5.74, "learning_rate": 5.741509894942584e-08, "loss": 3.9366, "step": 517000 }, { "epoch": 5.75, "learning_rate": 5.7470626124425294e-08, "loss": 3.9513, "step": 517500 }, { "epoch": 5.75, "learning_rate": 5.752615329942474e-08, "loss": 3.9388, "step": 518000 }, { "epoch": 5.76, "learning_rate": 5.7581680474424176e-08, "loss": 3.9471, "step": 518500 }, { "epoch": 5.76, "learning_rate": 5.7637207649423627e-08, "loss": 3.9411, "step": 519000 }, { "epoch": 5.77, "learning_rate": 5.769273482442307e-08, "loss": 3.9785, "step": 519500 }, { "epoch": 5.77, "learning_rate": 5.774826199942251e-08, "loss": 3.9397, "step": 520000 }, { "epoch": 5.78, "learning_rate": 5.780378917442196e-08, "loss": 3.9626, "step": 520500 }, { "epoch": 5.79, "learning_rate": 5.78593163494214e-08, "loss": 3.9436, "step": 521000 }, { "epoch": 5.79, "learning_rate": 5.7914843524420854e-08, "loss": 3.9672, "step": 521500 }, { "epoch": 5.8, "learning_rate": 5.797037069942029e-08, "loss": 3.949, "step": 522000 }, { "epoch": 5.8, "learning_rate": 5.8025897874419736e-08, "loss": 3.9333, "step": 522500 }, { "epoch": 5.81, "learning_rate": 5.8081425049419187e-08, "loss": 3.963, "step": 523000 }, { "epoch": 5.81, "learning_rate": 5.8136952224418624e-08, "loss": 3.9595, "step": 523500 }, { "epoch": 5.82, "learning_rate": 5.819247939941807e-08, "loss": 3.9429, "step": 524000 }, { "epoch": 5.82, "learning_rate": 5.824800657441752e-08, "loss": 3.9546, "step": 524500 }, { "epoch": 5.83, "learning_rate": 5.8303533749416963e-08, "loss": 3.935, "step": 525000 }, { "epoch": 5.84, "learning_rate": 5.83590609244164e-08, "loss": 3.9467, "step": 525500 }, { "epoch": 5.84, "learning_rate": 5.841458809941585e-08, "loss": 3.9507, "step": 526000 }, { "epoch": 5.85, "learning_rate": 5.84701152744153e-08, "loss": 3.9562, "step": 526500 }, { "epoch": 5.85, "learning_rate": 5.8525642449414733e-08, "loss": 3.9703, "step": 527000 }, { "epoch": 5.86, "learning_rate": 5.8581169624414184e-08, "loss": 3.9301, "step": 527500 }, { "epoch": 5.86, "learning_rate": 5.8636696799413635e-08, "loss": 3.9348, "step": 528000 }, { "epoch": 5.87, "learning_rate": 5.869222397441308e-08, "loss": 3.9399, "step": 528500 }, { "epoch": 5.87, "learning_rate": 5.874775114941252e-08, "loss": 3.967, "step": 529000 }, { "epoch": 5.88, "learning_rate": 5.880327832441197e-08, "loss": 3.9575, "step": 529500 }, { "epoch": 5.89, "learning_rate": 5.885880549941141e-08, "loss": 3.9382, "step": 530000 }, { "epoch": 5.89, "learning_rate": 5.891433267441085e-08, "loss": 3.9322, "step": 530500 }, { "epoch": 5.9, "learning_rate": 5.89698598494103e-08, "loss": 3.9357, "step": 531000 }, { "epoch": 5.9, "learning_rate": 5.9025387024409744e-08, "loss": 3.9619, "step": 531500 }, { "epoch": 5.91, "learning_rate": 5.9080914199409195e-08, "loss": 3.9453, "step": 532000 }, { "epoch": 5.91, "learning_rate": 5.913644137440863e-08, "loss": 3.9507, "step": 532500 }, { "epoch": 5.92, "learning_rate": 5.919196854940808e-08, "loss": 3.9565, "step": 533000 }, { "epoch": 5.92, "learning_rate": 5.924749572440753e-08, "loss": 3.9495, "step": 533500 }, { "epoch": 5.93, "learning_rate": 5.9303022899406965e-08, "loss": 3.9427, "step": 534000 }, { "epoch": 5.94, "learning_rate": 5.935855007440641e-08, "loss": 3.9472, "step": 534500 }, { "epoch": 5.94, "learning_rate": 5.941407724940586e-08, "loss": 3.9472, "step": 535000 }, { "epoch": 5.95, "learning_rate": 5.9469604424405304e-08, "loss": 3.9411, "step": 535500 }, { "epoch": 5.95, "learning_rate": 5.952513159940474e-08, "loss": 3.9418, "step": 536000 }, { "epoch": 5.96, "learning_rate": 5.958065877440419e-08, "loss": 3.9293, "step": 536500 }, { "epoch": 5.96, "learning_rate": 5.963618594940364e-08, "loss": 3.9538, "step": 537000 }, { "epoch": 5.97, "learning_rate": 5.969171312440307e-08, "loss": 3.9491, "step": 537500 }, { "epoch": 5.97, "learning_rate": 5.974724029940252e-08, "loss": 3.9373, "step": 538000 }, { "epoch": 5.98, "learning_rate": 5.980276747440198e-08, "loss": 3.9537, "step": 538500 }, { "epoch": 5.99, "learning_rate": 5.985829464940142e-08, "loss": 3.9417, "step": 539000 }, { "epoch": 5.99, "learning_rate": 5.991382182440085e-08, "loss": 3.9653, "step": 539500 }, { "epoch": 6.0, "learning_rate": 5.996934899940031e-08, "loss": 3.9557, "step": 540000 }, { "epoch": 6.0, "eval_loss": 3.9568018913269043, "eval_runtime": 6.3151, "eval_samples_per_second": 246.077, "step": 540276 }, { "epoch": 6.0, "learning_rate": 6.002487617439975e-08, "loss": 3.9361, "step": 540500 }, { "epoch": 6.01, "learning_rate": 6.008040334939918e-08, "loss": 3.945, "step": 541000 }, { "epoch": 6.01, "learning_rate": 6.013593052439864e-08, "loss": 3.9389, "step": 541500 }, { "epoch": 6.02, "learning_rate": 6.019145769939809e-08, "loss": 3.9583, "step": 542000 }, { "epoch": 6.02, "learning_rate": 6.024698487439753e-08, "loss": 3.9412, "step": 542500 }, { "epoch": 6.03, "learning_rate": 6.030251204939697e-08, "loss": 3.9477, "step": 543000 }, { "epoch": 6.04, "learning_rate": 6.035803922439642e-08, "loss": 3.9518, "step": 543500 }, { "epoch": 6.04, "learning_rate": 6.041356639939586e-08, "loss": 3.9443, "step": 544000 }, { "epoch": 6.05, "learning_rate": 6.04690935743953e-08, "loss": 3.9274, "step": 544500 }, { "epoch": 6.05, "learning_rate": 6.052462074939475e-08, "loss": 3.9232, "step": 545000 }, { "epoch": 6.06, "learning_rate": 6.05801479243942e-08, "loss": 3.9389, "step": 545500 }, { "epoch": 6.06, "learning_rate": 6.063567509939364e-08, "loss": 3.9506, "step": 546000 }, { "epoch": 6.07, "learning_rate": 6.069120227439308e-08, "loss": 3.9553, "step": 546500 }, { "epoch": 6.07, "learning_rate": 6.074672944939253e-08, "loss": 3.9612, "step": 547000 }, { "epoch": 6.08, "learning_rate": 6.080225662439198e-08, "loss": 3.94, "step": 547500 }, { "epoch": 6.09, "learning_rate": 6.085778379939142e-08, "loss": 3.953, "step": 548000 }, { "epoch": 6.09, "learning_rate": 6.091331097439086e-08, "loss": 3.9462, "step": 548500 }, { "epoch": 6.1, "learning_rate": 6.096883814939032e-08, "loss": 3.9316, "step": 549000 }, { "epoch": 6.1, "learning_rate": 6.102436532438975e-08, "loss": 3.9408, "step": 549500 }, { "epoch": 6.11, "learning_rate": 6.107989249938919e-08, "loss": 3.9347, "step": 550000 }, { "epoch": 6.11, "learning_rate": 6.113541967438865e-08, "loss": 3.9506, "step": 550500 }, { "epoch": 6.12, "learning_rate": 6.11909468493881e-08, "loss": 3.9241, "step": 551000 }, { "epoch": 6.12, "learning_rate": 6.124647402438752e-08, "loss": 3.9242, "step": 551500 }, { "epoch": 6.13, "learning_rate": 6.130200119938698e-08, "loss": 3.941, "step": 552000 }, { "epoch": 6.14, "learning_rate": 6.135752837438643e-08, "loss": 3.9496, "step": 552500 }, { "epoch": 6.14, "learning_rate": 6.141305554938586e-08, "loss": 3.9441, "step": 553000 }, { "epoch": 6.15, "learning_rate": 6.146858272438531e-08, "loss": 3.9308, "step": 553500 }, { "epoch": 6.15, "learning_rate": 6.152410989938476e-08, "loss": 3.9358, "step": 554000 }, { "epoch": 6.16, "learning_rate": 6.15796370743842e-08, "loss": 3.9401, "step": 554500 }, { "epoch": 6.16, "learning_rate": 6.163516424938365e-08, "loss": 3.9411, "step": 555000 }, { "epoch": 6.17, "learning_rate": 6.169069142438309e-08, "loss": 3.9419, "step": 555500 }, { "epoch": 6.17, "learning_rate": 6.174621859938254e-08, "loss": 3.9487, "step": 556000 }, { "epoch": 6.18, "learning_rate": 6.180174577438198e-08, "loss": 3.952, "step": 556500 }, { "epoch": 6.19, "learning_rate": 6.185727294938142e-08, "loss": 3.9228, "step": 557000 }, { "epoch": 6.19, "learning_rate": 6.191280012438087e-08, "loss": 3.9396, "step": 557500 }, { "epoch": 6.2, "learning_rate": 6.196832729938031e-08, "loss": 3.9284, "step": 558000 }, { "epoch": 6.2, "learning_rate": 6.202385447437976e-08, "loss": 3.9472, "step": 558500 }, { "epoch": 6.21, "learning_rate": 6.20793816493792e-08, "loss": 3.9273, "step": 559000 }, { "epoch": 6.21, "learning_rate": 6.213490882437864e-08, "loss": 3.9503, "step": 559500 }, { "epoch": 6.22, "learning_rate": 6.219043599937809e-08, "loss": 3.9372, "step": 560000 }, { "epoch": 6.22, "learning_rate": 6.224596317437753e-08, "loss": 3.9395, "step": 560500 }, { "epoch": 6.23, "learning_rate": 6.230149034937698e-08, "loss": 3.9382, "step": 561000 }, { "epoch": 6.24, "learning_rate": 6.235701752437643e-08, "loss": 3.9413, "step": 561500 }, { "epoch": 6.24, "learning_rate": 6.241254469937587e-08, "loss": 3.9435, "step": 562000 }, { "epoch": 6.25, "learning_rate": 6.246807187437532e-08, "loss": 3.9339, "step": 562500 }, { "epoch": 6.25, "learning_rate": 6.252359904937477e-08, "loss": 3.9205, "step": 563000 }, { "epoch": 6.26, "learning_rate": 6.25791262243742e-08, "loss": 3.9363, "step": 563500 }, { "epoch": 6.26, "learning_rate": 6.263465339937366e-08, "loss": 3.9355, "step": 564000 }, { "epoch": 6.27, "learning_rate": 6.26901805743731e-08, "loss": 3.9259, "step": 564500 }, { "epoch": 6.27, "learning_rate": 6.274570774937254e-08, "loss": 3.9428, "step": 565000 }, { "epoch": 6.28, "learning_rate": 6.280123492437199e-08, "loss": 3.9435, "step": 565500 }, { "epoch": 6.29, "learning_rate": 6.285676209937143e-08, "loss": 3.9427, "step": 566000 }, { "epoch": 6.29, "learning_rate": 6.291228927437088e-08, "loss": 3.9305, "step": 566500 }, { "epoch": 6.3, "learning_rate": 6.296781644937032e-08, "loss": 3.9313, "step": 567000 }, { "epoch": 6.3, "learning_rate": 6.302334362436976e-08, "loss": 3.9337, "step": 567500 }, { "epoch": 6.31, "learning_rate": 6.307887079936921e-08, "loss": 3.926, "step": 568000 }, { "epoch": 6.31, "learning_rate": 6.313439797436865e-08, "loss": 3.9373, "step": 568500 }, { "epoch": 6.32, "learning_rate": 6.31899251493681e-08, "loss": 3.9312, "step": 569000 }, { "epoch": 6.32, "learning_rate": 6.324545232436754e-08, "loss": 3.9607, "step": 569500 }, { "epoch": 6.33, "learning_rate": 6.330097949936699e-08, "loss": 3.9486, "step": 570000 }, { "epoch": 6.34, "learning_rate": 6.335650667436643e-08, "loss": 3.9454, "step": 570500 }, { "epoch": 6.34, "learning_rate": 6.341203384936587e-08, "loss": 3.9454, "step": 571000 }, { "epoch": 6.35, "learning_rate": 6.346756102436532e-08, "loss": 3.9355, "step": 571500 }, { "epoch": 6.35, "learning_rate": 6.352308819936478e-08, "loss": 3.9447, "step": 572000 }, { "epoch": 6.36, "learning_rate": 6.357861537436421e-08, "loss": 3.946, "step": 572500 }, { "epoch": 6.36, "learning_rate": 6.363414254936365e-08, "loss": 3.9447, "step": 573000 }, { "epoch": 6.37, "learning_rate": 6.368966972436311e-08, "loss": 3.9245, "step": 573500 }, { "epoch": 6.37, "learning_rate": 6.374519689936254e-08, "loss": 3.9139, "step": 574000 }, { "epoch": 6.38, "learning_rate": 6.380072407436198e-08, "loss": 3.9303, "step": 574500 }, { "epoch": 6.39, "learning_rate": 6.385625124936144e-08, "loss": 3.9308, "step": 575000 }, { "epoch": 6.39, "learning_rate": 6.391177842436088e-08, "loss": 3.9342, "step": 575500 }, { "epoch": 6.4, "learning_rate": 6.396730559936032e-08, "loss": 3.9615, "step": 576000 }, { "epoch": 6.4, "learning_rate": 6.402283277435977e-08, "loss": 3.9385, "step": 576500 }, { "epoch": 6.41, "learning_rate": 6.407835994935922e-08, "loss": 3.9267, "step": 577000 }, { "epoch": 6.41, "learning_rate": 6.413388712435865e-08, "loss": 3.9361, "step": 577500 }, { "epoch": 6.42, "learning_rate": 6.41894142993581e-08, "loss": 3.9193, "step": 578000 }, { "epoch": 6.42, "learning_rate": 6.424494147435755e-08, "loss": 3.9314, "step": 578500 }, { "epoch": 6.43, "learning_rate": 6.430046864935698e-08, "loss": 3.9402, "step": 579000 }, { "epoch": 6.44, "learning_rate": 6.435599582435644e-08, "loss": 3.9199, "step": 579500 }, { "epoch": 6.44, "learning_rate": 6.441152299935588e-08, "loss": 3.9192, "step": 580000 }, { "epoch": 6.45, "learning_rate": 6.446705017435533e-08, "loss": 3.9321, "step": 580500 }, { "epoch": 6.45, "learning_rate": 6.452257734935477e-08, "loss": 3.937, "step": 581000 }, { "epoch": 6.46, "learning_rate": 6.457810452435422e-08, "loss": 3.9252, "step": 581500 }, { "epoch": 6.46, "learning_rate": 6.463363169935366e-08, "loss": 3.9259, "step": 582000 }, { "epoch": 6.47, "learning_rate": 6.46891588743531e-08, "loss": 3.9177, "step": 582500 }, { "epoch": 6.47, "learning_rate": 6.474468604935255e-08, "loss": 3.9188, "step": 583000 }, { "epoch": 6.48, "learning_rate": 6.480021322435199e-08, "loss": 3.9307, "step": 583500 }, { "epoch": 6.49, "learning_rate": 6.485574039935145e-08, "loss": 3.9509, "step": 584000 }, { "epoch": 6.49, "learning_rate": 6.491126757435088e-08, "loss": 3.9356, "step": 584500 }, { "epoch": 6.5, "learning_rate": 6.496679474935032e-08, "loss": 3.9305, "step": 585000 }, { "epoch": 6.5, "learning_rate": 6.502232192434978e-08, "loss": 3.9323, "step": 585500 }, { "epoch": 6.51, "learning_rate": 6.507784909934921e-08, "loss": 3.9217, "step": 586000 }, { "epoch": 6.51, "learning_rate": 6.513337627434866e-08, "loss": 3.9264, "step": 586500 }, { "epoch": 6.52, "learning_rate": 6.518890344934811e-08, "loss": 3.9398, "step": 587000 }, { "epoch": 6.52, "learning_rate": 6.524443062434756e-08, "loss": 3.9205, "step": 587500 }, { "epoch": 6.53, "learning_rate": 6.529995779934699e-08, "loss": 3.937, "step": 588000 }, { "epoch": 6.54, "learning_rate": 6.535548497434645e-08, "loss": 3.9312, "step": 588500 }, { "epoch": 6.54, "learning_rate": 6.541101214934589e-08, "loss": 3.9102, "step": 589000 }, { "epoch": 6.55, "learning_rate": 6.546653932434532e-08, "loss": 3.9319, "step": 589500 }, { "epoch": 6.55, "learning_rate": 6.552206649934478e-08, "loss": 3.9355, "step": 590000 }, { "epoch": 6.56, "learning_rate": 6.557759367434422e-08, "loss": 3.9361, "step": 590500 }, { "epoch": 6.56, "learning_rate": 6.563312084934367e-08, "loss": 3.9254, "step": 591000 }, { "epoch": 6.57, "learning_rate": 6.568864802434311e-08, "loss": 3.9286, "step": 591500 }, { "epoch": 6.57, "learning_rate": 6.574417519934256e-08, "loss": 3.9225, "step": 592000 }, { "epoch": 6.58, "learning_rate": 6.5799702374342e-08, "loss": 3.9242, "step": 592500 }, { "epoch": 6.59, "learning_rate": 6.585522954934144e-08, "loss": 3.9423, "step": 593000 }, { "epoch": 6.59, "learning_rate": 6.591075672434089e-08, "loss": 3.9053, "step": 593500 }, { "epoch": 6.6, "learning_rate": 6.596628389934033e-08, "loss": 3.9128, "step": 594000 }, { "epoch": 6.6, "learning_rate": 6.602181107433979e-08, "loss": 3.9311, "step": 594500 }, { "epoch": 6.61, "learning_rate": 6.607733824933922e-08, "loss": 3.9216, "step": 595000 }, { "epoch": 6.61, "learning_rate": 6.613286542433867e-08, "loss": 3.9226, "step": 595500 }, { "epoch": 6.62, "learning_rate": 6.618839259933812e-08, "loss": 3.929, "step": 596000 }, { "epoch": 6.62, "learning_rate": 6.624391977433755e-08, "loss": 3.9284, "step": 596500 }, { "epoch": 6.63, "learning_rate": 6.6299446949337e-08, "loss": 3.9281, "step": 597000 }, { "epoch": 6.64, "learning_rate": 6.635497412433646e-08, "loss": 3.9203, "step": 597500 }, { "epoch": 6.64, "learning_rate": 6.64105012993359e-08, "loss": 3.9338, "step": 598000 }, { "epoch": 6.65, "learning_rate": 6.646602847433533e-08, "loss": 3.9226, "step": 598500 }, { "epoch": 6.65, "learning_rate": 6.652155564933479e-08, "loss": 3.9085, "step": 599000 }, { "epoch": 6.66, "learning_rate": 6.657708282433423e-08, "loss": 3.9467, "step": 599500 }, { "epoch": 6.66, "learning_rate": 6.663260999933366e-08, "loss": 3.9172, "step": 600000 }, { "epoch": 6.67, "learning_rate": 6.668813717433312e-08, "loss": 3.932, "step": 600500 }, { "epoch": 6.67, "learning_rate": 6.674366434933256e-08, "loss": 3.9004, "step": 601000 }, { "epoch": 6.68, "learning_rate": 6.679919152433201e-08, "loss": 3.916, "step": 601500 }, { "epoch": 6.69, "learning_rate": 6.685471869933145e-08, "loss": 3.9365, "step": 602000 }, { "epoch": 6.69, "learning_rate": 6.69102458743309e-08, "loss": 3.9196, "step": 602500 }, { "epoch": 6.7, "learning_rate": 6.696577304933034e-08, "loss": 3.9324, "step": 603000 }, { "epoch": 6.7, "learning_rate": 6.702130022432979e-08, "loss": 3.9126, "step": 603500 }, { "epoch": 6.71, "learning_rate": 6.707682739932923e-08, "loss": 3.9314, "step": 604000 }, { "epoch": 6.71, "learning_rate": 6.713235457432867e-08, "loss": 3.9297, "step": 604500 }, { "epoch": 6.72, "learning_rate": 6.718788174932812e-08, "loss": 3.9211, "step": 605000 }, { "epoch": 6.72, "learning_rate": 6.724340892432756e-08, "loss": 3.9295, "step": 605500 }, { "epoch": 6.73, "learning_rate": 6.7298936099327e-08, "loss": 3.9194, "step": 606000 }, { "epoch": 6.74, "learning_rate": 6.735446327432645e-08, "loss": 3.9063, "step": 606500 }, { "epoch": 6.74, "learning_rate": 6.74099904493259e-08, "loss": 3.9325, "step": 607000 }, { "epoch": 6.75, "learning_rate": 6.746551762432534e-08, "loss": 3.9129, "step": 607500 }, { "epoch": 6.75, "learning_rate": 6.752104479932478e-08, "loss": 3.922, "step": 608000 }, { "epoch": 6.76, "learning_rate": 6.757657197432423e-08, "loss": 3.937, "step": 608500 }, { "epoch": 6.76, "learning_rate": 6.763209914932367e-08, "loss": 3.9429, "step": 609000 }, { "epoch": 6.77, "learning_rate": 6.768762632432312e-08, "loss": 3.9045, "step": 609500 }, { "epoch": 6.77, "learning_rate": 6.774315349932257e-08, "loss": 3.9464, "step": 610000 }, { "epoch": 6.78, "learning_rate": 6.7798680674322e-08, "loss": 3.9216, "step": 610500 }, { "epoch": 6.79, "learning_rate": 6.785420784932145e-08, "loss": 3.9371, "step": 611000 }, { "epoch": 6.79, "learning_rate": 6.79097350243209e-08, "loss": 3.9265, "step": 611500 }, { "epoch": 6.8, "learning_rate": 6.796526219932034e-08, "loss": 3.938, "step": 612000 }, { "epoch": 6.8, "learning_rate": 6.802078937431978e-08, "loss": 3.9481, "step": 612500 }, { "epoch": 6.81, "learning_rate": 6.807631654931924e-08, "loss": 3.9402, "step": 613000 }, { "epoch": 6.81, "learning_rate": 6.813184372431868e-08, "loss": 3.9236, "step": 613500 }, { "epoch": 6.82, "learning_rate": 6.818737089931811e-08, "loss": 3.9269, "step": 614000 }, { "epoch": 6.82, "learning_rate": 6.824289807431757e-08, "loss": 3.9182, "step": 614500 }, { "epoch": 6.83, "learning_rate": 6.829842524931701e-08, "loss": 3.9493, "step": 615000 }, { "epoch": 6.84, "learning_rate": 6.835395242431645e-08, "loss": 3.9467, "step": 615500 }, { "epoch": 6.84, "learning_rate": 6.84094795993159e-08, "loss": 3.9133, "step": 616000 }, { "epoch": 6.85, "learning_rate": 6.846500677431535e-08, "loss": 3.9211, "step": 616500 }, { "epoch": 6.85, "learning_rate": 6.852053394931479e-08, "loss": 3.933, "step": 617000 }, { "epoch": 6.86, "learning_rate": 6.857606112431424e-08, "loss": 3.9194, "step": 617500 }, { "epoch": 6.86, "learning_rate": 6.863158829931368e-08, "loss": 3.9159, "step": 618000 }, { "epoch": 6.87, "learning_rate": 6.868711547431312e-08, "loss": 3.9272, "step": 618500 }, { "epoch": 6.87, "learning_rate": 6.874264264931257e-08, "loss": 3.9273, "step": 619000 }, { "epoch": 6.88, "learning_rate": 6.879816982431201e-08, "loss": 3.9304, "step": 619500 }, { "epoch": 6.89, "learning_rate": 6.885369699931146e-08, "loss": 3.9261, "step": 620000 }, { "epoch": 6.89, "learning_rate": 6.890922417431091e-08, "loss": 3.9158, "step": 620500 }, { "epoch": 6.9, "learning_rate": 6.896475134931034e-08, "loss": 3.929, "step": 621000 }, { "epoch": 6.9, "learning_rate": 6.902027852430979e-08, "loss": 3.916, "step": 621500 }, { "epoch": 6.91, "learning_rate": 6.907580569930925e-08, "loss": 3.9179, "step": 622000 }, { "epoch": 6.91, "learning_rate": 6.913133287430868e-08, "loss": 3.9178, "step": 622500 }, { "epoch": 6.92, "learning_rate": 6.918686004930812e-08, "loss": 3.9373, "step": 623000 }, { "epoch": 6.92, "learning_rate": 6.924238722430758e-08, "loss": 3.929, "step": 623500 }, { "epoch": 6.93, "learning_rate": 6.929791439930702e-08, "loss": 3.9087, "step": 624000 }, { "epoch": 6.94, "learning_rate": 6.935344157430645e-08, "loss": 3.9404, "step": 624500 }, { "epoch": 6.94, "learning_rate": 6.940896874930591e-08, "loss": 3.9256, "step": 625000 }, { "epoch": 6.95, "learning_rate": 6.946449592430536e-08, "loss": 3.9075, "step": 625500 }, { "epoch": 6.95, "learning_rate": 6.952002309930479e-08, "loss": 3.9193, "step": 626000 }, { "epoch": 6.96, "learning_rate": 6.957555027430424e-08, "loss": 3.9285, "step": 626500 }, { "epoch": 6.96, "learning_rate": 6.963107744930369e-08, "loss": 3.9006, "step": 627000 }, { "epoch": 6.97, "learning_rate": 6.968660462430313e-08, "loss": 3.9346, "step": 627500 }, { "epoch": 6.97, "learning_rate": 6.974213179930258e-08, "loss": 3.9115, "step": 628000 }, { "epoch": 6.98, "learning_rate": 6.979765897430202e-08, "loss": 3.9158, "step": 628500 }, { "epoch": 6.99, "learning_rate": 6.985318614930146e-08, "loss": 3.8951, "step": 629000 }, { "epoch": 6.99, "learning_rate": 6.990871332430091e-08, "loss": 3.9125, "step": 629500 }, { "epoch": 7.0, "learning_rate": 6.996424049930035e-08, "loss": 3.9119, "step": 630000 }, { "epoch": 7.0, "eval_loss": 3.940045118331909, "eval_runtime": 6.3095, "eval_samples_per_second": 246.294, "step": 630322 }, { "epoch": 7.0, "learning_rate": 7.00197676742998e-08, "loss": 3.9333, "step": 630500 }, { "epoch": 7.01, "learning_rate": 7.007529484929925e-08, "loss": 3.9177, "step": 631000 }, { "epoch": 7.01, "learning_rate": 7.013082202429869e-08, "loss": 3.915, "step": 631500 }, { "epoch": 7.02, "learning_rate": 7.018634919929813e-08, "loss": 3.9332, "step": 632000 }, { "epoch": 7.02, "learning_rate": 7.024187637429759e-08, "loss": 3.9191, "step": 632500 }, { "epoch": 7.03, "learning_rate": 7.029740354929702e-08, "loss": 3.919, "step": 633000 }, { "epoch": 7.04, "learning_rate": 7.035293072429646e-08, "loss": 3.9161, "step": 633500 }, { "epoch": 7.04, "learning_rate": 7.040845789929592e-08, "loss": 3.9204, "step": 634000 }, { "epoch": 7.05, "learning_rate": 7.046398507429536e-08, "loss": 3.9058, "step": 634500 }, { "epoch": 7.05, "learning_rate": 7.05195122492948e-08, "loss": 3.9288, "step": 635000 }, { "epoch": 7.06, "learning_rate": 7.057503942429425e-08, "loss": 3.9139, "step": 635500 }, { "epoch": 7.06, "learning_rate": 7.06305665992937e-08, "loss": 3.9273, "step": 636000 }, { "epoch": 7.07, "learning_rate": 7.068609377429313e-08, "loss": 3.9258, "step": 636500 }, { "epoch": 7.07, "learning_rate": 7.074162094929259e-08, "loss": 3.9207, "step": 637000 }, { "epoch": 7.08, "learning_rate": 7.079714812429203e-08, "loss": 3.9064, "step": 637500 }, { "epoch": 7.09, "learning_rate": 7.085267529929147e-08, "loss": 3.9279, "step": 638000 }, { "epoch": 7.09, "learning_rate": 7.090820247429092e-08, "loss": 3.9297, "step": 638500 }, { "epoch": 7.1, "learning_rate": 7.096372964929036e-08, "loss": 3.9044, "step": 639000 }, { "epoch": 7.1, "learning_rate": 7.10192568242898e-08, "loss": 3.9123, "step": 639500 }, { "epoch": 7.11, "learning_rate": 7.107478399928925e-08, "loss": 3.8989, "step": 640000 }, { "epoch": 7.11, "learning_rate": 7.11303111742887e-08, "loss": 3.94, "step": 640500 }, { "epoch": 7.12, "learning_rate": 7.118583834928814e-08, "loss": 3.9098, "step": 641000 }, { "epoch": 7.12, "learning_rate": 7.124136552428758e-08, "loss": 3.8942, "step": 641500 }, { "epoch": 7.13, "learning_rate": 7.129689269928703e-08, "loss": 3.9097, "step": 642000 }, { "epoch": 7.14, "learning_rate": 7.135241987428647e-08, "loss": 3.9137, "step": 642500 }, { "epoch": 7.14, "learning_rate": 7.140794704928592e-08, "loss": 3.91, "step": 643000 }, { "epoch": 7.15, "learning_rate": 7.146347422428536e-08, "loss": 3.9204, "step": 643500 }, { "epoch": 7.15, "learning_rate": 7.15190013992848e-08, "loss": 3.9341, "step": 644000 }, { "epoch": 7.16, "learning_rate": 7.157452857428425e-08, "loss": 3.9228, "step": 644500 }, { "epoch": 7.16, "learning_rate": 7.163005574928369e-08, "loss": 3.9228, "step": 645000 }, { "epoch": 7.17, "learning_rate": 7.168558292428314e-08, "loss": 3.9125, "step": 645500 }, { "epoch": 7.17, "learning_rate": 7.174111009928258e-08, "loss": 3.9016, "step": 646000 }, { "epoch": 7.18, "learning_rate": 7.179663727428204e-08, "loss": 3.9306, "step": 646500 }, { "epoch": 7.19, "learning_rate": 7.185216444928147e-08, "loss": 3.9173, "step": 647000 }, { "epoch": 7.19, "learning_rate": 7.190769162428093e-08, "loss": 3.907, "step": 647500 }, { "epoch": 7.2, "learning_rate": 7.196321879928037e-08, "loss": 3.9289, "step": 648000 }, { "epoch": 7.2, "learning_rate": 7.20187459742798e-08, "loss": 3.9323, "step": 648500 }, { "epoch": 7.21, "learning_rate": 7.207427314927926e-08, "loss": 3.9233, "step": 649000 }, { "epoch": 7.21, "learning_rate": 7.21298003242787e-08, "loss": 3.9141, "step": 649500 }, { "epoch": 7.22, "learning_rate": 7.218532749927815e-08, "loss": 3.9246, "step": 650000 }, { "epoch": 7.22, "learning_rate": 7.224085467427759e-08, "loss": 3.9174, "step": 650500 }, { "epoch": 7.23, "learning_rate": 7.229638184927704e-08, "loss": 3.9108, "step": 651000 }, { "epoch": 7.24, "learning_rate": 7.235190902427648e-08, "loss": 3.9237, "step": 651500 }, { "epoch": 7.24, "learning_rate": 7.240743619927592e-08, "loss": 3.9005, "step": 652000 }, { "epoch": 7.25, "learning_rate": 7.246296337427537e-08, "loss": 3.9105, "step": 652500 }, { "epoch": 7.25, "learning_rate": 7.251849054927481e-08, "loss": 3.9028, "step": 653000 }, { "epoch": 7.26, "learning_rate": 7.257401772427426e-08, "loss": 3.9251, "step": 653500 }, { "epoch": 7.26, "learning_rate": 7.26295448992737e-08, "loss": 3.9139, "step": 654000 }, { "epoch": 7.27, "learning_rate": 7.268507207427314e-08, "loss": 3.9035, "step": 654500 }, { "epoch": 7.27, "learning_rate": 7.274059924927259e-08, "loss": 3.9356, "step": 655000 }, { "epoch": 7.28, "learning_rate": 7.279612642427203e-08, "loss": 3.8926, "step": 655500 }, { "epoch": 7.29, "learning_rate": 7.285165359927148e-08, "loss": 3.9119, "step": 656000 }, { "epoch": 7.29, "learning_rate": 7.290718077427092e-08, "loss": 3.9163, "step": 656500 }, { "epoch": 7.3, "learning_rate": 7.296270794927038e-08, "loss": 3.887, "step": 657000 }, { "epoch": 7.3, "learning_rate": 7.301823512426981e-08, "loss": 3.9306, "step": 657500 }, { "epoch": 7.31, "learning_rate": 7.307376229926925e-08, "loss": 3.9175, "step": 658000 }, { "epoch": 7.31, "learning_rate": 7.312928947426871e-08, "loss": 3.9034, "step": 658500 }, { "epoch": 7.32, "learning_rate": 7.318481664926814e-08, "loss": 3.912, "step": 659000 }, { "epoch": 7.32, "learning_rate": 7.324034382426759e-08, "loss": 3.9079, "step": 659500 }, { "epoch": 7.33, "learning_rate": 7.329587099926704e-08, "loss": 3.9262, "step": 660000 }, { "epoch": 7.34, "learning_rate": 7.335139817426649e-08, "loss": 3.9054, "step": 660500 }, { "epoch": 7.34, "learning_rate": 7.340692534926592e-08, "loss": 3.9286, "step": 661000 }, { "epoch": 7.35, "learning_rate": 7.346245252426538e-08, "loss": 3.9119, "step": 661500 }, { "epoch": 7.35, "learning_rate": 7.351797969926482e-08, "loss": 3.9145, "step": 662000 }, { "epoch": 7.36, "learning_rate": 7.357350687426425e-08, "loss": 3.8954, "step": 662500 }, { "epoch": 7.36, "learning_rate": 7.362903404926371e-08, "loss": 3.9087, "step": 663000 }, { "epoch": 7.37, "learning_rate": 7.368456122426315e-08, "loss": 3.9164, "step": 663500 }, { "epoch": 7.37, "learning_rate": 7.37400883992626e-08, "loss": 3.9016, "step": 664000 }, { "epoch": 7.38, "learning_rate": 7.379561557426204e-08, "loss": 3.9077, "step": 664500 }, { "epoch": 7.39, "learning_rate": 7.385114274926149e-08, "loss": 3.907, "step": 665000 }, { "epoch": 7.39, "learning_rate": 7.390666992426093e-08, "loss": 3.9029, "step": 665500 }, { "epoch": 7.4, "learning_rate": 7.396219709926037e-08, "loss": 3.9104, "step": 666000 }, { "epoch": 7.4, "learning_rate": 7.401772427425982e-08, "loss": 3.9427, "step": 666500 }, { "epoch": 7.41, "learning_rate": 7.407325144925926e-08, "loss": 3.9092, "step": 667000 }, { "epoch": 7.41, "learning_rate": 7.412877862425872e-08, "loss": 3.9066, "step": 667500 }, { "epoch": 7.42, "learning_rate": 7.418430579925815e-08, "loss": 3.9082, "step": 668000 }, { "epoch": 7.42, "learning_rate": 7.42398329742576e-08, "loss": 3.8989, "step": 668500 }, { "epoch": 7.43, "learning_rate": 7.429536014925705e-08, "loss": 3.9068, "step": 669000 }, { "epoch": 7.44, "learning_rate": 7.435088732425648e-08, "loss": 3.919, "step": 669500 }, { "epoch": 7.44, "learning_rate": 7.440641449925593e-08, "loss": 3.898, "step": 670000 }, { "epoch": 7.45, "learning_rate": 7.446194167425538e-08, "loss": 3.9319, "step": 670500 }, { "epoch": 7.45, "learning_rate": 7.451746884925482e-08, "loss": 3.9045, "step": 671000 }, { "epoch": 7.46, "learning_rate": 7.457299602425426e-08, "loss": 3.94, "step": 671500 }, { "epoch": 7.46, "learning_rate": 7.462852319925372e-08, "loss": 3.9074, "step": 672000 }, { "epoch": 7.47, "learning_rate": 7.468405037425316e-08, "loss": 3.9192, "step": 672500 }, { "epoch": 7.47, "learning_rate": 7.473957754925259e-08, "loss": 3.9254, "step": 673000 }, { "epoch": 7.48, "learning_rate": 7.479510472425205e-08, "loss": 3.8787, "step": 673500 }, { "epoch": 7.49, "learning_rate": 7.48506318992515e-08, "loss": 3.9068, "step": 674000 }, { "epoch": 7.49, "learning_rate": 7.490615907425092e-08, "loss": 3.9256, "step": 674500 }, { "epoch": 7.5, "learning_rate": 7.496168624925038e-08, "loss": 3.9091, "step": 675000 }, { "epoch": 7.5, "learning_rate": 7.501721342424983e-08, "loss": 3.9024, "step": 675500 }, { "epoch": 7.51, "learning_rate": 7.507274059924927e-08, "loss": 3.8895, "step": 676000 }, { "epoch": 7.51, "learning_rate": 7.512826777424871e-08, "loss": 3.8965, "step": 676500 }, { "epoch": 7.52, "learning_rate": 7.518379494924816e-08, "loss": 3.9147, "step": 677000 }, { "epoch": 7.52, "learning_rate": 7.52393221242476e-08, "loss": 3.9156, "step": 677500 }, { "epoch": 7.53, "learning_rate": 7.529484929924705e-08, "loss": 3.8979, "step": 678000 }, { "epoch": 7.54, "learning_rate": 7.535037647424649e-08, "loss": 3.9147, "step": 678500 }, { "epoch": 7.54, "learning_rate": 7.540590364924594e-08, "loss": 3.9017, "step": 679000 }, { "epoch": 7.55, "learning_rate": 7.546143082424539e-08, "loss": 3.9015, "step": 679500 }, { "epoch": 7.55, "learning_rate": 7.551695799924482e-08, "loss": 3.9117, "step": 680000 }, { "epoch": 7.56, "learning_rate": 7.557248517424427e-08, "loss": 3.9013, "step": 680500 }, { "epoch": 7.56, "learning_rate": 7.562801234924373e-08, "loss": 3.9035, "step": 681000 }, { "epoch": 7.57, "learning_rate": 7.568353952424316e-08, "loss": 3.9101, "step": 681500 }, { "epoch": 7.57, "learning_rate": 7.57390666992426e-08, "loss": 3.9151, "step": 682000 }, { "epoch": 7.58, "learning_rate": 7.579459387424206e-08, "loss": 3.9254, "step": 682500 }, { "epoch": 7.59, "learning_rate": 7.58501210492415e-08, "loss": 3.9193, "step": 683000 }, { "epoch": 7.59, "learning_rate": 7.590564822424093e-08, "loss": 3.8989, "step": 683500 }, { "epoch": 7.6, "learning_rate": 7.596117539924039e-08, "loss": 3.9069, "step": 684000 }, { "epoch": 7.6, "learning_rate": 7.601670257423984e-08, "loss": 3.8954, "step": 684500 }, { "epoch": 7.61, "learning_rate": 7.607222974923927e-08, "loss": 3.9021, "step": 685000 }, { "epoch": 7.61, "learning_rate": 7.612775692423872e-08, "loss": 3.9001, "step": 685500 }, { "epoch": 7.62, "learning_rate": 7.618328409923817e-08, "loss": 3.9204, "step": 686000 }, { "epoch": 7.62, "learning_rate": 7.623881127423761e-08, "loss": 3.9168, "step": 686500 }, { "epoch": 7.63, "learning_rate": 7.629433844923706e-08, "loss": 3.9207, "step": 687000 }, { "epoch": 7.63, "learning_rate": 7.63498656242365e-08, "loss": 3.9028, "step": 687500 }, { "epoch": 7.64, "learning_rate": 7.640539279923594e-08, "loss": 3.9302, "step": 688000 }, { "epoch": 7.65, "learning_rate": 7.646091997423539e-08, "loss": 3.93, "step": 688500 }, { "epoch": 7.65, "learning_rate": 7.651644714923483e-08, "loss": 3.9112, "step": 689000 }, { "epoch": 7.66, "learning_rate": 7.657197432423428e-08, "loss": 3.8938, "step": 689500 }, { "epoch": 7.66, "learning_rate": 7.662750149923372e-08, "loss": 3.9033, "step": 690000 }, { "epoch": 7.67, "learning_rate": 7.668302867423317e-08, "loss": 3.9132, "step": 690500 }, { "epoch": 7.67, "learning_rate": 7.673855584923261e-08, "loss": 3.9075, "step": 691000 }, { "epoch": 7.68, "learning_rate": 7.679408302423205e-08, "loss": 3.913, "step": 691500 }, { "epoch": 7.68, "learning_rate": 7.68496101992315e-08, "loss": 3.9108, "step": 692000 }, { "epoch": 7.69, "learning_rate": 7.690513737423094e-08, "loss": 3.8918, "step": 692500 }, { "epoch": 7.7, "learning_rate": 7.696066454923039e-08, "loss": 3.9039, "step": 693000 }, { "epoch": 7.7, "learning_rate": 7.701619172422984e-08, "loss": 3.9054, "step": 693500 }, { "epoch": 7.71, "learning_rate": 7.707171889922927e-08, "loss": 3.9161, "step": 694000 }, { "epoch": 7.71, "learning_rate": 7.712724607422872e-08, "loss": 3.9053, "step": 694500 }, { "epoch": 7.72, "learning_rate": 7.718277324922818e-08, "loss": 3.9258, "step": 695000 }, { "epoch": 7.72, "learning_rate": 7.723830042422761e-08, "loss": 3.8918, "step": 695500 }, { "epoch": 7.73, "learning_rate": 7.729382759922705e-08, "loss": 3.9156, "step": 696000 }, { "epoch": 7.73, "learning_rate": 7.734935477422651e-08, "loss": 3.9374, "step": 696500 }, { "epoch": 7.74, "learning_rate": 7.740488194922595e-08, "loss": 3.8993, "step": 697000 }, { "epoch": 7.75, "learning_rate": 7.746040912422538e-08, "loss": 3.9103, "step": 697500 }, { "epoch": 7.75, "learning_rate": 7.751593629922484e-08, "loss": 3.8828, "step": 698000 }, { "epoch": 7.76, "learning_rate": 7.757146347422429e-08, "loss": 3.9187, "step": 698500 }, { "epoch": 7.76, "learning_rate": 7.762699064922372e-08, "loss": 3.9223, "step": 699000 }, { "epoch": 7.77, "learning_rate": 7.768251782422317e-08, "loss": 3.9157, "step": 699500 }, { "epoch": 7.77, "learning_rate": 7.773804499922262e-08, "loss": 3.8826, "step": 700000 }, { "epoch": 7.78, "learning_rate": 7.779357217422206e-08, "loss": 3.9057, "step": 700500 }, { "epoch": 7.78, "learning_rate": 7.78490993492215e-08, "loss": 3.9149, "step": 701000 }, { "epoch": 7.79, "learning_rate": 7.790462652422095e-08, "loss": 3.8994, "step": 701500 }, { "epoch": 7.8, "learning_rate": 7.79601536992204e-08, "loss": 3.9032, "step": 702000 }, { "epoch": 7.8, "learning_rate": 7.801568087421984e-08, "loss": 3.8996, "step": 702500 }, { "epoch": 7.81, "learning_rate": 7.807120804921928e-08, "loss": 3.9012, "step": 703000 }, { "epoch": 7.81, "learning_rate": 7.812673522421873e-08, "loss": 3.8985, "step": 703500 }, { "epoch": 7.82, "learning_rate": 7.818226239921817e-08, "loss": 3.8717, "step": 704000 }, { "epoch": 7.82, "learning_rate": 7.823778957421762e-08, "loss": 3.8897, "step": 704500 }, { "epoch": 7.83, "learning_rate": 7.829331674921706e-08, "loss": 3.9134, "step": 705000 }, { "epoch": 7.83, "learning_rate": 7.834884392421652e-08, "loss": 3.9101, "step": 705500 }, { "epoch": 7.84, "learning_rate": 7.840437109921595e-08, "loss": 3.9264, "step": 706000 }, { "epoch": 7.85, "learning_rate": 7.845989827421539e-08, "loss": 3.919, "step": 706500 }, { "epoch": 7.85, "learning_rate": 7.851542544921485e-08, "loss": 3.9073, "step": 707000 }, { "epoch": 7.86, "learning_rate": 7.857095262421428e-08, "loss": 3.9105, "step": 707500 }, { "epoch": 7.86, "learning_rate": 7.862647979921372e-08, "loss": 3.9152, "step": 708000 }, { "epoch": 7.87, "learning_rate": 7.868200697421318e-08, "loss": 3.8899, "step": 708500 }, { "epoch": 7.87, "learning_rate": 7.873753414921263e-08, "loss": 3.8969, "step": 709000 }, { "epoch": 7.88, "learning_rate": 7.879306132421206e-08, "loss": 3.8993, "step": 709500 }, { "epoch": 7.88, "learning_rate": 7.884858849921151e-08, "loss": 3.8914, "step": 710000 }, { "epoch": 7.89, "learning_rate": 7.890411567421096e-08, "loss": 3.9001, "step": 710500 }, { "epoch": 7.9, "learning_rate": 7.895964284921039e-08, "loss": 3.9019, "step": 711000 }, { "epoch": 7.9, "learning_rate": 7.901517002420985e-08, "loss": 3.9078, "step": 711500 }, { "epoch": 7.91, "learning_rate": 7.907069719920929e-08, "loss": 3.9036, "step": 712000 }, { "epoch": 7.91, "learning_rate": 7.912622437420874e-08, "loss": 3.9092, "step": 712500 }, { "epoch": 7.92, "learning_rate": 7.918175154920818e-08, "loss": 3.8802, "step": 713000 }, { "epoch": 7.92, "learning_rate": 7.923727872420762e-08, "loss": 3.9023, "step": 713500 }, { "epoch": 7.93, "learning_rate": 7.929280589920707e-08, "loss": 3.9171, "step": 714000 }, { "epoch": 7.93, "learning_rate": 7.934833307420651e-08, "loss": 3.8838, "step": 714500 }, { "epoch": 7.94, "learning_rate": 7.940386024920596e-08, "loss": 3.8974, "step": 715000 }, { "epoch": 7.95, "learning_rate": 7.94593874242054e-08, "loss": 3.9014, "step": 715500 }, { "epoch": 7.95, "learning_rate": 7.951491459920486e-08, "loss": 3.8962, "step": 716000 }, { "epoch": 7.96, "learning_rate": 7.957044177420429e-08, "loss": 3.9001, "step": 716500 }, { "epoch": 7.96, "learning_rate": 7.962596894920373e-08, "loss": 3.9074, "step": 717000 }, { "epoch": 7.97, "learning_rate": 7.968149612420319e-08, "loss": 3.8985, "step": 717500 }, { "epoch": 7.97, "learning_rate": 7.973702329920262e-08, "loss": 3.9001, "step": 718000 }, { "epoch": 7.98, "learning_rate": 7.979255047420207e-08, "loss": 3.9109, "step": 718500 }, { "epoch": 7.98, "learning_rate": 7.984807764920152e-08, "loss": 3.8939, "step": 719000 }, { "epoch": 7.99, "learning_rate": 7.990360482420097e-08, "loss": 3.9072, "step": 719500 }, { "epoch": 8.0, "learning_rate": 7.99591319992004e-08, "loss": 3.9048, "step": 720000 }, { "epoch": 8.0, "eval_loss": 3.9263479709625244, "eval_runtime": 6.3049, "eval_samples_per_second": 246.475, "step": 720368 }, { "epoch": 8.0, "learning_rate": 8.001465917419986e-08, "loss": 3.9229, "step": 720500 }, { "epoch": 8.01, "learning_rate": 8.00701863491993e-08, "loss": 3.8859, "step": 721000 }, { "epoch": 8.01, "learning_rate": 8.012571352419873e-08, "loss": 3.9058, "step": 721500 }, { "epoch": 8.02, "learning_rate": 8.018124069919819e-08, "loss": 3.9237, "step": 722000 }, { "epoch": 8.02, "learning_rate": 8.023676787419763e-08, "loss": 3.9156, "step": 722500 }, { "epoch": 8.03, "learning_rate": 8.029229504919708e-08, "loss": 3.8977, "step": 723000 }, { "epoch": 8.03, "learning_rate": 8.034782222419652e-08, "loss": 3.894, "step": 723500 }, { "epoch": 8.04, "learning_rate": 8.040334939919596e-08, "loss": 3.8925, "step": 724000 }, { "epoch": 8.05, "learning_rate": 8.045887657419541e-08, "loss": 3.8966, "step": 724500 }, { "epoch": 8.05, "learning_rate": 8.051440374919485e-08, "loss": 3.9057, "step": 725000 }, { "epoch": 8.06, "learning_rate": 8.05699309241943e-08, "loss": 3.9155, "step": 725500 }, { "epoch": 8.06, "learning_rate": 8.062545809919374e-08, "loss": 3.8936, "step": 726000 }, { "epoch": 8.07, "learning_rate": 8.068098527419319e-08, "loss": 3.8901, "step": 726500 }, { "epoch": 8.07, "learning_rate": 8.073651244919263e-08, "loss": 3.9014, "step": 727000 }, { "epoch": 8.08, "learning_rate": 8.079203962419207e-08, "loss": 3.8952, "step": 727500 }, { "epoch": 8.08, "learning_rate": 8.084756679919152e-08, "loss": 3.8791, "step": 728000 }, { "epoch": 8.09, "learning_rate": 8.090309397419096e-08, "loss": 3.8913, "step": 728500 }, { "epoch": 8.1, "learning_rate": 8.09586211491904e-08, "loss": 3.9013, "step": 729000 }, { "epoch": 8.1, "learning_rate": 8.101414832418985e-08, "loss": 3.9127, "step": 729500 }, { "epoch": 8.11, "learning_rate": 8.106967549918931e-08, "loss": 3.8911, "step": 730000 }, { "epoch": 8.11, "learning_rate": 8.112520267418874e-08, "loss": 3.9138, "step": 730500 }, { "epoch": 8.12, "learning_rate": 8.118072984918818e-08, "loss": 3.8962, "step": 731000 }, { "epoch": 8.12, "learning_rate": 8.123625702418764e-08, "loss": 3.8867, "step": 731500 }, { "epoch": 8.13, "learning_rate": 8.129178419918707e-08, "loss": 3.8883, "step": 732000 }, { "epoch": 8.13, "learning_rate": 8.134731137418652e-08, "loss": 3.8978, "step": 732500 }, { "epoch": 8.14, "learning_rate": 8.140283854918597e-08, "loss": 3.8904, "step": 733000 }, { "epoch": 8.15, "learning_rate": 8.14583657241854e-08, "loss": 3.9044, "step": 733500 }, { "epoch": 8.15, "learning_rate": 8.151389289918486e-08, "loss": 3.8837, "step": 734000 }, { "epoch": 8.16, "learning_rate": 8.15694200741843e-08, "loss": 3.8745, "step": 734500 }, { "epoch": 8.16, "learning_rate": 8.162494724918375e-08, "loss": 3.8998, "step": 735000 }, { "epoch": 8.17, "learning_rate": 8.16804744241832e-08, "loss": 3.886, "step": 735500 }, { "epoch": 8.17, "learning_rate": 8.173600159918264e-08, "loss": 3.8898, "step": 736000 }, { "epoch": 8.18, "learning_rate": 8.179152877418208e-08, "loss": 3.8781, "step": 736500 }, { "epoch": 8.18, "learning_rate": 8.184705594918153e-08, "loss": 3.8862, "step": 737000 }, { "epoch": 8.19, "learning_rate": 8.190258312418097e-08, "loss": 3.9022, "step": 737500 }, { "epoch": 8.2, "learning_rate": 8.195811029918042e-08, "loss": 3.8999, "step": 738000 }, { "epoch": 8.2, "learning_rate": 8.201363747417986e-08, "loss": 3.9063, "step": 738500 }, { "epoch": 8.21, "learning_rate": 8.20691646491793e-08, "loss": 3.8978, "step": 739000 }, { "epoch": 8.21, "learning_rate": 8.212469182417875e-08, "loss": 3.8926, "step": 739500 }, { "epoch": 8.22, "learning_rate": 8.218021899917819e-08, "loss": 3.9185, "step": 740000 }, { "epoch": 8.22, "learning_rate": 8.223574617417764e-08, "loss": 3.8823, "step": 740500 }, { "epoch": 8.23, "learning_rate": 8.229127334917708e-08, "loss": 3.9003, "step": 741000 }, { "epoch": 8.23, "learning_rate": 8.234680052417652e-08, "loss": 3.8836, "step": 741500 }, { "epoch": 8.24, "learning_rate": 8.240232769917598e-08, "loss": 3.9047, "step": 742000 }, { "epoch": 8.25, "learning_rate": 8.245785487417541e-08, "loss": 3.9053, "step": 742500 }, { "epoch": 8.25, "learning_rate": 8.251338204917486e-08, "loss": 3.8932, "step": 743000 }, { "epoch": 8.26, "learning_rate": 8.256890922417431e-08, "loss": 3.8972, "step": 743500 }, { "epoch": 8.26, "learning_rate": 8.262443639917375e-08, "loss": 3.8906, "step": 744000 }, { "epoch": 8.27, "learning_rate": 8.267996357417319e-08, "loss": 3.9004, "step": 744500 }, { "epoch": 8.27, "learning_rate": 8.273549074917265e-08, "loss": 3.9118, "step": 745000 }, { "epoch": 8.28, "learning_rate": 8.279101792417209e-08, "loss": 3.8961, "step": 745500 }, { "epoch": 8.28, "learning_rate": 8.284654509917152e-08, "loss": 3.8916, "step": 746000 }, { "epoch": 8.29, "learning_rate": 8.290207227417098e-08, "loss": 3.9015, "step": 746500 }, { "epoch": 8.3, "learning_rate": 8.295759944917042e-08, "loss": 3.902, "step": 747000 }, { "epoch": 8.3, "learning_rate": 8.301312662416985e-08, "loss": 3.9076, "step": 747500 }, { "epoch": 8.31, "learning_rate": 8.306865379916931e-08, "loss": 3.8931, "step": 748000 }, { "epoch": 8.31, "learning_rate": 8.312418097416876e-08, "loss": 3.9017, "step": 748500 }, { "epoch": 8.32, "learning_rate": 8.31797081491682e-08, "loss": 3.9095, "step": 749000 }, { "epoch": 8.32, "learning_rate": 8.323523532416764e-08, "loss": 3.8899, "step": 749500 }, { "epoch": 8.33, "learning_rate": 8.329076249916709e-08, "loss": 3.8843, "step": 750000 }, { "epoch": 8.33, "learning_rate": 8.334628967416653e-08, "loss": 3.89, "step": 750500 }, { "epoch": 8.34, "learning_rate": 8.340181684916598e-08, "loss": 3.8959, "step": 751000 }, { "epoch": 8.35, "learning_rate": 8.345734402416542e-08, "loss": 3.9121, "step": 751500 }, { "epoch": 8.35, "learning_rate": 8.351287119916487e-08, "loss": 3.8851, "step": 752000 }, { "epoch": 8.36, "learning_rate": 8.356839837416432e-08, "loss": 3.8879, "step": 752500 }, { "epoch": 8.36, "learning_rate": 8.362392554916375e-08, "loss": 3.884, "step": 753000 }, { "epoch": 8.37, "learning_rate": 8.36794527241632e-08, "loss": 3.8702, "step": 753500 }, { "epoch": 8.37, "learning_rate": 8.373497989916266e-08, "loss": 3.8989, "step": 754000 }, { "epoch": 8.38, "learning_rate": 8.379050707416209e-08, "loss": 3.9003, "step": 754500 }, { "epoch": 8.38, "learning_rate": 8.384603424916153e-08, "loss": 3.8784, "step": 755000 }, { "epoch": 8.39, "learning_rate": 8.390156142416099e-08, "loss": 3.8971, "step": 755500 }, { "epoch": 8.4, "learning_rate": 8.395708859916043e-08, "loss": 3.8979, "step": 756000 }, { "epoch": 8.4, "learning_rate": 8.401261577415986e-08, "loss": 3.8878, "step": 756500 }, { "epoch": 8.41, "learning_rate": 8.406814294915932e-08, "loss": 3.8994, "step": 757000 }, { "epoch": 8.41, "learning_rate": 8.412367012415876e-08, "loss": 3.8919, "step": 757500 }, { "epoch": 8.42, "learning_rate": 8.41791972991582e-08, "loss": 3.8979, "step": 758000 }, { "epoch": 8.42, "learning_rate": 8.423472447415765e-08, "loss": 3.8847, "step": 758500 }, { "epoch": 8.43, "learning_rate": 8.42902516491571e-08, "loss": 3.8884, "step": 759000 }, { "epoch": 8.43, "learning_rate": 8.434577882415654e-08, "loss": 3.8885, "step": 759500 }, { "epoch": 8.44, "learning_rate": 8.440130599915599e-08, "loss": 3.8975, "step": 760000 }, { "epoch": 8.45, "learning_rate": 8.445683317415543e-08, "loss": 3.8973, "step": 760500 }, { "epoch": 8.45, "learning_rate": 8.451236034915487e-08, "loss": 3.9104, "step": 761000 }, { "epoch": 8.46, "learning_rate": 8.456788752415432e-08, "loss": 3.908, "step": 761500 }, { "epoch": 8.46, "learning_rate": 8.462341469915376e-08, "loss": 3.8881, "step": 762000 }, { "epoch": 8.47, "learning_rate": 8.46789418741532e-08, "loss": 3.9037, "step": 762500 }, { "epoch": 8.47, "learning_rate": 8.473446904915265e-08, "loss": 3.8861, "step": 763000 }, { "epoch": 8.48, "learning_rate": 8.47899962241521e-08, "loss": 3.8965, "step": 763500 }, { "epoch": 8.48, "learning_rate": 8.484552339915154e-08, "loss": 3.8883, "step": 764000 }, { "epoch": 8.49, "learning_rate": 8.4901050574151e-08, "loss": 3.8852, "step": 764500 }, { "epoch": 8.5, "learning_rate": 8.495657774915043e-08, "loss": 3.9045, "step": 765000 }, { "epoch": 8.5, "learning_rate": 8.501210492414987e-08, "loss": 3.895, "step": 765500 }, { "epoch": 8.51, "learning_rate": 8.506763209914933e-08, "loss": 3.8931, "step": 766000 }, { "epoch": 8.51, "learning_rate": 8.512315927414876e-08, "loss": 3.8788, "step": 766500 }, { "epoch": 8.52, "learning_rate": 8.51786864491482e-08, "loss": 3.8993, "step": 767000 }, { "epoch": 8.52, "learning_rate": 8.523421362414766e-08, "loss": 3.9073, "step": 767500 }, { "epoch": 8.53, "learning_rate": 8.52897407991471e-08, "loss": 3.8985, "step": 768000 }, { "epoch": 8.53, "learning_rate": 8.534526797414654e-08, "loss": 3.8953, "step": 768500 }, { "epoch": 8.54, "learning_rate": 8.5400795149146e-08, "loss": 3.9153, "step": 769000 }, { "epoch": 8.55, "learning_rate": 8.545632232414544e-08, "loss": 3.8869, "step": 769500 }, { "epoch": 8.55, "learning_rate": 8.551184949914487e-08, "loss": 3.9031, "step": 770000 }, { "epoch": 8.56, "learning_rate": 8.556737667414433e-08, "loss": 3.8703, "step": 770500 }, { "epoch": 8.56, "learning_rate": 8.562290384914377e-08, "loss": 3.9019, "step": 771000 }, { "epoch": 8.57, "learning_rate": 8.567843102414321e-08, "loss": 3.9049, "step": 771500 }, { "epoch": 8.57, "learning_rate": 8.573395819914266e-08, "loss": 3.8816, "step": 772000 }, { "epoch": 8.58, "learning_rate": 8.57894853741421e-08, "loss": 3.8858, "step": 772500 }, { "epoch": 8.58, "learning_rate": 8.584501254914155e-08, "loss": 3.9054, "step": 773000 }, { "epoch": 8.59, "learning_rate": 8.590053972414099e-08, "loss": 3.8777, "step": 773500 }, { "epoch": 8.6, "learning_rate": 8.595606689914044e-08, "loss": 3.8941, "step": 774000 }, { "epoch": 8.6, "learning_rate": 8.601159407413988e-08, "loss": 3.883, "step": 774500 }, { "epoch": 8.61, "learning_rate": 8.606712124913932e-08, "loss": 3.8956, "step": 775000 }, { "epoch": 8.61, "learning_rate": 8.612264842413877e-08, "loss": 3.8736, "step": 775500 }, { "epoch": 8.62, "learning_rate": 8.617817559913821e-08, "loss": 3.8941, "step": 776000 }, { "epoch": 8.62, "learning_rate": 8.623370277413766e-08, "loss": 3.8871, "step": 776500 }, { "epoch": 8.63, "learning_rate": 8.62892299491371e-08, "loss": 3.8699, "step": 777000 }, { "epoch": 8.63, "learning_rate": 8.634475712413654e-08, "loss": 3.8748, "step": 777500 }, { "epoch": 8.64, "learning_rate": 8.640028429913599e-08, "loss": 3.8798, "step": 778000 }, { "epoch": 8.65, "learning_rate": 8.645581147413545e-08, "loss": 3.9068, "step": 778500 }, { "epoch": 8.65, "learning_rate": 8.651133864913488e-08, "loss": 3.8883, "step": 779000 }, { "epoch": 8.66, "learning_rate": 8.656686582413432e-08, "loss": 3.8784, "step": 779500 }, { "epoch": 8.66, "learning_rate": 8.662239299913378e-08, "loss": 3.8872, "step": 780000 }, { "epoch": 8.67, "learning_rate": 8.667792017413321e-08, "loss": 3.8873, "step": 780500 }, { "epoch": 8.67, "learning_rate": 8.673344734913265e-08, "loss": 3.8939, "step": 781000 }, { "epoch": 8.68, "learning_rate": 8.678897452413211e-08, "loss": 3.9098, "step": 781500 }, { "epoch": 8.68, "learning_rate": 8.684450169913156e-08, "loss": 3.8935, "step": 782000 }, { "epoch": 8.69, "learning_rate": 8.690002887413099e-08, "loss": 3.8902, "step": 782500 }, { "epoch": 8.7, "learning_rate": 8.695555604913044e-08, "loss": 3.8944, "step": 783000 }, { "epoch": 8.7, "learning_rate": 8.701108322412989e-08, "loss": 3.8918, "step": 783500 }, { "epoch": 8.71, "learning_rate": 8.706661039912932e-08, "loss": 3.8821, "step": 784000 }, { "epoch": 8.71, "learning_rate": 8.712213757412878e-08, "loss": 3.877, "step": 784500 }, { "epoch": 8.72, "learning_rate": 8.717766474912822e-08, "loss": 3.8996, "step": 785000 }, { "epoch": 8.72, "learning_rate": 8.723319192412766e-08, "loss": 3.9013, "step": 785500 }, { "epoch": 8.73, "learning_rate": 8.728871909912711e-08, "loss": 3.9013, "step": 786000 }, { "epoch": 8.73, "learning_rate": 8.734424627412655e-08, "loss": 3.907, "step": 786500 }, { "epoch": 8.74, "learning_rate": 8.7399773449126e-08, "loss": 3.8645, "step": 787000 }, { "epoch": 8.75, "learning_rate": 8.745530062412544e-08, "loss": 3.88, "step": 787500 }, { "epoch": 8.75, "learning_rate": 8.751082779912489e-08, "loss": 3.8873, "step": 788000 }, { "epoch": 8.76, "learning_rate": 8.756635497412433e-08, "loss": 3.884, "step": 788500 }, { "epoch": 8.76, "learning_rate": 8.762188214912379e-08, "loss": 3.8772, "step": 789000 }, { "epoch": 8.77, "learning_rate": 8.767740932412322e-08, "loss": 3.9034, "step": 789500 }, { "epoch": 8.77, "learning_rate": 8.773293649912266e-08, "loss": 3.869, "step": 790000 }, { "epoch": 8.78, "learning_rate": 8.778846367412212e-08, "loss": 3.8748, "step": 790500 }, { "epoch": 8.78, "learning_rate": 8.784399084912155e-08, "loss": 3.9012, "step": 791000 }, { "epoch": 8.79, "learning_rate": 8.7899518024121e-08, "loss": 3.894, "step": 791500 }, { "epoch": 8.8, "learning_rate": 8.795504519912045e-08, "loss": 3.8737, "step": 792000 }, { "epoch": 8.8, "learning_rate": 8.80105723741199e-08, "loss": 3.8753, "step": 792500 }, { "epoch": 8.81, "learning_rate": 8.806609954911933e-08, "loss": 3.8971, "step": 793000 }, { "epoch": 8.81, "learning_rate": 8.812162672411879e-08, "loss": 3.9061, "step": 793500 }, { "epoch": 8.82, "learning_rate": 8.817715389911823e-08, "loss": 3.8857, "step": 794000 }, { "epoch": 8.82, "learning_rate": 8.823268107411766e-08, "loss": 3.9059, "step": 794500 }, { "epoch": 8.83, "learning_rate": 8.828820824911712e-08, "loss": 3.889, "step": 795000 }, { "epoch": 8.83, "learning_rate": 8.834373542411656e-08, "loss": 3.8867, "step": 795500 }, { "epoch": 8.84, "learning_rate": 8.839926259911599e-08, "loss": 3.8931, "step": 796000 }, { "epoch": 8.85, "learning_rate": 8.845478977411545e-08, "loss": 3.8838, "step": 796500 }, { "epoch": 8.85, "learning_rate": 8.85103169491149e-08, "loss": 3.8986, "step": 797000 }, { "epoch": 8.86, "learning_rate": 8.856584412411434e-08, "loss": 3.9002, "step": 797500 }, { "epoch": 8.86, "learning_rate": 8.862137129911378e-08, "loss": 3.8997, "step": 798000 }, { "epoch": 8.87, "learning_rate": 8.867689847411323e-08, "loss": 3.8823, "step": 798500 }, { "epoch": 8.87, "learning_rate": 8.873242564911267e-08, "loss": 3.8719, "step": 799000 }, { "epoch": 8.88, "learning_rate": 8.878795282411212e-08, "loss": 3.8995, "step": 799500 }, { "epoch": 8.88, "learning_rate": 8.884347999911156e-08, "loss": 3.8661, "step": 800000 }, { "epoch": 8.89, "learning_rate": 8.8899007174111e-08, "loss": 3.8937, "step": 800500 }, { "epoch": 8.9, "learning_rate": 8.895453434911046e-08, "loss": 3.8786, "step": 801000 }, { "epoch": 8.9, "learning_rate": 8.901006152410989e-08, "loss": 3.9042, "step": 801500 }, { "epoch": 8.91, "learning_rate": 8.906558869910934e-08, "loss": 3.8988, "step": 802000 }, { "epoch": 8.91, "learning_rate": 8.91211158741088e-08, "loss": 3.8838, "step": 802500 }, { "epoch": 8.92, "learning_rate": 8.917664304910822e-08, "loss": 3.8926, "step": 803000 }, { "epoch": 8.92, "learning_rate": 8.923217022410767e-08, "loss": 3.8851, "step": 803500 }, { "epoch": 8.93, "learning_rate": 8.928769739910713e-08, "loss": 3.8916, "step": 804000 }, { "epoch": 8.93, "learning_rate": 8.934322457410657e-08, "loss": 3.879, "step": 804500 }, { "epoch": 8.94, "learning_rate": 8.9398751749106e-08, "loss": 3.8766, "step": 805000 }, { "epoch": 8.95, "learning_rate": 8.945427892410546e-08, "loss": 3.8952, "step": 805500 }, { "epoch": 8.95, "learning_rate": 8.95098060991049e-08, "loss": 3.8778, "step": 806000 }, { "epoch": 8.96, "learning_rate": 8.956533327410433e-08, "loss": 3.8736, "step": 806500 }, { "epoch": 8.96, "learning_rate": 8.962086044910379e-08, "loss": 3.9052, "step": 807000 }, { "epoch": 8.97, "learning_rate": 8.967638762410324e-08, "loss": 3.8695, "step": 807500 }, { "epoch": 8.97, "learning_rate": 8.973191479910268e-08, "loss": 3.8882, "step": 808000 }, { "epoch": 8.98, "learning_rate": 8.978744197410212e-08, "loss": 3.8876, "step": 808500 }, { "epoch": 8.98, "learning_rate": 8.984296914910157e-08, "loss": 3.8702, "step": 809000 }, { "epoch": 8.99, "learning_rate": 8.989849632410101e-08, "loss": 3.8824, "step": 809500 }, { "epoch": 9.0, "learning_rate": 8.995402349910046e-08, "loss": 3.8871, "step": 810000 }, { "epoch": 9.0, "eval_loss": 3.914140462875366, "eval_runtime": 6.3069, "eval_samples_per_second": 246.396, "step": 810414 }, { "epoch": 9.0, "learning_rate": 9.00095506740999e-08, "loss": 3.8657, "step": 810500 }, { "epoch": 9.01, "learning_rate": 9.006507784909934e-08, "loss": 3.8844, "step": 811000 }, { "epoch": 9.01, "learning_rate": 9.012060502409879e-08, "loss": 3.8874, "step": 811500 }, { "epoch": 9.02, "learning_rate": 9.017613219909823e-08, "loss": 3.8844, "step": 812000 }, { "epoch": 9.02, "learning_rate": 9.023165937409768e-08, "loss": 3.8817, "step": 812500 }, { "epoch": 9.03, "learning_rate": 9.028718654909712e-08, "loss": 3.8831, "step": 813000 }, { "epoch": 9.03, "learning_rate": 9.034271372409657e-08, "loss": 3.8799, "step": 813500 }, { "epoch": 9.04, "learning_rate": 9.039824089909601e-08, "loss": 3.8895, "step": 814000 }, { "epoch": 9.05, "learning_rate": 9.045376807409545e-08, "loss": 3.8962, "step": 814500 }, { "epoch": 9.05, "learning_rate": 9.050929524909491e-08, "loss": 3.8889, "step": 815000 }, { "epoch": 9.06, "learning_rate": 9.056482242409434e-08, "loss": 3.8899, "step": 815500 }, { "epoch": 9.06, "learning_rate": 9.062034959909379e-08, "loss": 3.8798, "step": 816000 }, { "epoch": 9.07, "learning_rate": 9.067587677409324e-08, "loss": 3.8775, "step": 816500 }, { "epoch": 9.07, "learning_rate": 9.073140394909267e-08, "loss": 3.886, "step": 817000 }, { "epoch": 9.08, "learning_rate": 9.078693112409212e-08, "loss": 3.8718, "step": 817500 }, { "epoch": 9.08, "learning_rate": 9.084245829909158e-08, "loss": 3.8816, "step": 818000 }, { "epoch": 9.09, "learning_rate": 9.089798547409102e-08, "loss": 3.8599, "step": 818500 }, { "epoch": 9.1, "learning_rate": 9.095351264909046e-08, "loss": 3.8926, "step": 819000 }, { "epoch": 9.1, "learning_rate": 9.100903982408991e-08, "loss": 3.8887, "step": 819500 }, { "epoch": 9.11, "learning_rate": 9.106456699908935e-08, "loss": 3.8721, "step": 820000 }, { "epoch": 9.11, "learning_rate": 9.11200941740888e-08, "loss": 3.8773, "step": 820500 }, { "epoch": 9.12, "learning_rate": 9.117562134908824e-08, "loss": 3.8793, "step": 821000 }, { "epoch": 9.12, "learning_rate": 9.123114852408769e-08, "loss": 3.8708, "step": 821500 }, { "epoch": 9.13, "learning_rate": 9.128667569908713e-08, "loss": 3.8767, "step": 822000 }, { "epoch": 9.13, "learning_rate": 9.134220287408657e-08, "loss": 3.8931, "step": 822500 }, { "epoch": 9.14, "learning_rate": 9.139773004908602e-08, "loss": 3.8804, "step": 823000 }, { "epoch": 9.15, "learning_rate": 9.145325722408546e-08, "loss": 3.8907, "step": 823500 }, { "epoch": 9.15, "learning_rate": 9.15087843990849e-08, "loss": 3.8805, "step": 824000 }, { "epoch": 9.16, "learning_rate": 9.156431157408435e-08, "loss": 3.8702, "step": 824500 }, { "epoch": 9.16, "learning_rate": 9.16198387490838e-08, "loss": 3.8796, "step": 825000 }, { "epoch": 9.17, "learning_rate": 9.167536592408324e-08, "loss": 3.8664, "step": 825500 }, { "epoch": 9.17, "learning_rate": 9.173089309908268e-08, "loss": 3.8737, "step": 826000 }, { "epoch": 9.18, "learning_rate": 9.178642027408213e-08, "loss": 3.8827, "step": 826500 }, { "epoch": 9.18, "learning_rate": 9.184194744908158e-08, "loss": 3.8768, "step": 827000 }, { "epoch": 9.19, "learning_rate": 9.189747462408102e-08, "loss": 3.879, "step": 827500 }, { "epoch": 9.2, "learning_rate": 9.195300179908046e-08, "loss": 3.8686, "step": 828000 }, { "epoch": 9.2, "learning_rate": 9.200852897407992e-08, "loss": 3.8816, "step": 828500 }, { "epoch": 9.21, "learning_rate": 9.206405614907935e-08, "loss": 3.8592, "step": 829000 }, { "epoch": 9.21, "learning_rate": 9.211958332407879e-08, "loss": 3.8852, "step": 829500 }, { "epoch": 9.22, "learning_rate": 9.217511049907825e-08, "loss": 3.8721, "step": 830000 }, { "epoch": 9.22, "learning_rate": 9.22306376740777e-08, "loss": 3.8841, "step": 830500 }, { "epoch": 9.23, "learning_rate": 9.228616484907712e-08, "loss": 3.8828, "step": 831000 }, { "epoch": 9.23, "learning_rate": 9.234169202407658e-08, "loss": 3.8903, "step": 831500 }, { "epoch": 9.24, "learning_rate": 9.239721919907603e-08, "loss": 3.8985, "step": 832000 }, { "epoch": 9.25, "learning_rate": 9.245274637407546e-08, "loss": 3.8794, "step": 832500 }, { "epoch": 9.25, "learning_rate": 9.250827354907491e-08, "loss": 3.8764, "step": 833000 }, { "epoch": 9.26, "learning_rate": 9.256380072407436e-08, "loss": 3.8839, "step": 833500 }, { "epoch": 9.26, "learning_rate": 9.26193278990738e-08, "loss": 3.8986, "step": 834000 }, { "epoch": 9.27, "learning_rate": 9.267485507407325e-08, "loss": 3.8807, "step": 834500 }, { "epoch": 9.27, "learning_rate": 9.273038224907269e-08, "loss": 3.8898, "step": 835000 }, { "epoch": 9.28, "learning_rate": 9.278590942407214e-08, "loss": 3.8828, "step": 835500 }, { "epoch": 9.28, "learning_rate": 9.284143659907158e-08, "loss": 3.8808, "step": 836000 }, { "epoch": 9.29, "learning_rate": 9.289696377407102e-08, "loss": 3.8742, "step": 836500 }, { "epoch": 9.3, "learning_rate": 9.295249094907047e-08, "loss": 3.8849, "step": 837000 }, { "epoch": 9.3, "learning_rate": 9.300801812406993e-08, "loss": 3.8946, "step": 837500 }, { "epoch": 9.31, "learning_rate": 9.306354529906936e-08, "loss": 3.8681, "step": 838000 }, { "epoch": 9.31, "learning_rate": 9.31190724740688e-08, "loss": 3.8925, "step": 838500 }, { "epoch": 9.32, "learning_rate": 9.317459964906826e-08, "loss": 3.877, "step": 839000 }, { "epoch": 9.32, "learning_rate": 9.323012682406769e-08, "loss": 3.8872, "step": 839500 }, { "epoch": 9.33, "learning_rate": 9.328565399906713e-08, "loss": 3.883, "step": 840000 }, { "epoch": 9.33, "learning_rate": 9.334118117406659e-08, "loss": 3.8679, "step": 840500 }, { "epoch": 9.34, "learning_rate": 9.339670834906603e-08, "loss": 3.8822, "step": 841000 }, { "epoch": 9.35, "learning_rate": 9.345223552406547e-08, "loss": 3.8719, "step": 841500 }, { "epoch": 9.35, "learning_rate": 9.350776269906492e-08, "loss": 3.8754, "step": 842000 }, { "epoch": 9.36, "learning_rate": 9.356328987406437e-08, "loss": 3.8793, "step": 842500 }, { "epoch": 9.36, "learning_rate": 9.36188170490638e-08, "loss": 3.8817, "step": 843000 }, { "epoch": 9.37, "learning_rate": 9.367434422406326e-08, "loss": 3.878, "step": 843500 }, { "epoch": 9.37, "learning_rate": 9.37298713990627e-08, "loss": 3.8828, "step": 844000 }, { "epoch": 9.38, "learning_rate": 9.378539857406214e-08, "loss": 3.866, "step": 844500 }, { "epoch": 9.38, "learning_rate": 9.384092574906159e-08, "loss": 3.864, "step": 845000 }, { "epoch": 9.39, "learning_rate": 9.389645292406103e-08, "loss": 3.8654, "step": 845500 }, { "epoch": 9.4, "learning_rate": 9.395198009906048e-08, "loss": 3.8571, "step": 846000 }, { "epoch": 9.4, "learning_rate": 9.400750727405992e-08, "loss": 3.8963, "step": 846500 }, { "epoch": 9.41, "learning_rate": 9.406303444905937e-08, "loss": 3.8774, "step": 847000 }, { "epoch": 9.41, "learning_rate": 9.411856162405881e-08, "loss": 3.8724, "step": 847500 }, { "epoch": 9.42, "learning_rate": 9.417408879905825e-08, "loss": 3.8785, "step": 848000 }, { "epoch": 9.42, "learning_rate": 9.42296159740577e-08, "loss": 3.8871, "step": 848500 }, { "epoch": 9.43, "learning_rate": 9.428514314905714e-08, "loss": 3.8695, "step": 849000 }, { "epoch": 9.43, "learning_rate": 9.43406703240566e-08, "loss": 3.8861, "step": 849500 }, { "epoch": 9.44, "learning_rate": 9.439619749905603e-08, "loss": 3.8775, "step": 850000 }, { "epoch": 9.45, "learning_rate": 9.445172467405547e-08, "loss": 3.8893, "step": 850500 }, { "epoch": 9.45, "learning_rate": 9.450725184905493e-08, "loss": 3.8753, "step": 851000 }, { "epoch": 9.46, "learning_rate": 9.456277902405438e-08, "loss": 3.882, "step": 851500 }, { "epoch": 9.46, "learning_rate": 9.461830619905381e-08, "loss": 3.8722, "step": 852000 }, { "epoch": 9.47, "learning_rate": 9.467383337405326e-08, "loss": 3.8835, "step": 852500 }, { "epoch": 9.47, "learning_rate": 9.472936054905271e-08, "loss": 3.8712, "step": 853000 }, { "epoch": 9.48, "learning_rate": 9.478488772405214e-08, "loss": 3.8814, "step": 853500 }, { "epoch": 9.48, "learning_rate": 9.48404148990516e-08, "loss": 3.882, "step": 854000 }, { "epoch": 9.49, "learning_rate": 9.489594207405104e-08, "loss": 3.8849, "step": 854500 }, { "epoch": 9.5, "learning_rate": 9.495146924905049e-08, "loss": 3.8773, "step": 855000 }, { "epoch": 9.5, "learning_rate": 9.500699642404993e-08, "loss": 3.8978, "step": 855500 }, { "epoch": 9.51, "learning_rate": 9.506252359904937e-08, "loss": 3.8764, "step": 856000 }, { "epoch": 9.51, "learning_rate": 9.511805077404882e-08, "loss": 3.8656, "step": 856500 }, { "epoch": 9.52, "learning_rate": 9.517357794904826e-08, "loss": 3.8782, "step": 857000 }, { "epoch": 9.52, "learning_rate": 9.52291051240477e-08, "loss": 3.8779, "step": 857500 }, { "epoch": 9.53, "learning_rate": 9.528463229904715e-08, "loss": 3.8603, "step": 858000 }, { "epoch": 9.53, "learning_rate": 9.53401594740466e-08, "loss": 3.8769, "step": 858500 }, { "epoch": 9.54, "learning_rate": 9.539568664904604e-08, "loss": 3.8872, "step": 859000 }, { "epoch": 9.55, "learning_rate": 9.545121382404548e-08, "loss": 3.8783, "step": 859500 }, { "epoch": 9.55, "learning_rate": 9.550674099904493e-08, "loss": 3.8729, "step": 860000 }, { "epoch": 9.56, "learning_rate": 9.556226817404437e-08, "loss": 3.8633, "step": 860500 }, { "epoch": 9.56, "learning_rate": 9.561779534904382e-08, "loss": 3.8976, "step": 861000 }, { "epoch": 9.57, "learning_rate": 9.567332252404326e-08, "loss": 3.8628, "step": 861500 }, { "epoch": 9.57, "learning_rate": 9.57288496990427e-08, "loss": 3.8705, "step": 862000 }, { "epoch": 9.58, "learning_rate": 9.578437687404215e-08, "loss": 3.8724, "step": 862500 }, { "epoch": 9.58, "learning_rate": 9.583990404904159e-08, "loss": 3.8578, "step": 863000 }, { "epoch": 9.59, "learning_rate": 9.589543122404105e-08, "loss": 3.8741, "step": 863500 }, { "epoch": 9.6, "learning_rate": 9.595095839904048e-08, "loss": 3.874, "step": 864000 }, { "epoch": 9.6, "learning_rate": 9.600648557403992e-08, "loss": 3.8756, "step": 864500 }, { "epoch": 9.61, "learning_rate": 9.606201274903938e-08, "loss": 3.8754, "step": 865000 }, { "epoch": 9.61, "learning_rate": 9.611753992403881e-08, "loss": 3.8983, "step": 865500 }, { "epoch": 9.62, "learning_rate": 9.617306709903826e-08, "loss": 3.8807, "step": 866000 }, { "epoch": 9.62, "learning_rate": 9.622859427403771e-08, "loss": 3.874, "step": 866500 }, { "epoch": 9.63, "learning_rate": 9.628412144903716e-08, "loss": 3.8741, "step": 867000 }, { "epoch": 9.63, "learning_rate": 9.633964862403659e-08, "loss": 3.8721, "step": 867500 }, { "epoch": 9.64, "learning_rate": 9.639517579903605e-08, "loss": 3.8652, "step": 868000 }, { "epoch": 9.65, "learning_rate": 9.645070297403549e-08, "loss": 3.8871, "step": 868500 }, { "epoch": 9.65, "learning_rate": 9.650623014903492e-08, "loss": 3.8646, "step": 869000 }, { "epoch": 9.66, "learning_rate": 9.656175732403438e-08, "loss": 3.8755, "step": 869500 }, { "epoch": 9.66, "learning_rate": 9.661728449903382e-08, "loss": 3.9076, "step": 870000 }, { "epoch": 9.67, "learning_rate": 9.667281167403327e-08, "loss": 3.8841, "step": 870500 }, { "epoch": 9.67, "learning_rate": 9.672833884903271e-08, "loss": 3.8814, "step": 871000 }, { "epoch": 9.68, "learning_rate": 9.678386602403216e-08, "loss": 3.8679, "step": 871500 }, { "epoch": 9.68, "learning_rate": 9.68393931990316e-08, "loss": 3.8467, "step": 872000 }, { "epoch": 9.69, "learning_rate": 9.689492037403104e-08, "loss": 3.8771, "step": 872500 }, { "epoch": 9.7, "learning_rate": 9.695044754903049e-08, "loss": 3.8775, "step": 873000 }, { "epoch": 9.7, "learning_rate": 9.700597472402993e-08, "loss": 3.876, "step": 873500 }, { "epoch": 9.71, "learning_rate": 9.706150189902939e-08, "loss": 3.8744, "step": 874000 }, { "epoch": 9.71, "learning_rate": 9.711702907402882e-08, "loss": 3.8832, "step": 874500 }, { "epoch": 9.72, "learning_rate": 9.717255624902827e-08, "loss": 3.8682, "step": 875000 }, { "epoch": 9.72, "learning_rate": 9.722808342402772e-08, "loss": 3.8732, "step": 875500 }, { "epoch": 9.73, "learning_rate": 9.728361059902715e-08, "loss": 3.8604, "step": 876000 }, { "epoch": 9.73, "learning_rate": 9.73391377740266e-08, "loss": 3.8781, "step": 876500 }, { "epoch": 9.74, "learning_rate": 9.739466494902606e-08, "loss": 3.8699, "step": 877000 }, { "epoch": 9.75, "learning_rate": 9.74501921240255e-08, "loss": 3.8748, "step": 877500 }, { "epoch": 9.75, "learning_rate": 9.750571929902493e-08, "loss": 3.8769, "step": 878000 }, { "epoch": 9.76, "learning_rate": 9.756124647402439e-08, "loss": 3.8775, "step": 878500 }, { "epoch": 9.76, "learning_rate": 9.761677364902383e-08, "loss": 3.8547, "step": 879000 }, { "epoch": 9.77, "learning_rate": 9.767230082402326e-08, "loss": 3.8644, "step": 879500 }, { "epoch": 9.77, "learning_rate": 9.772782799902272e-08, "loss": 3.8892, "step": 880000 }, { "epoch": 9.78, "learning_rate": 9.778335517402216e-08, "loss": 3.8671, "step": 880500 }, { "epoch": 9.78, "learning_rate": 9.783888234902161e-08, "loss": 3.8643, "step": 881000 }, { "epoch": 9.79, "learning_rate": 9.789440952402105e-08, "loss": 3.8543, "step": 881500 }, { "epoch": 9.79, "learning_rate": 9.79499366990205e-08, "loss": 3.8538, "step": 882000 }, { "epoch": 9.8, "learning_rate": 9.800546387401994e-08, "loss": 3.8693, "step": 882500 }, { "epoch": 9.81, "learning_rate": 9.806099104901939e-08, "loss": 3.8696, "step": 883000 }, { "epoch": 9.81, "learning_rate": 9.811651822401883e-08, "loss": 3.8674, "step": 883500 }, { "epoch": 9.82, "learning_rate": 9.817204539901827e-08, "loss": 3.8574, "step": 884000 }, { "epoch": 9.82, "learning_rate": 9.822757257401773e-08, "loss": 3.8854, "step": 884500 }, { "epoch": 9.83, "learning_rate": 9.828309974901716e-08, "loss": 3.8651, "step": 885000 }, { "epoch": 9.83, "learning_rate": 9.83386269240166e-08, "loss": 3.8884, "step": 885500 }, { "epoch": 9.84, "learning_rate": 9.839415409901606e-08, "loss": 3.8805, "step": 886000 }, { "epoch": 9.84, "learning_rate": 9.84496812740155e-08, "loss": 3.8592, "step": 886500 }, { "epoch": 9.85, "learning_rate": 9.850520844901494e-08, "loss": 3.8688, "step": 887000 }, { "epoch": 9.86, "learning_rate": 9.85607356240144e-08, "loss": 3.8754, "step": 887500 }, { "epoch": 9.86, "learning_rate": 9.861626279901383e-08, "loss": 3.8672, "step": 888000 }, { "epoch": 9.87, "learning_rate": 9.867178997401327e-08, "loss": 3.8811, "step": 888500 }, { "epoch": 9.87, "learning_rate": 9.872731714901273e-08, "loss": 3.8959, "step": 889000 }, { "epoch": 9.88, "learning_rate": 9.878284432401217e-08, "loss": 3.8589, "step": 889500 }, { "epoch": 9.88, "learning_rate": 9.88383714990116e-08, "loss": 3.8649, "step": 890000 }, { "epoch": 9.89, "learning_rate": 9.889389867401106e-08, "loss": 3.8791, "step": 890500 }, { "epoch": 9.89, "learning_rate": 9.89494258490105e-08, "loss": 3.8653, "step": 891000 }, { "epoch": 9.9, "learning_rate": 9.900495302400994e-08, "loss": 3.8719, "step": 891500 }, { "epoch": 9.91, "learning_rate": 9.90604801990094e-08, "loss": 3.869, "step": 892000 }, { "epoch": 9.91, "learning_rate": 9.911600737400884e-08, "loss": 3.8764, "step": 892500 }, { "epoch": 9.92, "learning_rate": 9.917153454900828e-08, "loss": 3.856, "step": 893000 }, { "epoch": 9.92, "learning_rate": 9.922706172400773e-08, "loss": 3.8704, "step": 893500 }, { "epoch": 9.93, "learning_rate": 9.928258889900717e-08, "loss": 3.8762, "step": 894000 }, { "epoch": 9.93, "learning_rate": 9.933811607400662e-08, "loss": 3.8826, "step": 894500 }, { "epoch": 9.94, "learning_rate": 9.939364324900606e-08, "loss": 3.872, "step": 895000 }, { "epoch": 9.94, "learning_rate": 9.94491704240055e-08, "loss": 3.8834, "step": 895500 }, { "epoch": 9.95, "learning_rate": 9.950469759900495e-08, "loss": 3.8752, "step": 896000 }, { "epoch": 9.96, "learning_rate": 9.956022477400439e-08, "loss": 3.8826, "step": 896500 }, { "epoch": 9.96, "learning_rate": 9.961575194900384e-08, "loss": 3.8484, "step": 897000 }, { "epoch": 9.97, "learning_rate": 9.967127912400328e-08, "loss": 3.8693, "step": 897500 }, { "epoch": 9.97, "learning_rate": 9.972680629900272e-08, "loss": 3.8736, "step": 898000 }, { "epoch": 9.98, "learning_rate": 9.978233347400217e-08, "loss": 3.8756, "step": 898500 }, { "epoch": 9.98, "learning_rate": 9.983786064900161e-08, "loss": 3.8857, "step": 899000 }, { "epoch": 9.99, "learning_rate": 9.989338782400106e-08, "loss": 3.8839, "step": 899500 }, { "epoch": 9.99, "learning_rate": 9.994891499900051e-08, "loss": 3.8706, "step": 900000 }, { "epoch": 10.0, "eval_loss": 3.904014825820923, "eval_runtime": 6.3112, "eval_samples_per_second": 246.227, "step": 900460 }, { "epoch": 10.0, "learning_rate": 9.99988894565e-08, "loss": 3.8669, "step": 900500 }, { "epoch": 10.01, "learning_rate": 9.998500766275015e-08, "loss": 3.859, "step": 901000 }, { "epoch": 10.01, "learning_rate": 9.997112586900029e-08, "loss": 3.8591, "step": 901500 }, { "epoch": 10.02, "learning_rate": 9.995724407525042e-08, "loss": 3.8883, "step": 902000 }, { "epoch": 10.02, "learning_rate": 9.994336228150056e-08, "loss": 3.8605, "step": 902500 }, { "epoch": 10.03, "learning_rate": 9.99294804877507e-08, "loss": 3.8563, "step": 903000 }, { "epoch": 10.03, "learning_rate": 9.991559869400083e-08, "loss": 3.8597, "step": 903500 }, { "epoch": 10.04, "learning_rate": 9.990171690025097e-08, "loss": 3.8487, "step": 904000 }, { "epoch": 10.04, "learning_rate": 9.988783510650112e-08, "loss": 3.8893, "step": 904500 }, { "epoch": 10.05, "learning_rate": 9.987395331275126e-08, "loss": 3.8711, "step": 905000 }, { "epoch": 10.06, "learning_rate": 9.98600715190014e-08, "loss": 3.8548, "step": 905500 }, { "epoch": 10.06, "learning_rate": 9.984618972525153e-08, "loss": 3.8593, "step": 906000 }, { "epoch": 10.07, "learning_rate": 9.983230793150167e-08, "loss": 3.8685, "step": 906500 }, { "epoch": 10.07, "learning_rate": 9.981842613775181e-08, "loss": 3.8532, "step": 907000 }, { "epoch": 10.08, "learning_rate": 9.980454434400196e-08, "loss": 3.8611, "step": 907500 }, { "epoch": 10.08, "learning_rate": 9.979066255025209e-08, "loss": 3.8682, "step": 908000 }, { "epoch": 10.09, "learning_rate": 9.977678075650223e-08, "loss": 3.8595, "step": 908500 }, { "epoch": 10.09, "learning_rate": 9.976289896275236e-08, "loss": 3.8471, "step": 909000 }, { "epoch": 10.1, "learning_rate": 9.97490171690025e-08, "loss": 3.8813, "step": 909500 }, { "epoch": 10.11, "learning_rate": 9.973513537525264e-08, "loss": 3.8517, "step": 910000 }, { "epoch": 10.11, "learning_rate": 9.972125358150278e-08, "loss": 3.857, "step": 910500 }, { "epoch": 10.12, "learning_rate": 9.970737178775293e-08, "loss": 3.889, "step": 911000 }, { "epoch": 10.12, "learning_rate": 9.969348999400307e-08, "loss": 3.8742, "step": 911500 }, { "epoch": 10.13, "learning_rate": 9.96796082002532e-08, "loss": 3.8673, "step": 912000 }, { "epoch": 10.13, "learning_rate": 9.966572640650334e-08, "loss": 3.8758, "step": 912500 }, { "epoch": 10.14, "learning_rate": 9.965184461275348e-08, "loss": 3.8907, "step": 913000 }, { "epoch": 10.14, "learning_rate": 9.963796281900362e-08, "loss": 3.8586, "step": 913500 }, { "epoch": 10.15, "learning_rate": 9.962408102525375e-08, "loss": 3.8576, "step": 914000 }, { "epoch": 10.16, "learning_rate": 9.961019923150388e-08, "loss": 3.8707, "step": 914500 }, { "epoch": 10.16, "learning_rate": 9.959631743775402e-08, "loss": 3.8731, "step": 915000 }, { "epoch": 10.17, "learning_rate": 9.958243564400417e-08, "loss": 3.8753, "step": 915500 }, { "epoch": 10.17, "learning_rate": 9.956855385025431e-08, "loss": 3.8705, "step": 916000 }, { "epoch": 10.18, "learning_rate": 9.955467205650445e-08, "loss": 3.8709, "step": 916500 }, { "epoch": 10.18, "learning_rate": 9.95407902627546e-08, "loss": 3.8832, "step": 917000 }, { "epoch": 10.19, "learning_rate": 9.952690846900472e-08, "loss": 3.8674, "step": 917500 }, { "epoch": 10.19, "learning_rate": 9.951302667525486e-08, "loss": 3.8332, "step": 918000 }, { "epoch": 10.2, "learning_rate": 9.949914488150501e-08, "loss": 3.8863, "step": 918500 }, { "epoch": 10.21, "learning_rate": 9.948526308775515e-08, "loss": 3.8694, "step": 919000 }, { "epoch": 10.21, "learning_rate": 9.947138129400529e-08, "loss": 3.8543, "step": 919500 }, { "epoch": 10.22, "learning_rate": 9.945749950025542e-08, "loss": 3.8511, "step": 920000 }, { "epoch": 10.22, "learning_rate": 9.944361770650555e-08, "loss": 3.8791, "step": 920500 }, { "epoch": 10.23, "learning_rate": 9.942973591275569e-08, "loss": 3.8505, "step": 921000 }, { "epoch": 10.23, "learning_rate": 9.941585411900583e-08, "loss": 3.8666, "step": 921500 }, { "epoch": 10.24, "learning_rate": 9.940197232525598e-08, "loss": 3.8796, "step": 922000 }, { "epoch": 10.24, "learning_rate": 9.938809053150612e-08, "loss": 3.8867, "step": 922500 }, { "epoch": 10.25, "learning_rate": 9.937420873775625e-08, "loss": 3.8659, "step": 923000 }, { "epoch": 10.26, "learning_rate": 9.936032694400639e-08, "loss": 3.8811, "step": 923500 }, { "epoch": 10.26, "learning_rate": 9.934644515025653e-08, "loss": 3.849, "step": 924000 }, { "epoch": 10.27, "learning_rate": 9.933256335650667e-08, "loss": 3.8673, "step": 924500 }, { "epoch": 10.27, "learning_rate": 9.931868156275682e-08, "loss": 3.8738, "step": 925000 }, { "epoch": 10.28, "learning_rate": 9.930479976900696e-08, "loss": 3.8682, "step": 925500 }, { "epoch": 10.28, "learning_rate": 9.929091797525709e-08, "loss": 3.8537, "step": 926000 }, { "epoch": 10.29, "learning_rate": 9.927703618150722e-08, "loss": 3.865, "step": 926500 }, { "epoch": 10.29, "learning_rate": 9.926315438775736e-08, "loss": 3.8733, "step": 927000 }, { "epoch": 10.3, "learning_rate": 9.92492725940075e-08, "loss": 3.8726, "step": 927500 }, { "epoch": 10.31, "learning_rate": 9.923539080025764e-08, "loss": 3.8549, "step": 928000 }, { "epoch": 10.31, "learning_rate": 9.922150900650777e-08, "loss": 3.8419, "step": 928500 }, { "epoch": 10.32, "learning_rate": 9.920762721275792e-08, "loss": 3.8696, "step": 929000 }, { "epoch": 10.32, "learning_rate": 9.919374541900806e-08, "loss": 3.8543, "step": 929500 }, { "epoch": 10.33, "learning_rate": 9.91798636252582e-08, "loss": 3.8731, "step": 930000 }, { "epoch": 10.33, "learning_rate": 9.916598183150834e-08, "loss": 3.8611, "step": 930500 }, { "epoch": 10.34, "learning_rate": 9.915210003775848e-08, "loss": 3.8893, "step": 931000 }, { "epoch": 10.34, "learning_rate": 9.913821824400861e-08, "loss": 3.8672, "step": 931500 }, { "epoch": 10.35, "learning_rate": 9.912433645025876e-08, "loss": 3.8572, "step": 932000 }, { "epoch": 10.36, "learning_rate": 9.911045465650888e-08, "loss": 3.8588, "step": 932500 }, { "epoch": 10.36, "learning_rate": 9.909657286275903e-08, "loss": 3.8452, "step": 933000 }, { "epoch": 10.37, "learning_rate": 9.908269106900917e-08, "loss": 3.8802, "step": 933500 }, { "epoch": 10.37, "learning_rate": 9.90688092752593e-08, "loss": 3.8591, "step": 934000 }, { "epoch": 10.38, "learning_rate": 9.905492748150944e-08, "loss": 3.8646, "step": 934500 }, { "epoch": 10.38, "learning_rate": 9.904104568775958e-08, "loss": 3.8622, "step": 935000 }, { "epoch": 10.39, "learning_rate": 9.902716389400972e-08, "loss": 3.8741, "step": 935500 }, { "epoch": 10.39, "learning_rate": 9.901328210025987e-08, "loss": 3.8691, "step": 936000 }, { "epoch": 10.4, "learning_rate": 9.899940030651001e-08, "loss": 3.8632, "step": 936500 }, { "epoch": 10.41, "learning_rate": 9.898551851276014e-08, "loss": 3.8659, "step": 937000 }, { "epoch": 10.41, "learning_rate": 9.897163671901028e-08, "loss": 3.8764, "step": 937500 }, { "epoch": 10.42, "learning_rate": 9.895775492526042e-08, "loss": 3.8729, "step": 938000 }, { "epoch": 10.42, "learning_rate": 9.894387313151055e-08, "loss": 3.8833, "step": 938500 }, { "epoch": 10.43, "learning_rate": 9.89299913377607e-08, "loss": 3.8762, "step": 939000 }, { "epoch": 10.43, "learning_rate": 9.891610954401084e-08, "loss": 3.8472, "step": 939500 }, { "epoch": 10.44, "learning_rate": 9.890222775026097e-08, "loss": 3.8667, "step": 940000 }, { "epoch": 10.44, "learning_rate": 9.888834595651111e-08, "loss": 3.8745, "step": 940500 }, { "epoch": 10.45, "learning_rate": 9.887446416276125e-08, "loss": 3.8837, "step": 941000 }, { "epoch": 10.46, "learning_rate": 9.886058236901139e-08, "loss": 3.8511, "step": 941500 }, { "epoch": 10.46, "learning_rate": 9.884670057526153e-08, "loss": 3.8796, "step": 942000 }, { "epoch": 10.47, "learning_rate": 9.883281878151166e-08, "loss": 3.8682, "step": 942500 }, { "epoch": 10.47, "learning_rate": 9.88189369877618e-08, "loss": 3.8515, "step": 943000 }, { "epoch": 10.48, "learning_rate": 9.880505519401195e-08, "loss": 3.8572, "step": 943500 }, { "epoch": 10.48, "learning_rate": 9.879117340026209e-08, "loss": 3.8599, "step": 944000 }, { "epoch": 10.49, "learning_rate": 9.877729160651222e-08, "loss": 3.8675, "step": 944500 }, { "epoch": 10.49, "learning_rate": 9.876340981276236e-08, "loss": 3.8707, "step": 945000 }, { "epoch": 10.5, "learning_rate": 9.874952801901249e-08, "loss": 3.8825, "step": 945500 }, { "epoch": 10.51, "learning_rate": 9.873564622526263e-08, "loss": 3.8788, "step": 946000 }, { "epoch": 10.51, "learning_rate": 9.872176443151278e-08, "loss": 3.8524, "step": 946500 }, { "epoch": 10.52, "learning_rate": 9.870788263776292e-08, "loss": 3.8666, "step": 947000 }, { "epoch": 10.52, "learning_rate": 9.869400084401306e-08, "loss": 3.8758, "step": 947500 }, { "epoch": 10.53, "learning_rate": 9.86801190502632e-08, "loss": 3.8605, "step": 948000 }, { "epoch": 10.53, "learning_rate": 9.866623725651333e-08, "loss": 3.8542, "step": 948500 }, { "epoch": 10.54, "learning_rate": 9.865235546276347e-08, "loss": 3.8557, "step": 949000 }, { "epoch": 10.54, "learning_rate": 9.863847366901362e-08, "loss": 3.8531, "step": 949500 }, { "epoch": 10.55, "learning_rate": 9.862459187526376e-08, "loss": 3.8527, "step": 950000 }, { "epoch": 10.56, "learning_rate": 9.861071008151389e-08, "loss": 3.8588, "step": 950500 }, { "epoch": 10.56, "learning_rate": 9.859682828776402e-08, "loss": 3.8679, "step": 951000 }, { "epoch": 10.57, "learning_rate": 9.858294649401416e-08, "loss": 3.8518, "step": 951500 }, { "epoch": 10.57, "learning_rate": 9.85690647002643e-08, "loss": 3.8525, "step": 952000 }, { "epoch": 10.58, "learning_rate": 9.855518290651444e-08, "loss": 3.8524, "step": 952500 }, { "epoch": 10.58, "learning_rate": 9.854130111276459e-08, "loss": 3.8605, "step": 953000 }, { "epoch": 10.59, "learning_rate": 9.852741931901473e-08, "loss": 3.8719, "step": 953500 }, { "epoch": 10.59, "learning_rate": 9.851353752526486e-08, "loss": 3.8659, "step": 954000 }, { "epoch": 10.6, "learning_rate": 9.8499655731515e-08, "loss": 3.8484, "step": 954500 }, { "epoch": 10.61, "learning_rate": 9.848577393776514e-08, "loss": 3.871, "step": 955000 }, { "epoch": 10.61, "learning_rate": 9.847189214401528e-08, "loss": 3.8532, "step": 955500 }, { "epoch": 10.62, "learning_rate": 9.845801035026543e-08, "loss": 3.8531, "step": 956000 }, { "epoch": 10.62, "learning_rate": 9.844412855651555e-08, "loss": 3.8591, "step": 956500 }, { "epoch": 10.63, "learning_rate": 9.843024676276568e-08, "loss": 3.8737, "step": 957000 }, { "epoch": 10.63, "learning_rate": 9.841636496901583e-08, "loss": 3.8497, "step": 957500 }, { "epoch": 10.64, "learning_rate": 9.840248317526597e-08, "loss": 3.8671, "step": 958000 }, { "epoch": 10.64, "learning_rate": 9.838860138151611e-08, "loss": 3.8644, "step": 958500 }, { "epoch": 10.65, "learning_rate": 9.837471958776625e-08, "loss": 3.8626, "step": 959000 }, { "epoch": 10.66, "learning_rate": 9.836083779401638e-08, "loss": 3.8502, "step": 959500 }, { "epoch": 10.66, "learning_rate": 9.834695600026652e-08, "loss": 3.8785, "step": 960000 }, { "epoch": 10.67, "learning_rate": 9.833307420651667e-08, "loss": 3.8665, "step": 960500 }, { "epoch": 10.67, "learning_rate": 9.831919241276681e-08, "loss": 3.853, "step": 961000 }, { "epoch": 10.68, "learning_rate": 9.830531061901695e-08, "loss": 3.8561, "step": 961500 }, { "epoch": 10.68, "learning_rate": 9.829142882526709e-08, "loss": 3.8521, "step": 962000 }, { "epoch": 10.69, "learning_rate": 9.827754703151722e-08, "loss": 3.8828, "step": 962500 }, { "epoch": 10.69, "learning_rate": 9.826366523776735e-08, "loss": 3.8706, "step": 963000 }, { "epoch": 10.7, "learning_rate": 9.824978344401749e-08, "loss": 3.8622, "step": 963500 }, { "epoch": 10.71, "learning_rate": 9.823590165026764e-08, "loss": 3.8459, "step": 964000 }, { "epoch": 10.71, "learning_rate": 9.822201985651778e-08, "loss": 3.8465, "step": 964500 }, { "epoch": 10.72, "learning_rate": 9.820813806276791e-08, "loss": 3.8832, "step": 965000 }, { "epoch": 10.72, "learning_rate": 9.819425626901805e-08, "loss": 3.8757, "step": 965500 }, { "epoch": 10.73, "learning_rate": 9.818037447526819e-08, "loss": 3.8731, "step": 966000 }, { "epoch": 10.73, "learning_rate": 9.816649268151833e-08, "loss": 3.8566, "step": 966500 }, { "epoch": 10.74, "learning_rate": 9.815261088776848e-08, "loss": 3.8529, "step": 967000 }, { "epoch": 10.74, "learning_rate": 9.813872909401862e-08, "loss": 3.8567, "step": 967500 }, { "epoch": 10.75, "learning_rate": 9.812484730026875e-08, "loss": 3.8475, "step": 968000 }, { "epoch": 10.76, "learning_rate": 9.811096550651889e-08, "loss": 3.8522, "step": 968500 }, { "epoch": 10.76, "learning_rate": 9.809708371276902e-08, "loss": 3.8692, "step": 969000 }, { "epoch": 10.77, "learning_rate": 9.808320191901916e-08, "loss": 3.8667, "step": 969500 }, { "epoch": 10.77, "learning_rate": 9.80693201252693e-08, "loss": 3.869, "step": 970000 }, { "epoch": 10.78, "learning_rate": 9.805543833151945e-08, "loss": 3.8616, "step": 970500 }, { "epoch": 10.78, "learning_rate": 9.804155653776957e-08, "loss": 3.8601, "step": 971000 }, { "epoch": 10.79, "learning_rate": 9.802767474401972e-08, "loss": 3.867, "step": 971500 }, { "epoch": 10.79, "learning_rate": 9.801379295026986e-08, "loss": 3.8644, "step": 972000 }, { "epoch": 10.8, "learning_rate": 9.799991115652e-08, "loss": 3.8434, "step": 972500 }, { "epoch": 10.81, "learning_rate": 9.798602936277014e-08, "loss": 3.8574, "step": 973000 }, { "epoch": 10.81, "learning_rate": 9.797214756902027e-08, "loss": 3.8648, "step": 973500 }, { "epoch": 10.82, "learning_rate": 9.795826577527041e-08, "loss": 3.846, "step": 974000 }, { "epoch": 10.82, "learning_rate": 9.794438398152056e-08, "loss": 3.8593, "step": 974500 }, { "epoch": 10.83, "learning_rate": 9.793050218777069e-08, "loss": 3.8662, "step": 975000 }, { "epoch": 10.83, "learning_rate": 9.791662039402083e-08, "loss": 3.8412, "step": 975500 }, { "epoch": 10.84, "learning_rate": 9.790273860027097e-08, "loss": 3.8641, "step": 976000 }, { "epoch": 10.84, "learning_rate": 9.78888568065211e-08, "loss": 3.8525, "step": 976500 }, { "epoch": 10.85, "learning_rate": 9.787497501277124e-08, "loss": 3.8524, "step": 977000 }, { "epoch": 10.86, "learning_rate": 9.786109321902138e-08, "loss": 3.8351, "step": 977500 }, { "epoch": 10.86, "learning_rate": 9.784721142527153e-08, "loss": 3.8825, "step": 978000 }, { "epoch": 10.87, "learning_rate": 9.783332963152167e-08, "loss": 3.8685, "step": 978500 }, { "epoch": 10.87, "learning_rate": 9.78194478377718e-08, "loss": 3.8586, "step": 979000 }, { "epoch": 10.88, "learning_rate": 9.780556604402194e-08, "loss": 3.8522, "step": 979500 }, { "epoch": 10.88, "learning_rate": 9.779168425027208e-08, "loss": 3.8554, "step": 980000 }, { "epoch": 10.89, "learning_rate": 9.777780245652222e-08, "loss": 3.8516, "step": 980500 }, { "epoch": 10.89, "learning_rate": 9.776392066277235e-08, "loss": 3.8519, "step": 981000 }, { "epoch": 10.9, "learning_rate": 9.77500388690225e-08, "loss": 3.8716, "step": 981500 }, { "epoch": 10.91, "learning_rate": 9.773615707527262e-08, "loss": 3.8573, "step": 982000 }, { "epoch": 10.91, "learning_rate": 9.772227528152277e-08, "loss": 3.8609, "step": 982500 }, { "epoch": 10.92, "learning_rate": 9.770839348777291e-08, "loss": 3.8734, "step": 983000 }, { "epoch": 10.92, "learning_rate": 9.769451169402305e-08, "loss": 3.8622, "step": 983500 }, { "epoch": 10.93, "learning_rate": 9.76806299002732e-08, "loss": 3.8566, "step": 984000 }, { "epoch": 10.93, "learning_rate": 9.766674810652334e-08, "loss": 3.8702, "step": 984500 }, { "epoch": 10.94, "learning_rate": 9.765286631277347e-08, "loss": 3.86, "step": 985000 }, { "epoch": 10.94, "learning_rate": 9.763898451902361e-08, "loss": 3.8647, "step": 985500 }, { "epoch": 10.95, "learning_rate": 9.762510272527375e-08, "loss": 3.8523, "step": 986000 }, { "epoch": 10.96, "learning_rate": 9.761122093152389e-08, "loss": 3.8478, "step": 986500 }, { "epoch": 10.96, "learning_rate": 9.759733913777402e-08, "loss": 3.8731, "step": 987000 }, { "epoch": 10.97, "learning_rate": 9.758345734402415e-08, "loss": 3.8746, "step": 987500 }, { "epoch": 10.97, "learning_rate": 9.756957555027429e-08, "loss": 3.8547, "step": 988000 }, { "epoch": 10.98, "learning_rate": 9.755569375652443e-08, "loss": 3.8569, "step": 988500 }, { "epoch": 10.98, "learning_rate": 9.754181196277458e-08, "loss": 3.8595, "step": 989000 }, { "epoch": 10.99, "learning_rate": 9.752793016902472e-08, "loss": 3.8365, "step": 989500 }, { "epoch": 10.99, "learning_rate": 9.751404837527486e-08, "loss": 3.8444, "step": 990000 }, { "epoch": 11.0, "learning_rate": 9.750016658152499e-08, "loss": 3.8648, "step": 990500 }, { "epoch": 11.0, "eval_loss": 3.8952994346618652, "eval_runtime": 6.3144, "eval_samples_per_second": 246.105, "step": 990506 }, { "epoch": 11.01, "learning_rate": 9.748628478777513e-08, "loss": 3.8663, "step": 991000 }, { "epoch": 11.01, "learning_rate": 9.747240299402527e-08, "loss": 3.8649, "step": 991500 }, { "epoch": 11.02, "learning_rate": 9.745852120027542e-08, "loss": 3.8657, "step": 992000 }, { "epoch": 11.02, "learning_rate": 9.744463940652556e-08, "loss": 3.853, "step": 992500 }, { "epoch": 11.03, "learning_rate": 9.743075761277569e-08, "loss": 3.8628, "step": 993000 }, { "epoch": 11.03, "learning_rate": 9.741687581902582e-08, "loss": 3.8512, "step": 993500 }, { "epoch": 11.04, "learning_rate": 9.740299402527596e-08, "loss": 3.8547, "step": 994000 }, { "epoch": 11.04, "learning_rate": 9.73891122315261e-08, "loss": 3.8546, "step": 994500 }, { "epoch": 11.05, "learning_rate": 9.737523043777624e-08, "loss": 3.867, "step": 995000 }, { "epoch": 11.06, "learning_rate": 9.736134864402639e-08, "loss": 3.8618, "step": 995500 }, { "epoch": 11.06, "learning_rate": 9.734746685027652e-08, "loss": 3.8536, "step": 996000 }, { "epoch": 11.07, "learning_rate": 9.733358505652666e-08, "loss": 3.8588, "step": 996500 }, { "epoch": 11.07, "learning_rate": 9.73197032627768e-08, "loss": 3.8499, "step": 997000 }, { "epoch": 11.08, "learning_rate": 9.730582146902694e-08, "loss": 3.8623, "step": 997500 }, { "epoch": 11.08, "learning_rate": 9.729193967527708e-08, "loss": 3.854, "step": 998000 }, { "epoch": 11.09, "learning_rate": 9.727805788152723e-08, "loss": 3.842, "step": 998500 }, { "epoch": 11.09, "learning_rate": 9.726417608777736e-08, "loss": 3.872, "step": 999000 }, { "epoch": 11.1, "learning_rate": 9.725029429402748e-08, "loss": 3.8529, "step": 999500 }, { "epoch": 11.11, "learning_rate": 9.723641250027763e-08, "loss": 3.8463, "step": 1000000 }, { "epoch": 11.11, "learning_rate": 9.722253070652777e-08, "loss": 3.8362, "step": 1000500 }, { "epoch": 11.12, "learning_rate": 9.720864891277791e-08, "loss": 3.8544, "step": 1001000 }, { "epoch": 11.12, "learning_rate": 9.719476711902804e-08, "loss": 3.8483, "step": 1001500 }, { "epoch": 11.13, "learning_rate": 9.718088532527818e-08, "loss": 3.8695, "step": 1002000 }, { "epoch": 11.13, "learning_rate": 9.716700353152833e-08, "loss": 3.8459, "step": 1002500 }, { "epoch": 11.14, "learning_rate": 9.715312173777847e-08, "loss": 3.8633, "step": 1003000 }, { "epoch": 11.14, "learning_rate": 9.713923994402861e-08, "loss": 3.8359, "step": 1003500 }, { "epoch": 11.15, "learning_rate": 9.712535815027875e-08, "loss": 3.844, "step": 1004000 }, { "epoch": 11.16, "learning_rate": 9.711147635652888e-08, "loss": 3.8747, "step": 1004500 }, { "epoch": 11.16, "learning_rate": 9.709759456277902e-08, "loss": 3.8569, "step": 1005000 }, { "epoch": 11.17, "learning_rate": 9.708371276902915e-08, "loss": 3.8704, "step": 1005500 }, { "epoch": 11.17, "learning_rate": 9.70698309752793e-08, "loss": 3.847, "step": 1006000 }, { "epoch": 11.18, "learning_rate": 9.705594918152944e-08, "loss": 3.8777, "step": 1006500 }, { "epoch": 11.18, "learning_rate": 9.704206738777958e-08, "loss": 3.8748, "step": 1007000 }, { "epoch": 11.19, "learning_rate": 9.702818559402971e-08, "loss": 3.8703, "step": 1007500 }, { "epoch": 11.19, "learning_rate": 9.701430380027985e-08, "loss": 3.863, "step": 1008000 }, { "epoch": 11.2, "learning_rate": 9.700042200652999e-08, "loss": 3.8448, "step": 1008500 }, { "epoch": 11.21, "learning_rate": 9.698654021278013e-08, "loss": 3.8467, "step": 1009000 }, { "epoch": 11.21, "learning_rate": 9.697265841903028e-08, "loss": 3.8423, "step": 1009500 }, { "epoch": 11.22, "learning_rate": 9.69587766252804e-08, "loss": 3.8522, "step": 1010000 }, { "epoch": 11.22, "learning_rate": 9.694489483153055e-08, "loss": 3.8648, "step": 1010500 }, { "epoch": 11.23, "learning_rate": 9.693101303778069e-08, "loss": 3.8497, "step": 1011000 }, { "epoch": 11.23, "learning_rate": 9.691713124403082e-08, "loss": 3.8511, "step": 1011500 }, { "epoch": 11.24, "learning_rate": 9.690324945028096e-08, "loss": 3.8487, "step": 1012000 }, { "epoch": 11.24, "learning_rate": 9.68893676565311e-08, "loss": 3.8518, "step": 1012500 }, { "epoch": 11.25, "learning_rate": 9.687548586278123e-08, "loss": 3.8421, "step": 1013000 }, { "epoch": 11.26, "learning_rate": 9.686160406903138e-08, "loss": 3.8607, "step": 1013500 }, { "epoch": 11.26, "learning_rate": 9.684772227528152e-08, "loss": 3.8521, "step": 1014000 }, { "epoch": 11.27, "learning_rate": 9.683384048153166e-08, "loss": 3.8516, "step": 1014500 }, { "epoch": 11.27, "learning_rate": 9.68199586877818e-08, "loss": 3.8581, "step": 1015000 }, { "epoch": 11.28, "learning_rate": 9.680607689403193e-08, "loss": 3.8553, "step": 1015500 }, { "epoch": 11.28, "learning_rate": 9.679219510028207e-08, "loss": 3.8452, "step": 1016000 }, { "epoch": 11.29, "learning_rate": 9.677831330653222e-08, "loss": 3.8642, "step": 1016500 }, { "epoch": 11.29, "learning_rate": 9.676443151278236e-08, "loss": 3.8361, "step": 1017000 }, { "epoch": 11.3, "learning_rate": 9.675054971903249e-08, "loss": 3.8413, "step": 1017500 }, { "epoch": 11.31, "learning_rate": 9.673666792528263e-08, "loss": 3.8569, "step": 1018000 }, { "epoch": 11.31, "learning_rate": 9.672278613153276e-08, "loss": 3.8511, "step": 1018500 }, { "epoch": 11.32, "learning_rate": 9.67089043377829e-08, "loss": 3.837, "step": 1019000 }, { "epoch": 11.32, "learning_rate": 9.669502254403304e-08, "loss": 3.8386, "step": 1019500 }, { "epoch": 11.33, "learning_rate": 9.668114075028319e-08, "loss": 3.837, "step": 1020000 }, { "epoch": 11.33, "learning_rate": 9.666725895653333e-08, "loss": 3.8515, "step": 1020500 }, { "epoch": 11.34, "learning_rate": 9.665337716278347e-08, "loss": 3.8484, "step": 1021000 }, { "epoch": 11.34, "learning_rate": 9.66394953690336e-08, "loss": 3.85, "step": 1021500 }, { "epoch": 11.35, "learning_rate": 9.662561357528374e-08, "loss": 3.8509, "step": 1022000 }, { "epoch": 11.36, "learning_rate": 9.661173178153388e-08, "loss": 3.8674, "step": 1022500 }, { "epoch": 11.36, "learning_rate": 9.659784998778403e-08, "loss": 3.8413, "step": 1023000 }, { "epoch": 11.37, "learning_rate": 9.658396819403415e-08, "loss": 3.8277, "step": 1023500 }, { "epoch": 11.37, "learning_rate": 9.657008640028428e-08, "loss": 3.8627, "step": 1024000 }, { "epoch": 11.38, "learning_rate": 9.655620460653443e-08, "loss": 3.8431, "step": 1024500 }, { "epoch": 11.38, "learning_rate": 9.654232281278457e-08, "loss": 3.8492, "step": 1025000 }, { "epoch": 11.39, "learning_rate": 9.652844101903471e-08, "loss": 3.8423, "step": 1025500 }, { "epoch": 11.39, "learning_rate": 9.651455922528485e-08, "loss": 3.8626, "step": 1026000 }, { "epoch": 11.4, "learning_rate": 9.6500677431535e-08, "loss": 3.8437, "step": 1026500 }, { "epoch": 11.41, "learning_rate": 9.648679563778512e-08, "loss": 3.864, "step": 1027000 }, { "epoch": 11.41, "learning_rate": 9.647291384403527e-08, "loss": 3.8621, "step": 1027500 }, { "epoch": 11.42, "learning_rate": 9.645903205028541e-08, "loss": 3.8585, "step": 1028000 }, { "epoch": 11.42, "learning_rate": 9.644515025653555e-08, "loss": 3.848, "step": 1028500 }, { "epoch": 11.43, "learning_rate": 9.643126846278569e-08, "loss": 3.8561, "step": 1029000 }, { "epoch": 11.43, "learning_rate": 9.641738666903582e-08, "loss": 3.8447, "step": 1029500 }, { "epoch": 11.44, "learning_rate": 9.640350487528595e-08, "loss": 3.8637, "step": 1030000 }, { "epoch": 11.44, "learning_rate": 9.63896230815361e-08, "loss": 3.845, "step": 1030500 }, { "epoch": 11.45, "learning_rate": 9.637574128778624e-08, "loss": 3.8456, "step": 1031000 }, { "epoch": 11.46, "learning_rate": 9.636185949403638e-08, "loss": 3.8689, "step": 1031500 }, { "epoch": 11.46, "learning_rate": 9.634797770028652e-08, "loss": 3.8708, "step": 1032000 }, { "epoch": 11.47, "learning_rate": 9.633409590653665e-08, "loss": 3.8386, "step": 1032500 }, { "epoch": 11.47, "learning_rate": 9.632021411278679e-08, "loss": 3.8458, "step": 1033000 }, { "epoch": 11.48, "learning_rate": 9.630633231903693e-08, "loss": 3.8464, "step": 1033500 }, { "epoch": 11.48, "learning_rate": 9.629245052528708e-08, "loss": 3.8691, "step": 1034000 }, { "epoch": 11.49, "learning_rate": 9.627856873153722e-08, "loss": 3.8595, "step": 1034500 }, { "epoch": 11.49, "learning_rate": 9.626468693778735e-08, "loss": 3.8135, "step": 1035000 }, { "epoch": 11.5, "learning_rate": 9.625080514403749e-08, "loss": 3.8604, "step": 1035500 }, { "epoch": 11.51, "learning_rate": 9.623692335028762e-08, "loss": 3.8485, "step": 1036000 }, { "epoch": 11.51, "learning_rate": 9.622304155653776e-08, "loss": 3.8416, "step": 1036500 }, { "epoch": 11.52, "learning_rate": 9.62091597627879e-08, "loss": 3.8547, "step": 1037000 }, { "epoch": 11.52, "learning_rate": 9.619527796903805e-08, "loss": 3.8578, "step": 1037500 }, { "epoch": 11.53, "learning_rate": 9.618139617528817e-08, "loss": 3.8577, "step": 1038000 }, { "epoch": 11.53, "learning_rate": 9.616751438153832e-08, "loss": 3.8423, "step": 1038500 }, { "epoch": 11.54, "learning_rate": 9.615363258778846e-08, "loss": 3.8397, "step": 1039000 }, { "epoch": 11.54, "learning_rate": 9.61397507940386e-08, "loss": 3.8531, "step": 1039500 }, { "epoch": 11.55, "learning_rate": 9.612586900028874e-08, "loss": 3.8626, "step": 1040000 }, { "epoch": 11.56, "learning_rate": 9.611198720653889e-08, "loss": 3.8656, "step": 1040500 }, { "epoch": 11.56, "learning_rate": 9.609810541278901e-08, "loss": 3.8743, "step": 1041000 }, { "epoch": 11.57, "learning_rate": 9.608422361903916e-08, "loss": 3.8585, "step": 1041500 }, { "epoch": 11.57, "learning_rate": 9.607034182528929e-08, "loss": 3.8282, "step": 1042000 }, { "epoch": 11.58, "learning_rate": 9.605646003153943e-08, "loss": 3.8469, "step": 1042500 }, { "epoch": 11.58, "learning_rate": 9.604257823778957e-08, "loss": 3.8483, "step": 1043000 }, { "epoch": 11.59, "learning_rate": 9.602869644403971e-08, "loss": 3.85, "step": 1043500 }, { "epoch": 11.59, "learning_rate": 9.601481465028984e-08, "loss": 3.854, "step": 1044000 }, { "epoch": 11.6, "learning_rate": 9.600093285653998e-08, "loss": 3.8255, "step": 1044500 }, { "epoch": 11.61, "learning_rate": 9.598705106279013e-08, "loss": 3.8294, "step": 1045000 }, { "epoch": 11.61, "learning_rate": 9.597316926904027e-08, "loss": 3.8557, "step": 1045500 }, { "epoch": 11.62, "learning_rate": 9.595928747529041e-08, "loss": 3.8637, "step": 1046000 }, { "epoch": 11.62, "learning_rate": 9.594540568154054e-08, "loss": 3.84, "step": 1046500 }, { "epoch": 11.63, "learning_rate": 9.593152388779068e-08, "loss": 3.8525, "step": 1047000 }, { "epoch": 11.63, "learning_rate": 9.591764209404082e-08, "loss": 3.8453, "step": 1047500 }, { "epoch": 11.64, "learning_rate": 9.590376030029095e-08, "loss": 3.8652, "step": 1048000 }, { "epoch": 11.64, "learning_rate": 9.58898785065411e-08, "loss": 3.8566, "step": 1048500 }, { "epoch": 11.65, "learning_rate": 9.587599671279124e-08, "loss": 3.8502, "step": 1049000 }, { "epoch": 11.66, "learning_rate": 9.586211491904137e-08, "loss": 3.852, "step": 1049500 }, { "epoch": 11.66, "learning_rate": 9.584823312529151e-08, "loss": 3.853, "step": 1050000 }, { "epoch": 11.67, "learning_rate": 9.583435133154165e-08, "loss": 3.8677, "step": 1050500 }, { "epoch": 11.67, "learning_rate": 9.58204695377918e-08, "loss": 3.847, "step": 1051000 }, { "epoch": 11.68, "learning_rate": 9.580658774404194e-08, "loss": 3.8677, "step": 1051500 }, { "epoch": 11.68, "learning_rate": 9.579270595029207e-08, "loss": 3.8543, "step": 1052000 }, { "epoch": 11.69, "learning_rate": 9.577882415654221e-08, "loss": 3.8649, "step": 1052500 }, { "epoch": 11.69, "learning_rate": 9.576494236279235e-08, "loss": 3.8709, "step": 1053000 }, { "epoch": 11.7, "learning_rate": 9.575106056904249e-08, "loss": 3.853, "step": 1053500 }, { "epoch": 11.71, "learning_rate": 9.573717877529262e-08, "loss": 3.8393, "step": 1054000 }, { "epoch": 11.71, "learning_rate": 9.572329698154276e-08, "loss": 3.8714, "step": 1054500 }, { "epoch": 11.72, "learning_rate": 9.570941518779289e-08, "loss": 3.8414, "step": 1055000 }, { "epoch": 11.72, "learning_rate": 9.569553339404303e-08, "loss": 3.8584, "step": 1055500 }, { "epoch": 11.73, "learning_rate": 9.568165160029318e-08, "loss": 3.8517, "step": 1056000 }, { "epoch": 11.73, "learning_rate": 9.566776980654332e-08, "loss": 3.835, "step": 1056500 }, { "epoch": 11.74, "learning_rate": 9.565388801279346e-08, "loss": 3.8427, "step": 1057000 }, { "epoch": 11.74, "learning_rate": 9.56400062190436e-08, "loss": 3.8585, "step": 1057500 }, { "epoch": 11.75, "learning_rate": 9.562612442529373e-08, "loss": 3.8566, "step": 1058000 }, { "epoch": 11.76, "learning_rate": 9.561224263154387e-08, "loss": 3.8435, "step": 1058500 }, { "epoch": 11.76, "learning_rate": 9.559836083779402e-08, "loss": 3.8718, "step": 1059000 }, { "epoch": 11.77, "learning_rate": 9.558447904404416e-08, "loss": 3.8519, "step": 1059500 }, { "epoch": 11.77, "learning_rate": 9.557059725029429e-08, "loss": 3.8283, "step": 1060000 }, { "epoch": 11.78, "learning_rate": 9.555671545654442e-08, "loss": 3.8451, "step": 1060500 }, { "epoch": 11.78, "learning_rate": 9.554283366279456e-08, "loss": 3.8543, "step": 1061000 }, { "epoch": 11.79, "learning_rate": 9.55289518690447e-08, "loss": 3.8585, "step": 1061500 }, { "epoch": 11.79, "learning_rate": 9.551507007529484e-08, "loss": 3.8563, "step": 1062000 }, { "epoch": 11.8, "learning_rate": 9.550118828154499e-08, "loss": 3.852, "step": 1062500 }, { "epoch": 11.81, "learning_rate": 9.548730648779513e-08, "loss": 3.854, "step": 1063000 }, { "epoch": 11.81, "learning_rate": 9.547342469404526e-08, "loss": 3.8477, "step": 1063500 }, { "epoch": 11.82, "learning_rate": 9.54595429002954e-08, "loss": 3.8522, "step": 1064000 }, { "epoch": 11.82, "learning_rate": 9.544566110654554e-08, "loss": 3.8577, "step": 1064500 }, { "epoch": 11.83, "learning_rate": 9.543177931279568e-08, "loss": 3.861, "step": 1065000 }, { "epoch": 11.83, "learning_rate": 9.541789751904581e-08, "loss": 3.8263, "step": 1065500 }, { "epoch": 11.84, "learning_rate": 9.540401572529596e-08, "loss": 3.8297, "step": 1066000 }, { "epoch": 11.84, "learning_rate": 9.539013393154608e-08, "loss": 3.8488, "step": 1066500 }, { "epoch": 11.85, "learning_rate": 9.537625213779623e-08, "loss": 3.8334, "step": 1067000 }, { "epoch": 11.86, "learning_rate": 9.536237034404637e-08, "loss": 3.8351, "step": 1067500 }, { "epoch": 11.86, "learning_rate": 9.534848855029651e-08, "loss": 3.8691, "step": 1068000 }, { "epoch": 11.87, "learning_rate": 9.533460675654665e-08, "loss": 3.8423, "step": 1068500 }, { "epoch": 11.87, "learning_rate": 9.532072496279678e-08, "loss": 3.8619, "step": 1069000 }, { "epoch": 11.88, "learning_rate": 9.530684316904693e-08, "loss": 3.8632, "step": 1069500 }, { "epoch": 11.88, "learning_rate": 9.529296137529707e-08, "loss": 3.8607, "step": 1070000 }, { "epoch": 11.89, "learning_rate": 9.527907958154721e-08, "loss": 3.8384, "step": 1070500 }, { "epoch": 11.89, "learning_rate": 9.526519778779735e-08, "loss": 3.8599, "step": 1071000 }, { "epoch": 11.9, "learning_rate": 9.525131599404748e-08, "loss": 3.838, "step": 1071500 }, { "epoch": 11.91, "learning_rate": 9.523743420029762e-08, "loss": 3.8405, "step": 1072000 }, { "epoch": 11.91, "learning_rate": 9.522355240654775e-08, "loss": 3.8304, "step": 1072500 }, { "epoch": 11.92, "learning_rate": 9.52096706127979e-08, "loss": 3.8313, "step": 1073000 }, { "epoch": 11.92, "learning_rate": 9.519578881904804e-08, "loss": 3.8397, "step": 1073500 }, { "epoch": 11.93, "learning_rate": 9.518190702529818e-08, "loss": 3.8488, "step": 1074000 }, { "epoch": 11.93, "learning_rate": 9.516802523154831e-08, "loss": 3.8463, "step": 1074500 }, { "epoch": 11.94, "learning_rate": 9.515414343779845e-08, "loss": 3.8501, "step": 1075000 }, { "epoch": 11.94, "learning_rate": 9.514026164404859e-08, "loss": 3.8566, "step": 1075500 }, { "epoch": 11.95, "learning_rate": 9.512637985029874e-08, "loss": 3.8396, "step": 1076000 }, { "epoch": 11.96, "learning_rate": 9.511249805654888e-08, "loss": 3.8417, "step": 1076500 }, { "epoch": 11.96, "learning_rate": 9.509861626279902e-08, "loss": 3.8362, "step": 1077000 }, { "epoch": 11.97, "learning_rate": 9.508473446904915e-08, "loss": 3.8552, "step": 1077500 }, { "epoch": 11.97, "learning_rate": 9.507085267529929e-08, "loss": 3.8504, "step": 1078000 }, { "epoch": 11.98, "learning_rate": 9.505697088154942e-08, "loss": 3.8471, "step": 1078500 }, { "epoch": 11.98, "learning_rate": 9.504308908779956e-08, "loss": 3.8452, "step": 1079000 }, { "epoch": 11.99, "learning_rate": 9.50292072940497e-08, "loss": 3.8309, "step": 1079500 }, { "epoch": 11.99, "learning_rate": 9.501532550029985e-08, "loss": 3.8516, "step": 1080000 }, { "epoch": 12.0, "learning_rate": 9.500144370654998e-08, "loss": 3.83, "step": 1080500 }, { "epoch": 12.0, "eval_loss": 3.888516426086426, "eval_runtime": 6.3054, "eval_samples_per_second": 246.455, "step": 1080552 }, { "epoch": 12.0, "learning_rate": 9.498756191280012e-08, "loss": 3.8563, "step": 1081000 }, { "epoch": 12.01, "learning_rate": 9.497368011905026e-08, "loss": 3.8492, "step": 1081500 }, { "epoch": 12.02, "learning_rate": 9.49597983253004e-08, "loss": 3.851, "step": 1082000 }, { "epoch": 12.02, "learning_rate": 9.494591653155054e-08, "loss": 3.8411, "step": 1082500 }, { "epoch": 12.03, "learning_rate": 9.493203473780067e-08, "loss": 3.8741, "step": 1083000 }, { "epoch": 12.03, "learning_rate": 9.491815294405082e-08, "loss": 3.8397, "step": 1083500 }, { "epoch": 12.04, "learning_rate": 9.490427115030096e-08, "loss": 3.8422, "step": 1084000 }, { "epoch": 12.04, "learning_rate": 9.489038935655109e-08, "loss": 3.8456, "step": 1084500 }, { "epoch": 12.05, "learning_rate": 9.487650756280123e-08, "loss": 3.8559, "step": 1085000 }, { "epoch": 12.05, "learning_rate": 9.486262576905137e-08, "loss": 3.8318, "step": 1085500 }, { "epoch": 12.06, "learning_rate": 9.48487439753015e-08, "loss": 3.8484, "step": 1086000 }, { "epoch": 12.07, "learning_rate": 9.483486218155164e-08, "loss": 3.8378, "step": 1086500 }, { "epoch": 12.07, "learning_rate": 9.482098038780179e-08, "loss": 3.8473, "step": 1087000 }, { "epoch": 12.08, "learning_rate": 9.480709859405193e-08, "loss": 3.8481, "step": 1087500 }, { "epoch": 12.08, "learning_rate": 9.479321680030207e-08, "loss": 3.8382, "step": 1088000 }, { "epoch": 12.09, "learning_rate": 9.47793350065522e-08, "loss": 3.8414, "step": 1088500 }, { "epoch": 12.09, "learning_rate": 9.476545321280234e-08, "loss": 3.861, "step": 1089000 }, { "epoch": 12.1, "learning_rate": 9.475157141905248e-08, "loss": 3.8713, "step": 1089500 }, { "epoch": 12.1, "learning_rate": 9.473768962530263e-08, "loss": 3.8539, "step": 1090000 }, { "epoch": 12.11, "learning_rate": 9.472380783155275e-08, "loss": 3.8533, "step": 1090500 }, { "epoch": 12.12, "learning_rate": 9.47099260378029e-08, "loss": 3.8408, "step": 1091000 }, { "epoch": 12.12, "learning_rate": 9.469604424405303e-08, "loss": 3.8527, "step": 1091500 }, { "epoch": 12.13, "learning_rate": 9.468216245030317e-08, "loss": 3.8252, "step": 1092000 }, { "epoch": 12.13, "learning_rate": 9.466828065655331e-08, "loss": 3.8371, "step": 1092500 }, { "epoch": 12.14, "learning_rate": 9.465439886280345e-08, "loss": 3.8442, "step": 1093000 }, { "epoch": 12.14, "learning_rate": 9.46405170690536e-08, "loss": 3.8413, "step": 1093500 }, { "epoch": 12.15, "learning_rate": 9.462663527530374e-08, "loss": 3.8494, "step": 1094000 }, { "epoch": 12.15, "learning_rate": 9.461275348155387e-08, "loss": 3.8702, "step": 1094500 }, { "epoch": 12.16, "learning_rate": 9.459887168780401e-08, "loss": 3.8266, "step": 1095000 }, { "epoch": 12.17, "learning_rate": 9.458498989405415e-08, "loss": 3.8341, "step": 1095500 }, { "epoch": 12.17, "learning_rate": 9.457110810030428e-08, "loss": 3.8437, "step": 1096000 }, { "epoch": 12.18, "learning_rate": 9.455722630655442e-08, "loss": 3.8279, "step": 1096500 }, { "epoch": 12.18, "learning_rate": 9.454334451280455e-08, "loss": 3.8534, "step": 1097000 }, { "epoch": 12.19, "learning_rate": 9.45294627190547e-08, "loss": 3.8467, "step": 1097500 }, { "epoch": 12.19, "learning_rate": 9.451558092530484e-08, "loss": 3.8438, "step": 1098000 }, { "epoch": 12.2, "learning_rate": 9.450169913155498e-08, "loss": 3.818, "step": 1098500 }, { "epoch": 12.2, "learning_rate": 9.448781733780512e-08, "loss": 3.8491, "step": 1099000 }, { "epoch": 12.21, "learning_rate": 9.447393554405526e-08, "loss": 3.8233, "step": 1099500 }, { "epoch": 12.22, "learning_rate": 9.446005375030539e-08, "loss": 3.8397, "step": 1100000 }, { "epoch": 12.22, "learning_rate": 9.444617195655553e-08, "loss": 3.8621, "step": 1100500 }, { "epoch": 12.23, "learning_rate": 9.443229016280568e-08, "loss": 3.852, "step": 1101000 }, { "epoch": 12.23, "learning_rate": 9.441840836905582e-08, "loss": 3.8483, "step": 1101500 }, { "epoch": 12.24, "learning_rate": 9.440452657530595e-08, "loss": 3.8358, "step": 1102000 }, { "epoch": 12.24, "learning_rate": 9.439064478155609e-08, "loss": 3.8365, "step": 1102500 }, { "epoch": 12.25, "learning_rate": 9.437676298780622e-08, "loss": 3.8265, "step": 1103000 }, { "epoch": 12.25, "learning_rate": 9.436288119405636e-08, "loss": 3.8515, "step": 1103500 }, { "epoch": 12.26, "learning_rate": 9.43489994003065e-08, "loss": 3.8318, "step": 1104000 }, { "epoch": 12.27, "learning_rate": 9.433511760655665e-08, "loss": 3.8362, "step": 1104500 }, { "epoch": 12.27, "learning_rate": 9.432123581280679e-08, "loss": 3.8564, "step": 1105000 }, { "epoch": 12.28, "learning_rate": 9.430735401905692e-08, "loss": 3.8415, "step": 1105500 }, { "epoch": 12.28, "learning_rate": 9.429347222530706e-08, "loss": 3.8442, "step": 1106000 }, { "epoch": 12.29, "learning_rate": 9.42795904315572e-08, "loss": 3.8349, "step": 1106500 }, { "epoch": 12.29, "learning_rate": 9.426570863780734e-08, "loss": 3.8474, "step": 1107000 }, { "epoch": 12.3, "learning_rate": 9.425182684405749e-08, "loss": 3.8379, "step": 1107500 }, { "epoch": 12.3, "learning_rate": 9.423794505030761e-08, "loss": 3.8386, "step": 1108000 }, { "epoch": 12.31, "learning_rate": 9.422406325655776e-08, "loss": 3.8351, "step": 1108500 }, { "epoch": 12.32, "learning_rate": 9.421018146280789e-08, "loss": 3.8518, "step": 1109000 }, { "epoch": 12.32, "learning_rate": 9.419629966905803e-08, "loss": 3.8268, "step": 1109500 }, { "epoch": 12.33, "learning_rate": 9.418241787530817e-08, "loss": 3.8354, "step": 1110000 }, { "epoch": 12.33, "learning_rate": 9.416853608155831e-08, "loss": 3.8409, "step": 1110500 }, { "epoch": 12.34, "learning_rate": 9.415465428780844e-08, "loss": 3.8453, "step": 1111000 }, { "epoch": 12.34, "learning_rate": 9.414077249405858e-08, "loss": 3.8582, "step": 1111500 }, { "epoch": 12.35, "learning_rate": 9.412689070030873e-08, "loss": 3.8493, "step": 1112000 }, { "epoch": 12.35, "learning_rate": 9.411300890655887e-08, "loss": 3.8446, "step": 1112500 }, { "epoch": 12.36, "learning_rate": 9.409912711280901e-08, "loss": 3.8368, "step": 1113000 }, { "epoch": 12.37, "learning_rate": 9.408524531905915e-08, "loss": 3.8305, "step": 1113500 }, { "epoch": 12.37, "learning_rate": 9.407136352530928e-08, "loss": 3.8499, "step": 1114000 }, { "epoch": 12.38, "learning_rate": 9.405748173155942e-08, "loss": 3.86, "step": 1114500 }, { "epoch": 12.38, "learning_rate": 9.404359993780955e-08, "loss": 3.8443, "step": 1115000 }, { "epoch": 12.39, "learning_rate": 9.40297181440597e-08, "loss": 3.8679, "step": 1115500 }, { "epoch": 12.39, "learning_rate": 9.401583635030984e-08, "loss": 3.8378, "step": 1116000 }, { "epoch": 12.4, "learning_rate": 9.400195455655998e-08, "loss": 3.8292, "step": 1116500 }, { "epoch": 12.4, "learning_rate": 9.398807276281011e-08, "loss": 3.8584, "step": 1117000 }, { "epoch": 12.41, "learning_rate": 9.397419096906025e-08, "loss": 3.866, "step": 1117500 }, { "epoch": 12.42, "learning_rate": 9.39603091753104e-08, "loss": 3.8483, "step": 1118000 }, { "epoch": 12.42, "learning_rate": 9.394642738156054e-08, "loss": 3.8627, "step": 1118500 }, { "epoch": 12.43, "learning_rate": 9.393254558781068e-08, "loss": 3.8488, "step": 1119000 }, { "epoch": 12.43, "learning_rate": 9.391866379406081e-08, "loss": 3.8252, "step": 1119500 }, { "epoch": 12.44, "learning_rate": 9.390478200031095e-08, "loss": 3.8566, "step": 1120000 }, { "epoch": 12.44, "learning_rate": 9.389090020656109e-08, "loss": 3.8523, "step": 1120500 }, { "epoch": 12.45, "learning_rate": 9.387701841281122e-08, "loss": 3.8203, "step": 1121000 }, { "epoch": 12.45, "learning_rate": 9.386313661906136e-08, "loss": 3.8511, "step": 1121500 }, { "epoch": 12.46, "learning_rate": 9.38492548253115e-08, "loss": 3.8347, "step": 1122000 }, { "epoch": 12.47, "learning_rate": 9.383537303156163e-08, "loss": 3.8507, "step": 1122500 }, { "epoch": 12.47, "learning_rate": 9.382149123781178e-08, "loss": 3.8291, "step": 1123000 }, { "epoch": 12.48, "learning_rate": 9.380760944406192e-08, "loss": 3.8277, "step": 1123500 }, { "epoch": 12.48, "learning_rate": 9.379372765031206e-08, "loss": 3.8396, "step": 1124000 }, { "epoch": 12.49, "learning_rate": 9.37798458565622e-08, "loss": 3.8276, "step": 1124500 }, { "epoch": 12.49, "learning_rate": 9.376596406281235e-08, "loss": 3.8473, "step": 1125000 }, { "epoch": 12.5, "learning_rate": 9.375208226906248e-08, "loss": 3.8325, "step": 1125500 }, { "epoch": 12.5, "learning_rate": 9.373820047531262e-08, "loss": 3.8774, "step": 1126000 }, { "epoch": 12.51, "learning_rate": 9.372431868156275e-08, "loss": 3.8307, "step": 1126500 }, { "epoch": 12.52, "learning_rate": 9.371043688781289e-08, "loss": 3.8285, "step": 1127000 }, { "epoch": 12.52, "learning_rate": 9.369655509406303e-08, "loss": 3.8353, "step": 1127500 }, { "epoch": 12.53, "learning_rate": 9.368267330031316e-08, "loss": 3.8449, "step": 1128000 }, { "epoch": 12.53, "learning_rate": 9.36687915065633e-08, "loss": 3.8381, "step": 1128500 }, { "epoch": 12.54, "learning_rate": 9.365490971281344e-08, "loss": 3.8458, "step": 1129000 }, { "epoch": 12.54, "learning_rate": 9.364102791906359e-08, "loss": 3.8363, "step": 1129500 }, { "epoch": 12.55, "learning_rate": 9.362714612531373e-08, "loss": 3.8648, "step": 1130000 }, { "epoch": 12.55, "learning_rate": 9.361326433156387e-08, "loss": 3.8383, "step": 1130500 }, { "epoch": 12.56, "learning_rate": 9.3599382537814e-08, "loss": 3.8339, "step": 1131000 }, { "epoch": 12.57, "learning_rate": 9.358550074406414e-08, "loss": 3.8529, "step": 1131500 }, { "epoch": 12.57, "learning_rate": 9.357161895031428e-08, "loss": 3.8507, "step": 1132000 }, { "epoch": 12.58, "learning_rate": 9.355773715656441e-08, "loss": 3.8485, "step": 1132500 }, { "epoch": 12.58, "learning_rate": 9.354385536281456e-08, "loss": 3.8221, "step": 1133000 }, { "epoch": 12.59, "learning_rate": 9.352997356906469e-08, "loss": 3.8151, "step": 1133500 }, { "epoch": 12.59, "learning_rate": 9.351609177531483e-08, "loss": 3.8608, "step": 1134000 }, { "epoch": 12.6, "learning_rate": 9.350220998156497e-08, "loss": 3.83, "step": 1134500 }, { "epoch": 12.6, "learning_rate": 9.348832818781511e-08, "loss": 3.839, "step": 1135000 }, { "epoch": 12.61, "learning_rate": 9.347444639406525e-08, "loss": 3.8346, "step": 1135500 }, { "epoch": 12.62, "learning_rate": 9.34605646003154e-08, "loss": 3.8454, "step": 1136000 }, { "epoch": 12.62, "learning_rate": 9.344668280656553e-08, "loss": 3.8569, "step": 1136500 }, { "epoch": 12.63, "learning_rate": 9.343280101281567e-08, "loss": 3.83, "step": 1137000 }, { "epoch": 12.63, "learning_rate": 9.341891921906581e-08, "loss": 3.8611, "step": 1137500 }, { "epoch": 12.64, "learning_rate": 9.340503742531595e-08, "loss": 3.855, "step": 1138000 }, { "epoch": 12.64, "learning_rate": 9.339115563156608e-08, "loss": 3.8463, "step": 1138500 }, { "epoch": 12.65, "learning_rate": 9.337727383781622e-08, "loss": 3.8259, "step": 1139000 }, { "epoch": 12.65, "learning_rate": 9.336339204406635e-08, "loss": 3.8309, "step": 1139500 }, { "epoch": 12.66, "learning_rate": 9.33495102503165e-08, "loss": 3.8177, "step": 1140000 }, { "epoch": 12.67, "learning_rate": 9.333562845656664e-08, "loss": 3.8511, "step": 1140500 }, { "epoch": 12.67, "learning_rate": 9.332174666281678e-08, "loss": 3.8532, "step": 1141000 }, { "epoch": 12.68, "learning_rate": 9.330786486906692e-08, "loss": 3.8362, "step": 1141500 }, { "epoch": 12.68, "learning_rate": 9.329398307531705e-08, "loss": 3.8447, "step": 1142000 }, { "epoch": 12.69, "learning_rate": 9.328010128156719e-08, "loss": 3.8472, "step": 1142500 }, { "epoch": 12.69, "learning_rate": 9.326621948781734e-08, "loss": 3.8493, "step": 1143000 }, { "epoch": 12.7, "learning_rate": 9.325233769406748e-08, "loss": 3.8513, "step": 1143500 }, { "epoch": 12.7, "learning_rate": 9.323845590031762e-08, "loss": 3.856, "step": 1144000 }, { "epoch": 12.71, "learning_rate": 9.322457410656775e-08, "loss": 3.837, "step": 1144500 }, { "epoch": 12.72, "learning_rate": 9.321069231281789e-08, "loss": 3.8405, "step": 1145000 }, { "epoch": 12.72, "learning_rate": 9.319681051906802e-08, "loss": 3.8275, "step": 1145500 }, { "epoch": 12.73, "learning_rate": 9.318292872531816e-08, "loss": 3.844, "step": 1146000 }, { "epoch": 12.73, "learning_rate": 9.31690469315683e-08, "loss": 3.8407, "step": 1146500 }, { "epoch": 12.74, "learning_rate": 9.315516513781845e-08, "loss": 3.8365, "step": 1147000 }, { "epoch": 12.74, "learning_rate": 9.314128334406858e-08, "loss": 3.8468, "step": 1147500 }, { "epoch": 12.75, "learning_rate": 9.312740155031872e-08, "loss": 3.8471, "step": 1148000 }, { "epoch": 12.75, "learning_rate": 9.311351975656886e-08, "loss": 3.8566, "step": 1148500 }, { "epoch": 12.76, "learning_rate": 9.3099637962819e-08, "loss": 3.8344, "step": 1149000 }, { "epoch": 12.77, "learning_rate": 9.308575616906914e-08, "loss": 3.8387, "step": 1149500 }, { "epoch": 12.77, "learning_rate": 9.307187437531929e-08, "loss": 3.8406, "step": 1150000 }, { "epoch": 12.78, "learning_rate": 9.305799258156942e-08, "loss": 3.8326, "step": 1150500 }, { "epoch": 12.78, "learning_rate": 9.304411078781956e-08, "loss": 3.8347, "step": 1151000 }, { "epoch": 12.79, "learning_rate": 9.303022899406969e-08, "loss": 3.8513, "step": 1151500 }, { "epoch": 12.79, "learning_rate": 9.301634720031983e-08, "loss": 3.8445, "step": 1152000 }, { "epoch": 12.8, "learning_rate": 9.300246540656997e-08, "loss": 3.833, "step": 1152500 }, { "epoch": 12.8, "learning_rate": 9.298858361282011e-08, "loss": 3.8304, "step": 1153000 }, { "epoch": 12.81, "learning_rate": 9.297470181907024e-08, "loss": 3.8581, "step": 1153500 }, { "epoch": 12.82, "learning_rate": 9.296082002532039e-08, "loss": 3.8312, "step": 1154000 }, { "epoch": 12.82, "learning_rate": 9.294693823157053e-08, "loss": 3.8399, "step": 1154500 }, { "epoch": 12.83, "learning_rate": 9.293305643782067e-08, "loss": 3.8477, "step": 1155000 }, { "epoch": 12.83, "learning_rate": 9.291917464407081e-08, "loss": 3.8314, "step": 1155500 }, { "epoch": 12.84, "learning_rate": 9.290529285032094e-08, "loss": 3.8202, "step": 1156000 }, { "epoch": 12.84, "learning_rate": 9.289141105657108e-08, "loss": 3.8501, "step": 1156500 }, { "epoch": 12.85, "learning_rate": 9.287752926282121e-08, "loss": 3.8339, "step": 1157000 }, { "epoch": 12.85, "learning_rate": 9.286364746907135e-08, "loss": 3.8303, "step": 1157500 }, { "epoch": 12.86, "learning_rate": 9.28497656753215e-08, "loss": 3.836, "step": 1158000 }, { "epoch": 12.87, "learning_rate": 9.283588388157164e-08, "loss": 3.8331, "step": 1158500 }, { "epoch": 12.87, "learning_rate": 9.282200208782177e-08, "loss": 3.8067, "step": 1159000 }, { "epoch": 12.88, "learning_rate": 9.280812029407191e-08, "loss": 3.8322, "step": 1159500 }, { "epoch": 12.88, "learning_rate": 9.279423850032205e-08, "loss": 3.8508, "step": 1160000 }, { "epoch": 12.89, "learning_rate": 9.27803567065722e-08, "loss": 3.8496, "step": 1160500 }, { "epoch": 12.89, "learning_rate": 9.276647491282234e-08, "loss": 3.8249, "step": 1161000 }, { "epoch": 12.9, "learning_rate": 9.275259311907248e-08, "loss": 3.8375, "step": 1161500 }, { "epoch": 12.9, "learning_rate": 9.273871132532261e-08, "loss": 3.8365, "step": 1162000 }, { "epoch": 12.91, "learning_rate": 9.272482953157275e-08, "loss": 3.8506, "step": 1162500 }, { "epoch": 12.92, "learning_rate": 9.271094773782288e-08, "loss": 3.8463, "step": 1163000 }, { "epoch": 12.92, "learning_rate": 9.269706594407302e-08, "loss": 3.8315, "step": 1163500 }, { "epoch": 12.93, "learning_rate": 9.268318415032316e-08, "loss": 3.8338, "step": 1164000 }, { "epoch": 12.93, "learning_rate": 9.26693023565733e-08, "loss": 3.8307, "step": 1164500 }, { "epoch": 12.94, "learning_rate": 9.265542056282344e-08, "loss": 3.8092, "step": 1165000 }, { "epoch": 12.94, "learning_rate": 9.264153876907358e-08, "loss": 3.8233, "step": 1165500 }, { "epoch": 12.95, "learning_rate": 9.262765697532372e-08, "loss": 3.851, "step": 1166000 }, { "epoch": 12.95, "learning_rate": 9.261377518157386e-08, "loss": 3.8444, "step": 1166500 }, { "epoch": 12.96, "learning_rate": 9.2599893387824e-08, "loss": 3.8309, "step": 1167000 }, { "epoch": 12.97, "learning_rate": 9.258601159407413e-08, "loss": 3.8357, "step": 1167500 }, { "epoch": 12.97, "learning_rate": 9.257212980032428e-08, "loss": 3.8593, "step": 1168000 }, { "epoch": 12.98, "learning_rate": 9.255824800657442e-08, "loss": 3.8359, "step": 1168500 }, { "epoch": 12.98, "learning_rate": 9.254436621282455e-08, "loss": 3.8585, "step": 1169000 }, { "epoch": 12.99, "learning_rate": 9.253048441907469e-08, "loss": 3.8193, "step": 1169500 }, { "epoch": 12.99, "learning_rate": 9.251660262532482e-08, "loss": 3.8318, "step": 1170000 }, { "epoch": 13.0, "learning_rate": 9.250272083157496e-08, "loss": 3.8407, "step": 1170500 }, { "epoch": 13.0, "eval_loss": 3.8813984394073486, "eval_runtime": 6.3111, "eval_samples_per_second": 246.232, "step": 1170598 }, { "epoch": 13.0, "learning_rate": 9.24888390378251e-08, "loss": 3.8446, "step": 1171000 }, { "epoch": 13.01, "learning_rate": 9.247495724407525e-08, "loss": 3.8684, "step": 1171500 }, { "epoch": 13.02, "learning_rate": 9.246107545032539e-08, "loss": 3.844, "step": 1172000 }, { "epoch": 13.02, "learning_rate": 9.244719365657553e-08, "loss": 3.82, "step": 1172500 }, { "epoch": 13.03, "learning_rate": 9.243331186282566e-08, "loss": 3.8427, "step": 1173000 }, { "epoch": 13.03, "learning_rate": 9.24194300690758e-08, "loss": 3.8248, "step": 1173500 }, { "epoch": 13.04, "learning_rate": 9.240554827532594e-08, "loss": 3.8373, "step": 1174000 }, { "epoch": 13.04, "learning_rate": 9.239166648157609e-08, "loss": 3.8277, "step": 1174500 }, { "epoch": 13.05, "learning_rate": 9.237778468782622e-08, "loss": 3.8483, "step": 1175000 }, { "epoch": 13.05, "learning_rate": 9.236390289407636e-08, "loss": 3.8482, "step": 1175500 }, { "epoch": 13.06, "learning_rate": 9.235002110032649e-08, "loss": 3.843, "step": 1176000 }, { "epoch": 13.07, "learning_rate": 9.233613930657663e-08, "loss": 3.8175, "step": 1176500 }, { "epoch": 13.07, "learning_rate": 9.232225751282677e-08, "loss": 3.8232, "step": 1177000 }, { "epoch": 13.08, "learning_rate": 9.230837571907691e-08, "loss": 3.8571, "step": 1177500 }, { "epoch": 13.08, "learning_rate": 9.229449392532706e-08, "loss": 3.8311, "step": 1178000 }, { "epoch": 13.09, "learning_rate": 9.228061213157718e-08, "loss": 3.8371, "step": 1178500 }, { "epoch": 13.09, "learning_rate": 9.226673033782733e-08, "loss": 3.8236, "step": 1179000 }, { "epoch": 13.1, "learning_rate": 9.225284854407747e-08, "loss": 3.838, "step": 1179500 }, { "epoch": 13.1, "learning_rate": 9.223896675032761e-08, "loss": 3.8577, "step": 1180000 }, { "epoch": 13.11, "learning_rate": 9.222508495657775e-08, "loss": 3.825, "step": 1180500 }, { "epoch": 13.12, "learning_rate": 9.221120316282788e-08, "loss": 3.8368, "step": 1181000 }, { "epoch": 13.12, "learning_rate": 9.219732136907802e-08, "loss": 3.8097, "step": 1181500 }, { "epoch": 13.13, "learning_rate": 9.218343957532815e-08, "loss": 3.8258, "step": 1182000 }, { "epoch": 13.13, "learning_rate": 9.21695577815783e-08, "loss": 3.8237, "step": 1182500 }, { "epoch": 13.14, "learning_rate": 9.215567598782844e-08, "loss": 3.8195, "step": 1183000 }, { "epoch": 13.14, "learning_rate": 9.214179419407858e-08, "loss": 3.8304, "step": 1183500 }, { "epoch": 13.15, "learning_rate": 9.212791240032872e-08, "loss": 3.8469, "step": 1184000 }, { "epoch": 13.15, "learning_rate": 9.211403060657885e-08, "loss": 3.8369, "step": 1184500 }, { "epoch": 13.16, "learning_rate": 9.2100148812829e-08, "loss": 3.8422, "step": 1185000 }, { "epoch": 13.17, "learning_rate": 9.208626701907914e-08, "loss": 3.8232, "step": 1185500 }, { "epoch": 13.17, "learning_rate": 9.207238522532928e-08, "loss": 3.8285, "step": 1186000 }, { "epoch": 13.18, "learning_rate": 9.205850343157942e-08, "loss": 3.8418, "step": 1186500 }, { "epoch": 13.18, "learning_rate": 9.204462163782955e-08, "loss": 3.8435, "step": 1187000 }, { "epoch": 13.19, "learning_rate": 9.203073984407968e-08, "loss": 3.8342, "step": 1187500 }, { "epoch": 13.19, "learning_rate": 9.201685805032982e-08, "loss": 3.8415, "step": 1188000 }, { "epoch": 13.2, "learning_rate": 9.200297625657996e-08, "loss": 3.8621, "step": 1188500 }, { "epoch": 13.2, "learning_rate": 9.19890944628301e-08, "loss": 3.8267, "step": 1189000 }, { "epoch": 13.21, "learning_rate": 9.197521266908025e-08, "loss": 3.8353, "step": 1189500 }, { "epoch": 13.22, "learning_rate": 9.196133087533038e-08, "loss": 3.8356, "step": 1190000 }, { "epoch": 13.22, "learning_rate": 9.194744908158052e-08, "loss": 3.8301, "step": 1190500 }, { "epoch": 13.23, "learning_rate": 9.193356728783066e-08, "loss": 3.8331, "step": 1191000 }, { "epoch": 13.23, "learning_rate": 9.19196854940808e-08, "loss": 3.8421, "step": 1191500 }, { "epoch": 13.24, "learning_rate": 9.190580370033095e-08, "loss": 3.8236, "step": 1192000 }, { "epoch": 13.24, "learning_rate": 9.189192190658108e-08, "loss": 3.8194, "step": 1192500 }, { "epoch": 13.25, "learning_rate": 9.187804011283122e-08, "loss": 3.8467, "step": 1193000 }, { "epoch": 13.25, "learning_rate": 9.186415831908135e-08, "loss": 3.8361, "step": 1193500 }, { "epoch": 13.26, "learning_rate": 9.185027652533149e-08, "loss": 3.8425, "step": 1194000 }, { "epoch": 13.27, "learning_rate": 9.183639473158163e-08, "loss": 3.8211, "step": 1194500 }, { "epoch": 13.27, "learning_rate": 9.182251293783177e-08, "loss": 3.8441, "step": 1195000 }, { "epoch": 13.28, "learning_rate": 9.18086311440819e-08, "loss": 3.8334, "step": 1195500 }, { "epoch": 13.28, "learning_rate": 9.179474935033204e-08, "loss": 3.8259, "step": 1196000 }, { "epoch": 13.29, "learning_rate": 9.178086755658219e-08, "loss": 3.8333, "step": 1196500 }, { "epoch": 13.29, "learning_rate": 9.176698576283233e-08, "loss": 3.824, "step": 1197000 }, { "epoch": 13.3, "learning_rate": 9.175310396908247e-08, "loss": 3.8325, "step": 1197500 }, { "epoch": 13.3, "learning_rate": 9.173922217533261e-08, "loss": 3.8378, "step": 1198000 }, { "epoch": 13.31, "learning_rate": 9.172534038158274e-08, "loss": 3.814, "step": 1198500 }, { "epoch": 13.32, "learning_rate": 9.171145858783288e-08, "loss": 3.8298, "step": 1199000 }, { "epoch": 13.32, "learning_rate": 9.169757679408301e-08, "loss": 3.8539, "step": 1199500 }, { "epoch": 13.33, "learning_rate": 9.168369500033316e-08, "loss": 3.8354, "step": 1200000 }, { "epoch": 13.33, "learning_rate": 9.16698132065833e-08, "loss": 3.8356, "step": 1200500 }, { "epoch": 13.34, "learning_rate": 9.165593141283343e-08, "loss": 3.8324, "step": 1201000 }, { "epoch": 13.34, "learning_rate": 9.164204961908357e-08, "loss": 3.8284, "step": 1201500 }, { "epoch": 13.35, "learning_rate": 9.162816782533371e-08, "loss": 3.8494, "step": 1202000 }, { "epoch": 13.35, "learning_rate": 9.161428603158385e-08, "loss": 3.8236, "step": 1202500 }, { "epoch": 13.36, "learning_rate": 9.1600404237834e-08, "loss": 3.8249, "step": 1203000 }, { "epoch": 13.37, "learning_rate": 9.158652244408414e-08, "loss": 3.8159, "step": 1203500 }, { "epoch": 13.37, "learning_rate": 9.157264065033427e-08, "loss": 3.8306, "step": 1204000 }, { "epoch": 13.38, "learning_rate": 9.155875885658441e-08, "loss": 3.848, "step": 1204500 }, { "epoch": 13.38, "learning_rate": 9.154487706283455e-08, "loss": 3.8348, "step": 1205000 }, { "epoch": 13.39, "learning_rate": 9.153099526908468e-08, "loss": 3.8314, "step": 1205500 }, { "epoch": 13.39, "learning_rate": 9.151711347533482e-08, "loss": 3.8299, "step": 1206000 }, { "epoch": 13.4, "learning_rate": 9.150323168158495e-08, "loss": 3.8377, "step": 1206500 }, { "epoch": 13.4, "learning_rate": 9.14893498878351e-08, "loss": 3.8287, "step": 1207000 }, { "epoch": 13.41, "learning_rate": 9.147546809408524e-08, "loss": 3.8361, "step": 1207500 }, { "epoch": 13.42, "learning_rate": 9.146158630033538e-08, "loss": 3.8096, "step": 1208000 }, { "epoch": 13.42, "learning_rate": 9.144770450658552e-08, "loss": 3.8503, "step": 1208500 }, { "epoch": 13.43, "learning_rate": 9.143382271283566e-08, "loss": 3.8176, "step": 1209000 }, { "epoch": 13.43, "learning_rate": 9.141994091908579e-08, "loss": 3.8606, "step": 1209500 }, { "epoch": 13.44, "learning_rate": 9.140605912533594e-08, "loss": 3.8349, "step": 1210000 }, { "epoch": 13.44, "learning_rate": 9.139217733158608e-08, "loss": 3.8188, "step": 1210500 }, { "epoch": 13.45, "learning_rate": 9.137829553783622e-08, "loss": 3.8401, "step": 1211000 }, { "epoch": 13.45, "learning_rate": 9.136441374408635e-08, "loss": 3.8354, "step": 1211500 }, { "epoch": 13.46, "learning_rate": 9.135053195033649e-08, "loss": 3.8282, "step": 1212000 }, { "epoch": 13.47, "learning_rate": 9.133665015658662e-08, "loss": 3.8049, "step": 1212500 }, { "epoch": 13.47, "learning_rate": 9.132276836283676e-08, "loss": 3.8396, "step": 1213000 }, { "epoch": 13.48, "learning_rate": 9.13088865690869e-08, "loss": 3.8318, "step": 1213500 }, { "epoch": 13.48, "learning_rate": 9.129500477533705e-08, "loss": 3.8309, "step": 1214000 }, { "epoch": 13.49, "learning_rate": 9.128112298158719e-08, "loss": 3.8524, "step": 1214500 }, { "epoch": 13.49, "learning_rate": 9.126724118783732e-08, "loss": 3.8368, "step": 1215000 }, { "epoch": 13.5, "learning_rate": 9.125335939408746e-08, "loss": 3.8345, "step": 1215500 }, { "epoch": 13.5, "learning_rate": 9.12394776003376e-08, "loss": 3.8618, "step": 1216000 }, { "epoch": 13.51, "learning_rate": 9.122559580658775e-08, "loss": 3.836, "step": 1216500 }, { "epoch": 13.52, "learning_rate": 9.121171401283789e-08, "loss": 3.8349, "step": 1217000 }, { "epoch": 13.52, "learning_rate": 9.119783221908802e-08, "loss": 3.8282, "step": 1217500 }, { "epoch": 13.53, "learning_rate": 9.118395042533815e-08, "loss": 3.8279, "step": 1218000 }, { "epoch": 13.53, "learning_rate": 9.117006863158829e-08, "loss": 3.842, "step": 1218500 }, { "epoch": 13.54, "learning_rate": 9.115618683783843e-08, "loss": 3.8508, "step": 1219000 }, { "epoch": 13.54, "learning_rate": 9.114230504408857e-08, "loss": 3.828, "step": 1219500 }, { "epoch": 13.55, "learning_rate": 9.112842325033871e-08, "loss": 3.8032, "step": 1220000 }, { "epoch": 13.55, "learning_rate": 9.111454145658886e-08, "loss": 3.8283, "step": 1220500 }, { "epoch": 13.56, "learning_rate": 9.110065966283899e-08, "loss": 3.8415, "step": 1221000 }, { "epoch": 13.57, "learning_rate": 9.108677786908913e-08, "loss": 3.8396, "step": 1221500 }, { "epoch": 13.57, "learning_rate": 9.107289607533927e-08, "loss": 3.8329, "step": 1222000 }, { "epoch": 13.58, "learning_rate": 9.105901428158941e-08, "loss": 3.8312, "step": 1222500 }, { "epoch": 13.58, "learning_rate": 9.104513248783955e-08, "loss": 3.8564, "step": 1223000 }, { "epoch": 13.59, "learning_rate": 9.103125069408968e-08, "loss": 3.8346, "step": 1223500 }, { "epoch": 13.59, "learning_rate": 9.101736890033981e-08, "loss": 3.8216, "step": 1224000 }, { "epoch": 13.6, "learning_rate": 9.100348710658996e-08, "loss": 3.8521, "step": 1224500 }, { "epoch": 13.6, "learning_rate": 9.09896053128401e-08, "loss": 3.8444, "step": 1225000 }, { "epoch": 13.61, "learning_rate": 9.097572351909024e-08, "loss": 3.8353, "step": 1225500 }, { "epoch": 13.62, "learning_rate": 9.096184172534038e-08, "loss": 3.8281, "step": 1226000 }, { "epoch": 13.62, "learning_rate": 9.094795993159051e-08, "loss": 3.8366, "step": 1226500 }, { "epoch": 13.63, "learning_rate": 9.093407813784065e-08, "loss": 3.8381, "step": 1227000 }, { "epoch": 13.63, "learning_rate": 9.09201963440908e-08, "loss": 3.8248, "step": 1227500 }, { "epoch": 13.64, "learning_rate": 9.090631455034094e-08, "loss": 3.8382, "step": 1228000 }, { "epoch": 13.64, "learning_rate": 9.089243275659108e-08, "loss": 3.8441, "step": 1228500 }, { "epoch": 13.65, "learning_rate": 9.087855096284121e-08, "loss": 3.8471, "step": 1229000 }, { "epoch": 13.65, "learning_rate": 9.086466916909135e-08, "loss": 3.8133, "step": 1229500 }, { "epoch": 13.66, "learning_rate": 9.085078737534148e-08, "loss": 3.8399, "step": 1230000 }, { "epoch": 13.67, "learning_rate": 9.083690558159162e-08, "loss": 3.8245, "step": 1230500 }, { "epoch": 13.67, "learning_rate": 9.082302378784176e-08, "loss": 3.8368, "step": 1231000 }, { "epoch": 13.68, "learning_rate": 9.080914199409191e-08, "loss": 3.8226, "step": 1231500 }, { "epoch": 13.68, "learning_rate": 9.079526020034204e-08, "loss": 3.8517, "step": 1232000 }, { "epoch": 13.69, "learning_rate": 9.078137840659218e-08, "loss": 3.8241, "step": 1232500 }, { "epoch": 13.69, "learning_rate": 9.076749661284232e-08, "loss": 3.8246, "step": 1233000 }, { "epoch": 13.7, "learning_rate": 9.075361481909246e-08, "loss": 3.8316, "step": 1233500 }, { "epoch": 13.7, "learning_rate": 9.07397330253426e-08, "loss": 3.8368, "step": 1234000 }, { "epoch": 13.71, "learning_rate": 9.072585123159275e-08, "loss": 3.8381, "step": 1234500 }, { "epoch": 13.72, "learning_rate": 9.071196943784288e-08, "loss": 3.8406, "step": 1235000 }, { "epoch": 13.72, "learning_rate": 9.069808764409302e-08, "loss": 3.8354, "step": 1235500 }, { "epoch": 13.73, "learning_rate": 9.068420585034315e-08, "loss": 3.8179, "step": 1236000 }, { "epoch": 13.73, "learning_rate": 9.067032405659329e-08, "loss": 3.8327, "step": 1236500 }, { "epoch": 13.74, "learning_rate": 9.065644226284343e-08, "loss": 3.819, "step": 1237000 }, { "epoch": 13.74, "learning_rate": 9.064256046909356e-08, "loss": 3.8291, "step": 1237500 }, { "epoch": 13.75, "learning_rate": 9.06286786753437e-08, "loss": 3.825, "step": 1238000 }, { "epoch": 13.75, "learning_rate": 9.061479688159385e-08, "loss": 3.8373, "step": 1238500 }, { "epoch": 13.76, "learning_rate": 9.060091508784399e-08, "loss": 3.8331, "step": 1239000 }, { "epoch": 13.77, "learning_rate": 9.058703329409413e-08, "loss": 3.8387, "step": 1239500 }, { "epoch": 13.77, "learning_rate": 9.057315150034427e-08, "loss": 3.8382, "step": 1240000 }, { "epoch": 13.78, "learning_rate": 9.05592697065944e-08, "loss": 3.8133, "step": 1240500 }, { "epoch": 13.78, "learning_rate": 9.054538791284454e-08, "loss": 3.8421, "step": 1241000 }, { "epoch": 13.79, "learning_rate": 9.053150611909469e-08, "loss": 3.8277, "step": 1241500 }, { "epoch": 13.79, "learning_rate": 9.051762432534482e-08, "loss": 3.8341, "step": 1242000 }, { "epoch": 13.8, "learning_rate": 9.050374253159496e-08, "loss": 3.8259, "step": 1242500 }, { "epoch": 13.8, "learning_rate": 9.048986073784509e-08, "loss": 3.8276, "step": 1243000 }, { "epoch": 13.81, "learning_rate": 9.047597894409523e-08, "loss": 3.8129, "step": 1243500 }, { "epoch": 13.82, "learning_rate": 9.046209715034537e-08, "loss": 3.8465, "step": 1244000 }, { "epoch": 13.82, "learning_rate": 9.044821535659551e-08, "loss": 3.8372, "step": 1244500 }, { "epoch": 13.83, "learning_rate": 9.043433356284566e-08, "loss": 3.8241, "step": 1245000 }, { "epoch": 13.83, "learning_rate": 9.04204517690958e-08, "loss": 3.8219, "step": 1245500 }, { "epoch": 13.84, "learning_rate": 9.040656997534593e-08, "loss": 3.856, "step": 1246000 }, { "epoch": 13.84, "learning_rate": 9.039268818159607e-08, "loss": 3.84, "step": 1246500 }, { "epoch": 13.85, "learning_rate": 9.037880638784621e-08, "loss": 3.8275, "step": 1247000 }, { "epoch": 13.85, "learning_rate": 9.036492459409635e-08, "loss": 3.8464, "step": 1247500 }, { "epoch": 13.86, "learning_rate": 9.035104280034648e-08, "loss": 3.8119, "step": 1248000 }, { "epoch": 13.87, "learning_rate": 9.033716100659663e-08, "loss": 3.8291, "step": 1248500 }, { "epoch": 13.87, "learning_rate": 9.032327921284675e-08, "loss": 3.8286, "step": 1249000 }, { "epoch": 13.88, "learning_rate": 9.03093974190969e-08, "loss": 3.8381, "step": 1249500 }, { "epoch": 13.88, "learning_rate": 9.029551562534704e-08, "loss": 3.8245, "step": 1250000 }, { "epoch": 13.89, "learning_rate": 9.028163383159718e-08, "loss": 3.8428, "step": 1250500 }, { "epoch": 13.89, "learning_rate": 9.026775203784732e-08, "loss": 3.8363, "step": 1251000 }, { "epoch": 13.9, "learning_rate": 9.025387024409745e-08, "loss": 3.8453, "step": 1251500 }, { "epoch": 13.9, "learning_rate": 9.02399884503476e-08, "loss": 3.8276, "step": 1252000 }, { "epoch": 13.91, "learning_rate": 9.022610665659774e-08, "loss": 3.8127, "step": 1252500 }, { "epoch": 13.92, "learning_rate": 9.021222486284788e-08, "loss": 3.802, "step": 1253000 }, { "epoch": 13.92, "learning_rate": 9.019834306909802e-08, "loss": 3.8374, "step": 1253500 }, { "epoch": 13.93, "learning_rate": 9.018446127534815e-08, "loss": 3.8412, "step": 1254000 }, { "epoch": 13.93, "learning_rate": 9.017057948159828e-08, "loss": 3.8401, "step": 1254500 }, { "epoch": 13.94, "learning_rate": 9.015669768784842e-08, "loss": 3.8404, "step": 1255000 }, { "epoch": 13.94, "learning_rate": 9.014281589409856e-08, "loss": 3.827, "step": 1255500 }, { "epoch": 13.95, "learning_rate": 9.01289341003487e-08, "loss": 3.8031, "step": 1256000 }, { "epoch": 13.95, "learning_rate": 9.011505230659885e-08, "loss": 3.8386, "step": 1256500 }, { "epoch": 13.96, "learning_rate": 9.010117051284899e-08, "loss": 3.8371, "step": 1257000 }, { "epoch": 13.97, "learning_rate": 9.008728871909912e-08, "loss": 3.8458, "step": 1257500 }, { "epoch": 13.97, "learning_rate": 9.007340692534926e-08, "loss": 3.8242, "step": 1258000 }, { "epoch": 13.98, "learning_rate": 9.00595251315994e-08, "loss": 3.827, "step": 1258500 }, { "epoch": 13.98, "learning_rate": 9.004564333784955e-08, "loss": 3.8209, "step": 1259000 }, { "epoch": 13.99, "learning_rate": 9.003176154409969e-08, "loss": 3.8208, "step": 1259500 }, { "epoch": 13.99, "learning_rate": 9.001787975034982e-08, "loss": 3.8424, "step": 1260000 }, { "epoch": 14.0, "learning_rate": 9.000399795659995e-08, "loss": 3.8257, "step": 1260500 }, { "epoch": 14.0, "eval_loss": 3.875882387161255, "eval_runtime": 6.3086, "eval_samples_per_second": 246.331, "step": 1260644 }, { "epoch": 14.0, "learning_rate": 8.999011616285009e-08, "loss": 3.8341, "step": 1261000 }, { "epoch": 14.01, "learning_rate": 8.997623436910023e-08, "loss": 3.829, "step": 1261500 }, { "epoch": 14.02, "learning_rate": 8.996235257535037e-08, "loss": 3.8324, "step": 1262000 }, { "epoch": 14.02, "learning_rate": 8.994847078160052e-08, "loss": 3.8381, "step": 1262500 }, { "epoch": 14.03, "learning_rate": 8.993458898785064e-08, "loss": 3.8326, "step": 1263000 }, { "epoch": 14.03, "learning_rate": 8.992070719410079e-08, "loss": 3.8233, "step": 1263500 }, { "epoch": 14.04, "learning_rate": 8.990682540035093e-08, "loss": 3.8228, "step": 1264000 }, { "epoch": 14.04, "learning_rate": 8.989294360660107e-08, "loss": 3.8312, "step": 1264500 }, { "epoch": 14.05, "learning_rate": 8.987906181285121e-08, "loss": 3.8304, "step": 1265000 }, { "epoch": 14.05, "learning_rate": 8.986518001910134e-08, "loss": 3.832, "step": 1265500 }, { "epoch": 14.06, "learning_rate": 8.985129822535149e-08, "loss": 3.8391, "step": 1266000 }, { "epoch": 14.07, "learning_rate": 8.983741643160161e-08, "loss": 3.8225, "step": 1266500 }, { "epoch": 14.07, "learning_rate": 8.982353463785176e-08, "loss": 3.8233, "step": 1267000 }, { "epoch": 14.08, "learning_rate": 8.98096528441019e-08, "loss": 3.7987, "step": 1267500 }, { "epoch": 14.08, "learning_rate": 8.979577105035204e-08, "loss": 3.8269, "step": 1268000 }, { "epoch": 14.09, "learning_rate": 8.978188925660217e-08, "loss": 3.8375, "step": 1268500 }, { "epoch": 14.09, "learning_rate": 8.976800746285231e-08, "loss": 3.8304, "step": 1269000 }, { "epoch": 14.1, "learning_rate": 8.975412566910245e-08, "loss": 3.826, "step": 1269500 }, { "epoch": 14.1, "learning_rate": 8.97402438753526e-08, "loss": 3.8233, "step": 1270000 }, { "epoch": 14.11, "learning_rate": 8.972636208160274e-08, "loss": 3.8151, "step": 1270500 }, { "epoch": 14.12, "learning_rate": 8.971248028785288e-08, "loss": 3.8232, "step": 1271000 }, { "epoch": 14.12, "learning_rate": 8.969859849410301e-08, "loss": 3.82, "step": 1271500 }, { "epoch": 14.13, "learning_rate": 8.968471670035315e-08, "loss": 3.8402, "step": 1272000 }, { "epoch": 14.13, "learning_rate": 8.967083490660328e-08, "loss": 3.8239, "step": 1272500 }, { "epoch": 14.14, "learning_rate": 8.965695311285342e-08, "loss": 3.8446, "step": 1273000 }, { "epoch": 14.14, "learning_rate": 8.964307131910357e-08, "loss": 3.8217, "step": 1273500 }, { "epoch": 14.15, "learning_rate": 8.96291895253537e-08, "loss": 3.8284, "step": 1274000 }, { "epoch": 14.15, "learning_rate": 8.961530773160384e-08, "loss": 3.8369, "step": 1274500 }, { "epoch": 14.16, "learning_rate": 8.960142593785398e-08, "loss": 3.8373, "step": 1275000 }, { "epoch": 14.16, "learning_rate": 8.958754414410412e-08, "loss": 3.8222, "step": 1275500 }, { "epoch": 14.17, "learning_rate": 8.957366235035426e-08, "loss": 3.8344, "step": 1276000 }, { "epoch": 14.18, "learning_rate": 8.95597805566044e-08, "loss": 3.8185, "step": 1276500 }, { "epoch": 14.18, "learning_rate": 8.954589876285454e-08, "loss": 3.8252, "step": 1277000 }, { "epoch": 14.19, "learning_rate": 8.953201696910468e-08, "loss": 3.8374, "step": 1277500 }, { "epoch": 14.19, "learning_rate": 8.951813517535482e-08, "loss": 3.8445, "step": 1278000 }, { "epoch": 14.2, "learning_rate": 8.950425338160495e-08, "loss": 3.8207, "step": 1278500 }, { "epoch": 14.2, "learning_rate": 8.949037158785509e-08, "loss": 3.8263, "step": 1279000 }, { "epoch": 14.21, "learning_rate": 8.947648979410523e-08, "loss": 3.8335, "step": 1279500 }, { "epoch": 14.21, "learning_rate": 8.946260800035536e-08, "loss": 3.817, "step": 1280000 }, { "epoch": 14.22, "learning_rate": 8.94487262066055e-08, "loss": 3.8187, "step": 1280500 }, { "epoch": 14.23, "learning_rate": 8.943484441285565e-08, "loss": 3.8278, "step": 1281000 }, { "epoch": 14.23, "learning_rate": 8.942096261910579e-08, "loss": 3.8334, "step": 1281500 }, { "epoch": 14.24, "learning_rate": 8.940708082535593e-08, "loss": 3.828, "step": 1282000 }, { "epoch": 14.24, "learning_rate": 8.939319903160606e-08, "loss": 3.8245, "step": 1282500 }, { "epoch": 14.25, "learning_rate": 8.93793172378562e-08, "loss": 3.8187, "step": 1283000 }, { "epoch": 14.25, "learning_rate": 8.936543544410635e-08, "loss": 3.8182, "step": 1283500 }, { "epoch": 14.26, "learning_rate": 8.935155365035649e-08, "loss": 3.8117, "step": 1284000 }, { "epoch": 14.26, "learning_rate": 8.933767185660662e-08, "loss": 3.8316, "step": 1284500 }, { "epoch": 14.27, "learning_rate": 8.932379006285676e-08, "loss": 3.8354, "step": 1285000 }, { "epoch": 14.28, "learning_rate": 8.930990826910689e-08, "loss": 3.8284, "step": 1285500 }, { "epoch": 14.28, "learning_rate": 8.929602647535703e-08, "loss": 3.8349, "step": 1286000 }, { "epoch": 14.29, "learning_rate": 8.928214468160717e-08, "loss": 3.8173, "step": 1286500 }, { "epoch": 14.29, "learning_rate": 8.926826288785731e-08, "loss": 3.8206, "step": 1287000 }, { "epoch": 14.3, "learning_rate": 8.925438109410746e-08, "loss": 3.8299, "step": 1287500 }, { "epoch": 14.3, "learning_rate": 8.924049930035759e-08, "loss": 3.8193, "step": 1288000 }, { "epoch": 14.31, "learning_rate": 8.922661750660773e-08, "loss": 3.8268, "step": 1288500 }, { "epoch": 14.31, "learning_rate": 8.921273571285787e-08, "loss": 3.8106, "step": 1289000 }, { "epoch": 14.32, "learning_rate": 8.919885391910801e-08, "loss": 3.819, "step": 1289500 }, { "epoch": 14.33, "learning_rate": 8.918497212535815e-08, "loss": 3.8268, "step": 1290000 }, { "epoch": 14.33, "learning_rate": 8.917109033160828e-08, "loss": 3.8401, "step": 1290500 }, { "epoch": 14.34, "learning_rate": 8.915720853785841e-08, "loss": 3.8225, "step": 1291000 }, { "epoch": 14.34, "learning_rate": 8.914332674410856e-08, "loss": 3.8301, "step": 1291500 }, { "epoch": 14.35, "learning_rate": 8.91294449503587e-08, "loss": 3.8297, "step": 1292000 }, { "epoch": 14.35, "learning_rate": 8.911556315660884e-08, "loss": 3.8346, "step": 1292500 }, { "epoch": 14.36, "learning_rate": 8.910168136285898e-08, "loss": 3.8534, "step": 1293000 }, { "epoch": 14.36, "learning_rate": 8.908779956910912e-08, "loss": 3.8296, "step": 1293500 }, { "epoch": 14.37, "learning_rate": 8.907391777535925e-08, "loss": 3.8473, "step": 1294000 }, { "epoch": 14.38, "learning_rate": 8.90600359816094e-08, "loss": 3.8173, "step": 1294500 }, { "epoch": 14.38, "learning_rate": 8.904615418785954e-08, "loss": 3.8274, "step": 1295000 }, { "epoch": 14.39, "learning_rate": 8.903227239410968e-08, "loss": 3.8126, "step": 1295500 }, { "epoch": 14.39, "learning_rate": 8.901839060035982e-08, "loss": 3.8168, "step": 1296000 }, { "epoch": 14.4, "learning_rate": 8.900450880660995e-08, "loss": 3.8136, "step": 1296500 }, { "epoch": 14.4, "learning_rate": 8.899062701286008e-08, "loss": 3.8212, "step": 1297000 }, { "epoch": 14.41, "learning_rate": 8.897674521911022e-08, "loss": 3.8245, "step": 1297500 }, { "epoch": 14.41, "learning_rate": 8.896286342536037e-08, "loss": 3.8219, "step": 1298000 }, { "epoch": 14.42, "learning_rate": 8.894898163161051e-08, "loss": 3.8133, "step": 1298500 }, { "epoch": 14.43, "learning_rate": 8.893509983786065e-08, "loss": 3.8344, "step": 1299000 }, { "epoch": 14.43, "learning_rate": 8.892121804411078e-08, "loss": 3.8299, "step": 1299500 }, { "epoch": 14.44, "learning_rate": 8.890733625036092e-08, "loss": 3.8283, "step": 1300000 }, { "epoch": 14.44, "learning_rate": 8.889345445661106e-08, "loss": 3.8263, "step": 1300500 }, { "epoch": 14.45, "learning_rate": 8.88795726628612e-08, "loss": 3.8288, "step": 1301000 }, { "epoch": 14.45, "learning_rate": 8.886569086911135e-08, "loss": 3.8437, "step": 1301500 }, { "epoch": 14.46, "learning_rate": 8.885180907536148e-08, "loss": 3.8196, "step": 1302000 }, { "epoch": 14.46, "learning_rate": 8.883792728161162e-08, "loss": 3.8317, "step": 1302500 }, { "epoch": 14.47, "learning_rate": 8.882404548786175e-08, "loss": 3.8008, "step": 1303000 }, { "epoch": 14.48, "learning_rate": 8.881016369411189e-08, "loss": 3.838, "step": 1303500 }, { "epoch": 14.48, "learning_rate": 8.879628190036203e-08, "loss": 3.8156, "step": 1304000 }, { "epoch": 14.49, "learning_rate": 8.878240010661217e-08, "loss": 3.8096, "step": 1304500 }, { "epoch": 14.49, "learning_rate": 8.87685183128623e-08, "loss": 3.8306, "step": 1305000 }, { "epoch": 14.5, "learning_rate": 8.875463651911245e-08, "loss": 3.8375, "step": 1305500 }, { "epoch": 14.5, "learning_rate": 8.874075472536259e-08, "loss": 3.8216, "step": 1306000 }, { "epoch": 14.51, "learning_rate": 8.872687293161273e-08, "loss": 3.8256, "step": 1306500 }, { "epoch": 14.51, "learning_rate": 8.871299113786287e-08, "loss": 3.8302, "step": 1307000 }, { "epoch": 14.52, "learning_rate": 8.869910934411302e-08, "loss": 3.8363, "step": 1307500 }, { "epoch": 14.53, "learning_rate": 8.868522755036314e-08, "loss": 3.8195, "step": 1308000 }, { "epoch": 14.53, "learning_rate": 8.867134575661329e-08, "loss": 3.8135, "step": 1308500 }, { "epoch": 14.54, "learning_rate": 8.865746396286342e-08, "loss": 3.8315, "step": 1309000 }, { "epoch": 14.54, "learning_rate": 8.864358216911356e-08, "loss": 3.8357, "step": 1309500 }, { "epoch": 14.55, "learning_rate": 8.86297003753637e-08, "loss": 3.8469, "step": 1310000 }, { "epoch": 14.55, "learning_rate": 8.861581858161383e-08, "loss": 3.8455, "step": 1310500 }, { "epoch": 14.56, "learning_rate": 8.860193678786397e-08, "loss": 3.8335, "step": 1311000 }, { "epoch": 14.56, "learning_rate": 8.858805499411411e-08, "loss": 3.8315, "step": 1311500 }, { "epoch": 14.57, "learning_rate": 8.857417320036426e-08, "loss": 3.8066, "step": 1312000 }, { "epoch": 14.58, "learning_rate": 8.85602914066144e-08, "loss": 3.8235, "step": 1312500 }, { "epoch": 14.58, "learning_rate": 8.854640961286454e-08, "loss": 3.7941, "step": 1313000 }, { "epoch": 14.59, "learning_rate": 8.853252781911467e-08, "loss": 3.8239, "step": 1313500 }, { "epoch": 14.59, "learning_rate": 8.851864602536481e-08, "loss": 3.8576, "step": 1314000 }, { "epoch": 14.6, "learning_rate": 8.850476423161495e-08, "loss": 3.8174, "step": 1314500 }, { "epoch": 14.6, "learning_rate": 8.849088243786508e-08, "loss": 3.8036, "step": 1315000 }, { "epoch": 14.61, "learning_rate": 8.847700064411523e-08, "loss": 3.8326, "step": 1315500 }, { "epoch": 14.61, "learning_rate": 8.846311885036537e-08, "loss": 3.8239, "step": 1316000 }, { "epoch": 14.62, "learning_rate": 8.84492370566155e-08, "loss": 3.8235, "step": 1316500 }, { "epoch": 14.63, "learning_rate": 8.843535526286564e-08, "loss": 3.8396, "step": 1317000 }, { "epoch": 14.63, "learning_rate": 8.842147346911578e-08, "loss": 3.8159, "step": 1317500 }, { "epoch": 14.64, "learning_rate": 8.840759167536592e-08, "loss": 3.8475, "step": 1318000 }, { "epoch": 14.64, "learning_rate": 8.839370988161607e-08, "loss": 3.8351, "step": 1318500 }, { "epoch": 14.65, "learning_rate": 8.83798280878662e-08, "loss": 3.8275, "step": 1319000 }, { "epoch": 14.65, "learning_rate": 8.836594629411634e-08, "loss": 3.8283, "step": 1319500 }, { "epoch": 14.66, "learning_rate": 8.835206450036648e-08, "loss": 3.8388, "step": 1320000 }, { "epoch": 14.66, "learning_rate": 8.833818270661662e-08, "loss": 3.8339, "step": 1320500 }, { "epoch": 14.67, "learning_rate": 8.832430091286675e-08, "loss": 3.8296, "step": 1321000 }, { "epoch": 14.68, "learning_rate": 8.831041911911689e-08, "loss": 3.8061, "step": 1321500 }, { "epoch": 14.68, "learning_rate": 8.829653732536702e-08, "loss": 3.8157, "step": 1322000 }, { "epoch": 14.69, "learning_rate": 8.828265553161716e-08, "loss": 3.8295, "step": 1322500 }, { "epoch": 14.69, "learning_rate": 8.82687737378673e-08, "loss": 3.8406, "step": 1323000 }, { "epoch": 14.7, "learning_rate": 8.825489194411745e-08, "loss": 3.8231, "step": 1323500 }, { "epoch": 14.7, "learning_rate": 8.824101015036759e-08, "loss": 3.8234, "step": 1324000 }, { "epoch": 14.71, "learning_rate": 8.822712835661772e-08, "loss": 3.8236, "step": 1324500 }, { "epoch": 14.71, "learning_rate": 8.821324656286786e-08, "loss": 3.8288, "step": 1325000 }, { "epoch": 14.72, "learning_rate": 8.8199364769118e-08, "loss": 3.8349, "step": 1325500 }, { "epoch": 14.73, "learning_rate": 8.818548297536815e-08, "loss": 3.822, "step": 1326000 }, { "epoch": 14.73, "learning_rate": 8.817160118161829e-08, "loss": 3.8215, "step": 1326500 }, { "epoch": 14.74, "learning_rate": 8.815771938786842e-08, "loss": 3.8243, "step": 1327000 }, { "epoch": 14.74, "learning_rate": 8.814383759411855e-08, "loss": 3.8323, "step": 1327500 }, { "epoch": 14.75, "learning_rate": 8.812995580036869e-08, "loss": 3.8057, "step": 1328000 }, { "epoch": 14.75, "learning_rate": 8.811607400661883e-08, "loss": 3.8359, "step": 1328500 }, { "epoch": 14.76, "learning_rate": 8.810219221286897e-08, "loss": 3.8208, "step": 1329000 }, { "epoch": 14.76, "learning_rate": 8.808831041911912e-08, "loss": 3.8197, "step": 1329500 }, { "epoch": 14.77, "learning_rate": 8.807442862536926e-08, "loss": 3.7929, "step": 1330000 }, { "epoch": 14.78, "learning_rate": 8.806054683161939e-08, "loss": 3.8472, "step": 1330500 }, { "epoch": 14.78, "learning_rate": 8.804666503786953e-08, "loss": 3.8148, "step": 1331000 }, { "epoch": 14.79, "learning_rate": 8.803278324411967e-08, "loss": 3.8319, "step": 1331500 }, { "epoch": 14.79, "learning_rate": 8.801890145036981e-08, "loss": 3.8321, "step": 1332000 }, { "epoch": 14.8, "learning_rate": 8.800501965661996e-08, "loss": 3.8233, "step": 1332500 }, { "epoch": 14.8, "learning_rate": 8.799113786287009e-08, "loss": 3.8252, "step": 1333000 }, { "epoch": 14.81, "learning_rate": 8.797725606912021e-08, "loss": 3.8348, "step": 1333500 }, { "epoch": 14.81, "learning_rate": 8.796337427537036e-08, "loss": 3.8349, "step": 1334000 }, { "epoch": 14.82, "learning_rate": 8.79494924816205e-08, "loss": 3.825, "step": 1334500 }, { "epoch": 14.83, "learning_rate": 8.793561068787064e-08, "loss": 3.8209, "step": 1335000 }, { "epoch": 14.83, "learning_rate": 8.792172889412078e-08, "loss": 3.8269, "step": 1335500 }, { "epoch": 14.84, "learning_rate": 8.790784710037091e-08, "loss": 3.8154, "step": 1336000 }, { "epoch": 14.84, "learning_rate": 8.789396530662105e-08, "loss": 3.8206, "step": 1336500 }, { "epoch": 14.85, "learning_rate": 8.78800835128712e-08, "loss": 3.8202, "step": 1337000 }, { "epoch": 14.85, "learning_rate": 8.786620171912134e-08, "loss": 3.8162, "step": 1337500 }, { "epoch": 14.86, "learning_rate": 8.785231992537148e-08, "loss": 3.832, "step": 1338000 }, { "epoch": 14.86, "learning_rate": 8.783843813162161e-08, "loss": 3.8283, "step": 1338500 }, { "epoch": 14.87, "learning_rate": 8.782455633787175e-08, "loss": 3.8234, "step": 1339000 }, { "epoch": 14.88, "learning_rate": 8.781067454412188e-08, "loss": 3.8269, "step": 1339500 }, { "epoch": 14.88, "learning_rate": 8.779679275037202e-08, "loss": 3.8264, "step": 1340000 }, { "epoch": 14.89, "learning_rate": 8.778291095662217e-08, "loss": 3.8198, "step": 1340500 }, { "epoch": 14.89, "learning_rate": 8.776902916287231e-08, "loss": 3.8232, "step": 1341000 }, { "epoch": 14.9, "learning_rate": 8.775514736912244e-08, "loss": 3.8087, "step": 1341500 }, { "epoch": 14.9, "learning_rate": 8.774126557537258e-08, "loss": 3.8175, "step": 1342000 }, { "epoch": 14.91, "learning_rate": 8.772738378162272e-08, "loss": 3.843, "step": 1342500 }, { "epoch": 14.91, "learning_rate": 8.771350198787286e-08, "loss": 3.8228, "step": 1343000 }, { "epoch": 14.92, "learning_rate": 8.769962019412301e-08, "loss": 3.8287, "step": 1343500 }, { "epoch": 14.93, "learning_rate": 8.768573840037315e-08, "loss": 3.825, "step": 1344000 }, { "epoch": 14.93, "learning_rate": 8.767185660662328e-08, "loss": 3.8371, "step": 1344500 }, { "epoch": 14.94, "learning_rate": 8.765797481287342e-08, "loss": 3.8076, "step": 1345000 }, { "epoch": 14.94, "learning_rate": 8.764409301912355e-08, "loss": 3.8097, "step": 1345500 }, { "epoch": 14.95, "learning_rate": 8.763021122537369e-08, "loss": 3.8304, "step": 1346000 }, { "epoch": 14.95, "learning_rate": 8.761632943162383e-08, "loss": 3.8157, "step": 1346500 }, { "epoch": 14.96, "learning_rate": 8.760244763787396e-08, "loss": 3.8159, "step": 1347000 }, { "epoch": 14.96, "learning_rate": 8.75885658441241e-08, "loss": 3.8155, "step": 1347500 }, { "epoch": 14.97, "learning_rate": 8.757468405037425e-08, "loss": 3.818, "step": 1348000 }, { "epoch": 14.98, "learning_rate": 8.756080225662439e-08, "loss": 3.835, "step": 1348500 }, { "epoch": 14.98, "learning_rate": 8.754692046287453e-08, "loss": 3.8305, "step": 1349000 }, { "epoch": 14.99, "learning_rate": 8.753303866912467e-08, "loss": 3.8271, "step": 1349500 }, { "epoch": 14.99, "learning_rate": 8.75191568753748e-08, "loss": 3.8227, "step": 1350000 }, { "epoch": 15.0, "learning_rate": 8.750527508162495e-08, "loss": 3.8244, "step": 1350500 }, { "epoch": 15.0, "eval_loss": 3.8713603019714355, "eval_runtime": 6.3095, "eval_samples_per_second": 246.297, "step": 1350690 }, { "epoch": 15.0, "learning_rate": 8.749139328787509e-08, "loss": 3.8192, "step": 1351000 }, { "epoch": 15.01, "learning_rate": 8.747751149412522e-08, "loss": 3.8112, "step": 1351500 }, { "epoch": 15.01, "learning_rate": 8.746362970037536e-08, "loss": 3.8426, "step": 1352000 }, { "epoch": 15.02, "learning_rate": 8.74497479066255e-08, "loss": 3.8161, "step": 1352500 }, { "epoch": 15.03, "learning_rate": 8.743586611287563e-08, "loss": 3.8126, "step": 1353000 }, { "epoch": 15.03, "learning_rate": 8.742198431912577e-08, "loss": 3.8203, "step": 1353500 }, { "epoch": 15.04, "learning_rate": 8.740810252537591e-08, "loss": 3.8411, "step": 1354000 }, { "epoch": 15.04, "learning_rate": 8.739422073162606e-08, "loss": 3.834, "step": 1354500 }, { "epoch": 15.05, "learning_rate": 8.73803389378762e-08, "loss": 3.8202, "step": 1355000 }, { "epoch": 15.05, "learning_rate": 8.736645714412633e-08, "loss": 3.8192, "step": 1355500 }, { "epoch": 15.06, "learning_rate": 8.735257535037647e-08, "loss": 3.8115, "step": 1356000 }, { "epoch": 15.06, "learning_rate": 8.733869355662661e-08, "loss": 3.8107, "step": 1356500 }, { "epoch": 15.07, "learning_rate": 8.732481176287676e-08, "loss": 3.8264, "step": 1357000 }, { "epoch": 15.08, "learning_rate": 8.731092996912688e-08, "loss": 3.8289, "step": 1357500 }, { "epoch": 15.08, "learning_rate": 8.729704817537703e-08, "loss": 3.8233, "step": 1358000 }, { "epoch": 15.09, "learning_rate": 8.728316638162716e-08, "loss": 3.8138, "step": 1358500 }, { "epoch": 15.09, "learning_rate": 8.72692845878773e-08, "loss": 3.8318, "step": 1359000 }, { "epoch": 15.1, "learning_rate": 8.725540279412744e-08, "loss": 3.8336, "step": 1359500 }, { "epoch": 15.1, "learning_rate": 8.724152100037758e-08, "loss": 3.8161, "step": 1360000 }, { "epoch": 15.11, "learning_rate": 8.722763920662772e-08, "loss": 3.8088, "step": 1360500 }, { "epoch": 15.11, "learning_rate": 8.721375741287785e-08, "loss": 3.8286, "step": 1361000 }, { "epoch": 15.12, "learning_rate": 8.7199875619128e-08, "loss": 3.8334, "step": 1361500 }, { "epoch": 15.13, "learning_rate": 8.718599382537814e-08, "loss": 3.822, "step": 1362000 }, { "epoch": 15.13, "learning_rate": 8.717211203162828e-08, "loss": 3.8159, "step": 1362500 }, { "epoch": 15.14, "learning_rate": 8.715823023787842e-08, "loss": 3.8314, "step": 1363000 }, { "epoch": 15.14, "learning_rate": 8.714434844412855e-08, "loss": 3.8308, "step": 1363500 }, { "epoch": 15.15, "learning_rate": 8.713046665037868e-08, "loss": 3.8333, "step": 1364000 }, { "epoch": 15.15, "learning_rate": 8.711658485662882e-08, "loss": 3.8294, "step": 1364500 }, { "epoch": 15.16, "learning_rate": 8.710270306287897e-08, "loss": 3.7999, "step": 1365000 }, { "epoch": 15.16, "learning_rate": 8.708882126912911e-08, "loss": 3.823, "step": 1365500 }, { "epoch": 15.17, "learning_rate": 8.707493947537925e-08, "loss": 3.8281, "step": 1366000 }, { "epoch": 15.18, "learning_rate": 8.706105768162939e-08, "loss": 3.8191, "step": 1366500 }, { "epoch": 15.18, "learning_rate": 8.704717588787952e-08, "loss": 3.8142, "step": 1367000 }, { "epoch": 15.19, "learning_rate": 8.703329409412966e-08, "loss": 3.827, "step": 1367500 }, { "epoch": 15.19, "learning_rate": 8.70194123003798e-08, "loss": 3.8198, "step": 1368000 }, { "epoch": 15.2, "learning_rate": 8.700553050662995e-08, "loss": 3.8002, "step": 1368500 }, { "epoch": 15.2, "learning_rate": 8.699164871288009e-08, "loss": 3.8125, "step": 1369000 }, { "epoch": 15.21, "learning_rate": 8.697776691913022e-08, "loss": 3.8256, "step": 1369500 }, { "epoch": 15.21, "learning_rate": 8.696388512538035e-08, "loss": 3.8151, "step": 1370000 }, { "epoch": 15.22, "learning_rate": 8.695000333163049e-08, "loss": 3.842, "step": 1370500 }, { "epoch": 15.23, "learning_rate": 8.693612153788063e-08, "loss": 3.8203, "step": 1371000 }, { "epoch": 15.23, "learning_rate": 8.692223974413077e-08, "loss": 3.8075, "step": 1371500 }, { "epoch": 15.24, "learning_rate": 8.690835795038092e-08, "loss": 3.8278, "step": 1372000 }, { "epoch": 15.24, "learning_rate": 8.689447615663105e-08, "loss": 3.8106, "step": 1372500 }, { "epoch": 15.25, "learning_rate": 8.688059436288119e-08, "loss": 3.8194, "step": 1373000 }, { "epoch": 15.25, "learning_rate": 8.686671256913133e-08, "loss": 3.8153, "step": 1373500 }, { "epoch": 15.26, "learning_rate": 8.685283077538147e-08, "loss": 3.8231, "step": 1374000 }, { "epoch": 15.26, "learning_rate": 8.683894898163162e-08, "loss": 3.8125, "step": 1374500 }, { "epoch": 15.27, "learning_rate": 8.682506718788174e-08, "loss": 3.8376, "step": 1375000 }, { "epoch": 15.28, "learning_rate": 8.681118539413189e-08, "loss": 3.8077, "step": 1375500 }, { "epoch": 15.28, "learning_rate": 8.679730360038202e-08, "loss": 3.8267, "step": 1376000 }, { "epoch": 15.29, "learning_rate": 8.678342180663216e-08, "loss": 3.7899, "step": 1376500 }, { "epoch": 15.29, "learning_rate": 8.67695400128823e-08, "loss": 3.8168, "step": 1377000 }, { "epoch": 15.3, "learning_rate": 8.675565821913244e-08, "loss": 3.8086, "step": 1377500 }, { "epoch": 15.3, "learning_rate": 8.674177642538257e-08, "loss": 3.8206, "step": 1378000 }, { "epoch": 15.31, "learning_rate": 8.672789463163271e-08, "loss": 3.8108, "step": 1378500 }, { "epoch": 15.31, "learning_rate": 8.671401283788286e-08, "loss": 3.8131, "step": 1379000 }, { "epoch": 15.32, "learning_rate": 8.6700131044133e-08, "loss": 3.8269, "step": 1379500 }, { "epoch": 15.33, "learning_rate": 8.668624925038314e-08, "loss": 3.8095, "step": 1380000 }, { "epoch": 15.33, "learning_rate": 8.667236745663328e-08, "loss": 3.8163, "step": 1380500 }, { "epoch": 15.34, "learning_rate": 8.665848566288341e-08, "loss": 3.8233, "step": 1381000 }, { "epoch": 15.34, "learning_rate": 8.664460386913355e-08, "loss": 3.8134, "step": 1381500 }, { "epoch": 15.35, "learning_rate": 8.663072207538368e-08, "loss": 3.8263, "step": 1382000 }, { "epoch": 15.35, "learning_rate": 8.661684028163383e-08, "loss": 3.8218, "step": 1382500 }, { "epoch": 15.36, "learning_rate": 8.660295848788397e-08, "loss": 3.8359, "step": 1383000 }, { "epoch": 15.36, "learning_rate": 8.65890766941341e-08, "loss": 3.8273, "step": 1383500 }, { "epoch": 15.37, "learning_rate": 8.657519490038424e-08, "loss": 3.8112, "step": 1384000 }, { "epoch": 15.38, "learning_rate": 8.656131310663438e-08, "loss": 3.8098, "step": 1384500 }, { "epoch": 15.38, "learning_rate": 8.654743131288452e-08, "loss": 3.8206, "step": 1385000 }, { "epoch": 15.39, "learning_rate": 8.653354951913467e-08, "loss": 3.8287, "step": 1385500 }, { "epoch": 15.39, "learning_rate": 8.651966772538481e-08, "loss": 3.8085, "step": 1386000 }, { "epoch": 15.4, "learning_rate": 8.650578593163494e-08, "loss": 3.8165, "step": 1386500 }, { "epoch": 15.4, "learning_rate": 8.649190413788508e-08, "loss": 3.8133, "step": 1387000 }, { "epoch": 15.41, "learning_rate": 8.647802234413522e-08, "loss": 3.8217, "step": 1387500 }, { "epoch": 15.41, "learning_rate": 8.646414055038535e-08, "loss": 3.8273, "step": 1388000 }, { "epoch": 15.42, "learning_rate": 8.645025875663549e-08, "loss": 3.8122, "step": 1388500 }, { "epoch": 15.43, "learning_rate": 8.643637696288564e-08, "loss": 3.8134, "step": 1389000 }, { "epoch": 15.43, "learning_rate": 8.642249516913576e-08, "loss": 3.813, "step": 1389500 }, { "epoch": 15.44, "learning_rate": 8.64086133753859e-08, "loss": 3.81, "step": 1390000 }, { "epoch": 15.44, "learning_rate": 8.639473158163605e-08, "loss": 3.8207, "step": 1390500 }, { "epoch": 15.45, "learning_rate": 8.638084978788619e-08, "loss": 3.8134, "step": 1391000 }, { "epoch": 15.45, "learning_rate": 8.636696799413633e-08, "loss": 3.8155, "step": 1391500 }, { "epoch": 15.46, "learning_rate": 8.635308620038646e-08, "loss": 3.8221, "step": 1392000 }, { "epoch": 15.46, "learning_rate": 8.63392044066366e-08, "loss": 3.8384, "step": 1392500 }, { "epoch": 15.47, "learning_rate": 8.632532261288675e-08, "loss": 3.8383, "step": 1393000 }, { "epoch": 15.48, "learning_rate": 8.631144081913689e-08, "loss": 3.8442, "step": 1393500 }, { "epoch": 15.48, "learning_rate": 8.629755902538702e-08, "loss": 3.8267, "step": 1394000 }, { "epoch": 15.49, "learning_rate": 8.628367723163716e-08, "loss": 3.8281, "step": 1394500 }, { "epoch": 15.49, "learning_rate": 8.626979543788729e-08, "loss": 3.8247, "step": 1395000 }, { "epoch": 15.5, "learning_rate": 8.625591364413743e-08, "loss": 3.8135, "step": 1395500 }, { "epoch": 15.5, "learning_rate": 8.624203185038757e-08, "loss": 3.8152, "step": 1396000 }, { "epoch": 15.51, "learning_rate": 8.622815005663772e-08, "loss": 3.8322, "step": 1396500 }, { "epoch": 15.51, "learning_rate": 8.621426826288786e-08, "loss": 3.8272, "step": 1397000 }, { "epoch": 15.52, "learning_rate": 8.620038646913799e-08, "loss": 3.8155, "step": 1397500 }, { "epoch": 15.53, "learning_rate": 8.618650467538813e-08, "loss": 3.8181, "step": 1398000 }, { "epoch": 15.53, "learning_rate": 8.617262288163827e-08, "loss": 3.8254, "step": 1398500 }, { "epoch": 15.54, "learning_rate": 8.615874108788841e-08, "loss": 3.8095, "step": 1399000 }, { "epoch": 15.54, "learning_rate": 8.614485929413856e-08, "loss": 3.8181, "step": 1399500 }, { "epoch": 15.55, "learning_rate": 8.613097750038869e-08, "loss": 3.8184, "step": 1400000 }, { "epoch": 15.55, "learning_rate": 8.611709570663881e-08, "loss": 3.8115, "step": 1400500 }, { "epoch": 15.56, "learning_rate": 8.610321391288896e-08, "loss": 3.8327, "step": 1401000 }, { "epoch": 15.56, "learning_rate": 8.60893321191391e-08, "loss": 3.8096, "step": 1401500 }, { "epoch": 15.57, "learning_rate": 8.607545032538924e-08, "loss": 3.8092, "step": 1402000 }, { "epoch": 15.58, "learning_rate": 8.606156853163938e-08, "loss": 3.8297, "step": 1402500 }, { "epoch": 15.58, "learning_rate": 8.604768673788953e-08, "loss": 3.8328, "step": 1403000 }, { "epoch": 15.59, "learning_rate": 8.603380494413965e-08, "loss": 3.8203, "step": 1403500 }, { "epoch": 15.59, "learning_rate": 8.60199231503898e-08, "loss": 3.8042, "step": 1404000 }, { "epoch": 15.6, "learning_rate": 8.600604135663994e-08, "loss": 3.804, "step": 1404500 }, { "epoch": 15.6, "learning_rate": 8.599215956289008e-08, "loss": 3.8493, "step": 1405000 }, { "epoch": 15.61, "learning_rate": 8.597827776914021e-08, "loss": 3.8201, "step": 1405500 }, { "epoch": 15.61, "learning_rate": 8.596439597539035e-08, "loss": 3.8153, "step": 1406000 }, { "epoch": 15.62, "learning_rate": 8.595051418164048e-08, "loss": 3.8193, "step": 1406500 }, { "epoch": 15.63, "learning_rate": 8.593663238789062e-08, "loss": 3.8201, "step": 1407000 }, { "epoch": 15.63, "learning_rate": 8.592275059414077e-08, "loss": 3.8155, "step": 1407500 }, { "epoch": 15.64, "learning_rate": 8.590886880039091e-08, "loss": 3.8088, "step": 1408000 }, { "epoch": 15.64, "learning_rate": 8.589498700664105e-08, "loss": 3.8214, "step": 1408500 }, { "epoch": 15.65, "learning_rate": 8.588110521289118e-08, "loss": 3.806, "step": 1409000 }, { "epoch": 15.65, "learning_rate": 8.586722341914132e-08, "loss": 3.8114, "step": 1409500 }, { "epoch": 15.66, "learning_rate": 8.585334162539146e-08, "loss": 3.8107, "step": 1410000 }, { "epoch": 15.66, "learning_rate": 8.583945983164161e-08, "loss": 3.8379, "step": 1410500 }, { "epoch": 15.67, "learning_rate": 8.582557803789175e-08, "loss": 3.806, "step": 1411000 }, { "epoch": 15.68, "learning_rate": 8.581169624414188e-08, "loss": 3.8209, "step": 1411500 }, { "epoch": 15.68, "learning_rate": 8.579781445039202e-08, "loss": 3.821, "step": 1412000 }, { "epoch": 15.69, "learning_rate": 8.578393265664215e-08, "loss": 3.8236, "step": 1412500 }, { "epoch": 15.69, "learning_rate": 8.577005086289229e-08, "loss": 3.8055, "step": 1413000 }, { "epoch": 15.7, "learning_rate": 8.575616906914243e-08, "loss": 3.8257, "step": 1413500 }, { "epoch": 15.7, "learning_rate": 8.574228727539258e-08, "loss": 3.8202, "step": 1414000 }, { "epoch": 15.71, "learning_rate": 8.57284054816427e-08, "loss": 3.8182, "step": 1414500 }, { "epoch": 15.71, "learning_rate": 8.571452368789285e-08, "loss": 3.8128, "step": 1415000 }, { "epoch": 15.72, "learning_rate": 8.570064189414299e-08, "loss": 3.805, "step": 1415500 }, { "epoch": 15.73, "learning_rate": 8.568676010039313e-08, "loss": 3.8303, "step": 1416000 }, { "epoch": 15.73, "learning_rate": 8.567287830664327e-08, "loss": 3.8159, "step": 1416500 }, { "epoch": 15.74, "learning_rate": 8.565899651289342e-08, "loss": 3.8215, "step": 1417000 }, { "epoch": 15.74, "learning_rate": 8.564511471914355e-08, "loss": 3.8219, "step": 1417500 }, { "epoch": 15.75, "learning_rate": 8.563123292539369e-08, "loss": 3.7965, "step": 1418000 }, { "epoch": 15.75, "learning_rate": 8.561735113164382e-08, "loss": 3.8215, "step": 1418500 }, { "epoch": 15.76, "learning_rate": 8.560346933789396e-08, "loss": 3.8024, "step": 1419000 }, { "epoch": 15.76, "learning_rate": 8.55895875441441e-08, "loss": 3.8235, "step": 1419500 }, { "epoch": 15.77, "learning_rate": 8.557570575039423e-08, "loss": 3.8341, "step": 1420000 }, { "epoch": 15.78, "learning_rate": 8.556182395664437e-08, "loss": 3.8348, "step": 1420500 }, { "epoch": 15.78, "learning_rate": 8.554794216289451e-08, "loss": 3.8106, "step": 1421000 }, { "epoch": 15.79, "learning_rate": 8.553406036914466e-08, "loss": 3.8103, "step": 1421500 }, { "epoch": 15.79, "learning_rate": 8.55201785753948e-08, "loss": 3.8239, "step": 1422000 }, { "epoch": 15.8, "learning_rate": 8.550629678164494e-08, "loss": 3.815, "step": 1422500 }, { "epoch": 15.8, "learning_rate": 8.549241498789507e-08, "loss": 3.8078, "step": 1423000 }, { "epoch": 15.81, "learning_rate": 8.547853319414521e-08, "loss": 3.8191, "step": 1423500 }, { "epoch": 15.81, "learning_rate": 8.546465140039536e-08, "loss": 3.8114, "step": 1424000 }, { "epoch": 15.82, "learning_rate": 8.545076960664548e-08, "loss": 3.8183, "step": 1424500 }, { "epoch": 15.83, "learning_rate": 8.543688781289563e-08, "loss": 3.8089, "step": 1425000 }, { "epoch": 15.83, "learning_rate": 8.542300601914577e-08, "loss": 3.8328, "step": 1425500 }, { "epoch": 15.84, "learning_rate": 8.54091242253959e-08, "loss": 3.8204, "step": 1426000 }, { "epoch": 15.84, "learning_rate": 8.539524243164604e-08, "loss": 3.8005, "step": 1426500 }, { "epoch": 15.85, "learning_rate": 8.538136063789618e-08, "loss": 3.8259, "step": 1427000 }, { "epoch": 15.85, "learning_rate": 8.536747884414632e-08, "loss": 3.8232, "step": 1427500 }, { "epoch": 15.86, "learning_rate": 8.535359705039647e-08, "loss": 3.8196, "step": 1428000 }, { "epoch": 15.86, "learning_rate": 8.53397152566466e-08, "loss": 3.8154, "step": 1428500 }, { "epoch": 15.87, "learning_rate": 8.532583346289674e-08, "loss": 3.8187, "step": 1429000 }, { "epoch": 15.88, "learning_rate": 8.531195166914688e-08, "loss": 3.818, "step": 1429500 }, { "epoch": 15.88, "learning_rate": 8.529806987539702e-08, "loss": 3.8212, "step": 1430000 }, { "epoch": 15.89, "learning_rate": 8.528418808164715e-08, "loss": 3.8055, "step": 1430500 }, { "epoch": 15.89, "learning_rate": 8.52703062878973e-08, "loss": 3.8073, "step": 1431000 }, { "epoch": 15.9, "learning_rate": 8.525642449414742e-08, "loss": 3.8221, "step": 1431500 }, { "epoch": 15.9, "learning_rate": 8.524254270039757e-08, "loss": 3.8145, "step": 1432000 }, { "epoch": 15.91, "learning_rate": 8.522866090664771e-08, "loss": 3.8183, "step": 1432500 }, { "epoch": 15.91, "learning_rate": 8.521477911289785e-08, "loss": 3.8356, "step": 1433000 }, { "epoch": 15.92, "learning_rate": 8.520089731914799e-08, "loss": 3.84, "step": 1433500 }, { "epoch": 15.93, "learning_rate": 8.518701552539813e-08, "loss": 3.8109, "step": 1434000 }, { "epoch": 15.93, "learning_rate": 8.517313373164826e-08, "loss": 3.8183, "step": 1434500 }, { "epoch": 15.94, "learning_rate": 8.51592519378984e-08, "loss": 3.8327, "step": 1435000 }, { "epoch": 15.94, "learning_rate": 8.514537014414855e-08, "loss": 3.8311, "step": 1435500 }, { "epoch": 15.95, "learning_rate": 8.513148835039868e-08, "loss": 3.8188, "step": 1436000 }, { "epoch": 15.95, "learning_rate": 8.511760655664882e-08, "loss": 3.8091, "step": 1436500 }, { "epoch": 15.96, "learning_rate": 8.510372476289895e-08, "loss": 3.8162, "step": 1437000 }, { "epoch": 15.96, "learning_rate": 8.508984296914909e-08, "loss": 3.8364, "step": 1437500 }, { "epoch": 15.97, "learning_rate": 8.507596117539923e-08, "loss": 3.8147, "step": 1438000 }, { "epoch": 15.98, "learning_rate": 8.506207938164938e-08, "loss": 3.8158, "step": 1438500 }, { "epoch": 15.98, "learning_rate": 8.504819758789952e-08, "loss": 3.8208, "step": 1439000 }, { "epoch": 15.99, "learning_rate": 8.503431579414966e-08, "loss": 3.8241, "step": 1439500 }, { "epoch": 15.99, "learning_rate": 8.502043400039979e-08, "loss": 3.8143, "step": 1440000 }, { "epoch": 16.0, "learning_rate": 8.500655220664993e-08, "loss": 3.8116, "step": 1440500 }, { "epoch": 16.0, "eval_loss": 3.8666203022003174, "eval_runtime": 6.3142, "eval_samples_per_second": 246.112, "step": 1440736 }, { "epoch": 16.0, "learning_rate": 8.499267041290007e-08, "loss": 3.8191, "step": 1441000 }, { "epoch": 16.01, "learning_rate": 8.497878861915022e-08, "loss": 3.8049, "step": 1441500 }, { "epoch": 16.01, "learning_rate": 8.496490682540034e-08, "loss": 3.813, "step": 1442000 }, { "epoch": 16.02, "learning_rate": 8.495102503165049e-08, "loss": 3.8038, "step": 1442500 }, { "epoch": 16.03, "learning_rate": 8.493714323790062e-08, "loss": 3.8249, "step": 1443000 }, { "epoch": 16.03, "learning_rate": 8.492326144415076e-08, "loss": 3.8166, "step": 1443500 }, { "epoch": 16.04, "learning_rate": 8.49093796504009e-08, "loss": 3.8103, "step": 1444000 }, { "epoch": 16.04, "learning_rate": 8.489549785665104e-08, "loss": 3.8248, "step": 1444500 }, { "epoch": 16.05, "learning_rate": 8.488161606290118e-08, "loss": 3.7897, "step": 1445000 }, { "epoch": 16.05, "learning_rate": 8.486773426915131e-08, "loss": 3.8007, "step": 1445500 }, { "epoch": 16.06, "learning_rate": 8.485385247540146e-08, "loss": 3.8071, "step": 1446000 }, { "epoch": 16.06, "learning_rate": 8.48399706816516e-08, "loss": 3.8224, "step": 1446500 }, { "epoch": 16.07, "learning_rate": 8.482608888790174e-08, "loss": 3.8002, "step": 1447000 }, { "epoch": 16.08, "learning_rate": 8.481220709415188e-08, "loss": 3.8207, "step": 1447500 }, { "epoch": 16.08, "learning_rate": 8.479832530040201e-08, "loss": 3.8086, "step": 1448000 }, { "epoch": 16.09, "learning_rate": 8.478444350665215e-08, "loss": 3.8017, "step": 1448500 }, { "epoch": 16.09, "learning_rate": 8.477056171290228e-08, "loss": 3.8285, "step": 1449000 }, { "epoch": 16.1, "learning_rate": 8.475667991915243e-08, "loss": 3.8077, "step": 1449500 }, { "epoch": 16.1, "learning_rate": 8.474279812540257e-08, "loss": 3.788, "step": 1450000 }, { "epoch": 16.11, "learning_rate": 8.472891633165271e-08, "loss": 3.8276, "step": 1450500 }, { "epoch": 16.11, "learning_rate": 8.471503453790284e-08, "loss": 3.834, "step": 1451000 }, { "epoch": 16.12, "learning_rate": 8.470115274415298e-08, "loss": 3.8128, "step": 1451500 }, { "epoch": 16.13, "learning_rate": 8.468727095040312e-08, "loss": 3.8144, "step": 1452000 }, { "epoch": 16.13, "learning_rate": 8.467338915665327e-08, "loss": 3.8088, "step": 1452500 }, { "epoch": 16.14, "learning_rate": 8.465950736290341e-08, "loss": 3.8208, "step": 1453000 }, { "epoch": 16.14, "learning_rate": 8.464562556915355e-08, "loss": 3.8222, "step": 1453500 }, { "epoch": 16.15, "learning_rate": 8.463174377540368e-08, "loss": 3.7955, "step": 1454000 }, { "epoch": 16.15, "learning_rate": 8.461786198165382e-08, "loss": 3.8453, "step": 1454500 }, { "epoch": 16.16, "learning_rate": 8.460398018790395e-08, "loss": 3.8136, "step": 1455000 }, { "epoch": 16.16, "learning_rate": 8.459009839415409e-08, "loss": 3.8154, "step": 1455500 }, { "epoch": 16.17, "learning_rate": 8.457621660040424e-08, "loss": 3.8284, "step": 1456000 }, { "epoch": 16.18, "learning_rate": 8.456233480665436e-08, "loss": 3.8186, "step": 1456500 }, { "epoch": 16.18, "learning_rate": 8.45484530129045e-08, "loss": 3.8002, "step": 1457000 }, { "epoch": 16.19, "learning_rate": 8.453457121915465e-08, "loss": 3.8042, "step": 1457500 }, { "epoch": 16.19, "learning_rate": 8.452068942540479e-08, "loss": 3.8248, "step": 1458000 }, { "epoch": 16.2, "learning_rate": 8.450680763165493e-08, "loss": 3.8115, "step": 1458500 }, { "epoch": 16.2, "learning_rate": 8.449292583790508e-08, "loss": 3.7705, "step": 1459000 }, { "epoch": 16.21, "learning_rate": 8.44790440441552e-08, "loss": 3.812, "step": 1459500 }, { "epoch": 16.21, "learning_rate": 8.446516225040535e-08, "loss": 3.815, "step": 1460000 }, { "epoch": 16.22, "learning_rate": 8.445128045665549e-08, "loss": 3.8179, "step": 1460500 }, { "epoch": 16.23, "learning_rate": 8.443739866290562e-08, "loss": 3.8306, "step": 1461000 }, { "epoch": 16.23, "learning_rate": 8.442351686915576e-08, "loss": 3.8334, "step": 1461500 }, { "epoch": 16.24, "learning_rate": 8.44096350754059e-08, "loss": 3.8042, "step": 1462000 }, { "epoch": 16.24, "learning_rate": 8.439575328165603e-08, "loss": 3.8125, "step": 1462500 }, { "epoch": 16.25, "learning_rate": 8.438187148790617e-08, "loss": 3.7856, "step": 1463000 }, { "epoch": 16.25, "learning_rate": 8.436798969415632e-08, "loss": 3.8237, "step": 1463500 }, { "epoch": 16.26, "learning_rate": 8.435410790040646e-08, "loss": 3.8156, "step": 1464000 }, { "epoch": 16.26, "learning_rate": 8.43402261066566e-08, "loss": 3.8183, "step": 1464500 }, { "epoch": 16.27, "learning_rate": 8.432634431290673e-08, "loss": 3.8305, "step": 1465000 }, { "epoch": 16.28, "learning_rate": 8.431246251915687e-08, "loss": 3.8205, "step": 1465500 }, { "epoch": 16.28, "learning_rate": 8.429858072540701e-08, "loss": 3.8137, "step": 1466000 }, { "epoch": 16.29, "learning_rate": 8.428469893165714e-08, "loss": 3.8264, "step": 1466500 }, { "epoch": 16.29, "learning_rate": 8.427081713790729e-08, "loss": 3.8302, "step": 1467000 }, { "epoch": 16.3, "learning_rate": 8.425693534415743e-08, "loss": 3.8016, "step": 1467500 }, { "epoch": 16.3, "learning_rate": 8.424305355040756e-08, "loss": 3.8179, "step": 1468000 }, { "epoch": 16.31, "learning_rate": 8.42291717566577e-08, "loss": 3.8046, "step": 1468500 }, { "epoch": 16.31, "learning_rate": 8.421528996290784e-08, "loss": 3.7876, "step": 1469000 }, { "epoch": 16.32, "learning_rate": 8.420140816915798e-08, "loss": 3.7978, "step": 1469500 }, { "epoch": 16.32, "learning_rate": 8.418752637540813e-08, "loss": 3.8062, "step": 1470000 }, { "epoch": 16.33, "learning_rate": 8.417364458165827e-08, "loss": 3.8247, "step": 1470500 }, { "epoch": 16.34, "learning_rate": 8.41597627879084e-08, "loss": 3.801, "step": 1471000 }, { "epoch": 16.34, "learning_rate": 8.414588099415854e-08, "loss": 3.8146, "step": 1471500 }, { "epoch": 16.35, "learning_rate": 8.413199920040868e-08, "loss": 3.8123, "step": 1472000 }, { "epoch": 16.35, "learning_rate": 8.411811740665881e-08, "loss": 3.8107, "step": 1472500 }, { "epoch": 16.36, "learning_rate": 8.410423561290895e-08, "loss": 3.8171, "step": 1473000 }, { "epoch": 16.36, "learning_rate": 8.409035381915908e-08, "loss": 3.8245, "step": 1473500 }, { "epoch": 16.37, "learning_rate": 8.407647202540922e-08, "loss": 3.7992, "step": 1474000 }, { "epoch": 16.37, "learning_rate": 8.406259023165937e-08, "loss": 3.8018, "step": 1474500 }, { "epoch": 16.38, "learning_rate": 8.404870843790951e-08, "loss": 3.8194, "step": 1475000 }, { "epoch": 16.39, "learning_rate": 8.403482664415965e-08, "loss": 3.8054, "step": 1475500 }, { "epoch": 16.39, "learning_rate": 8.40209448504098e-08, "loss": 3.8085, "step": 1476000 }, { "epoch": 16.4, "learning_rate": 8.400706305665992e-08, "loss": 3.811, "step": 1476500 }, { "epoch": 16.4, "learning_rate": 8.399318126291006e-08, "loss": 3.8089, "step": 1477000 }, { "epoch": 16.41, "learning_rate": 8.397929946916021e-08, "loss": 3.8236, "step": 1477500 }, { "epoch": 16.41, "learning_rate": 8.396541767541035e-08, "loss": 3.8247, "step": 1478000 }, { "epoch": 16.42, "learning_rate": 8.395153588166048e-08, "loss": 3.7988, "step": 1478500 }, { "epoch": 16.42, "learning_rate": 8.393765408791062e-08, "loss": 3.8104, "step": 1479000 }, { "epoch": 16.43, "learning_rate": 8.392377229416075e-08, "loss": 3.8342, "step": 1479500 }, { "epoch": 16.44, "learning_rate": 8.390989050041089e-08, "loss": 3.809, "step": 1480000 }, { "epoch": 16.44, "learning_rate": 8.389600870666103e-08, "loss": 3.7938, "step": 1480500 }, { "epoch": 16.45, "learning_rate": 8.388212691291118e-08, "loss": 3.8169, "step": 1481000 }, { "epoch": 16.45, "learning_rate": 8.386824511916132e-08, "loss": 3.8089, "step": 1481500 }, { "epoch": 16.46, "learning_rate": 8.385436332541145e-08, "loss": 3.8115, "step": 1482000 }, { "epoch": 16.46, "learning_rate": 8.384048153166159e-08, "loss": 3.8208, "step": 1482500 }, { "epoch": 16.47, "learning_rate": 8.382659973791173e-08, "loss": 3.8192, "step": 1483000 }, { "epoch": 16.47, "learning_rate": 8.381271794416187e-08, "loss": 3.8117, "step": 1483500 }, { "epoch": 16.48, "learning_rate": 8.379883615041202e-08, "loss": 3.8197, "step": 1484000 }, { "epoch": 16.49, "learning_rate": 8.378495435666215e-08, "loss": 3.8187, "step": 1484500 }, { "epoch": 16.49, "learning_rate": 8.377107256291229e-08, "loss": 3.8165, "step": 1485000 }, { "epoch": 16.5, "learning_rate": 8.375719076916242e-08, "loss": 3.7969, "step": 1485500 }, { "epoch": 16.5, "learning_rate": 8.374330897541256e-08, "loss": 3.8254, "step": 1486000 }, { "epoch": 16.51, "learning_rate": 8.37294271816627e-08, "loss": 3.8132, "step": 1486500 }, { "epoch": 16.51, "learning_rate": 8.371554538791284e-08, "loss": 3.8066, "step": 1487000 }, { "epoch": 16.52, "learning_rate": 8.370166359416297e-08, "loss": 3.8076, "step": 1487500 }, { "epoch": 16.52, "learning_rate": 8.368778180041312e-08, "loss": 3.8176, "step": 1488000 }, { "epoch": 16.53, "learning_rate": 8.367390000666326e-08, "loss": 3.7983, "step": 1488500 }, { "epoch": 16.54, "learning_rate": 8.36600182129134e-08, "loss": 3.8272, "step": 1489000 }, { "epoch": 16.54, "learning_rate": 8.364613641916354e-08, "loss": 3.8164, "step": 1489500 }, { "epoch": 16.55, "learning_rate": 8.363225462541368e-08, "loss": 3.8167, "step": 1490000 }, { "epoch": 16.55, "learning_rate": 8.361837283166381e-08, "loss": 3.8066, "step": 1490500 }, { "epoch": 16.56, "learning_rate": 8.360449103791396e-08, "loss": 3.7905, "step": 1491000 }, { "epoch": 16.56, "learning_rate": 8.359060924416408e-08, "loss": 3.8025, "step": 1491500 }, { "epoch": 16.57, "learning_rate": 8.357672745041423e-08, "loss": 3.8094, "step": 1492000 }, { "epoch": 16.57, "learning_rate": 8.356284565666437e-08, "loss": 3.8193, "step": 1492500 }, { "epoch": 16.58, "learning_rate": 8.35489638629145e-08, "loss": 3.8276, "step": 1493000 }, { "epoch": 16.59, "learning_rate": 8.353508206916464e-08, "loss": 3.82, "step": 1493500 }, { "epoch": 16.59, "learning_rate": 8.352120027541478e-08, "loss": 3.8219, "step": 1494000 }, { "epoch": 16.6, "learning_rate": 8.350731848166492e-08, "loss": 3.8418, "step": 1494500 }, { "epoch": 16.6, "learning_rate": 8.349343668791507e-08, "loss": 3.8105, "step": 1495000 }, { "epoch": 16.61, "learning_rate": 8.347955489416521e-08, "loss": 3.8177, "step": 1495500 }, { "epoch": 16.61, "learning_rate": 8.346567310041534e-08, "loss": 3.8042, "step": 1496000 }, { "epoch": 16.62, "learning_rate": 8.345179130666548e-08, "loss": 3.8262, "step": 1496500 }, { "epoch": 16.62, "learning_rate": 8.343790951291561e-08, "loss": 3.8134, "step": 1497000 }, { "epoch": 16.63, "learning_rate": 8.342402771916575e-08, "loss": 3.8195, "step": 1497500 }, { "epoch": 16.64, "learning_rate": 8.34101459254159e-08, "loss": 3.8213, "step": 1498000 }, { "epoch": 16.64, "learning_rate": 8.339626413166604e-08, "loss": 3.8041, "step": 1498500 }, { "epoch": 16.65, "learning_rate": 8.338238233791617e-08, "loss": 3.807, "step": 1499000 }, { "epoch": 16.65, "learning_rate": 8.336850054416631e-08, "loss": 3.808, "step": 1499500 }, { "epoch": 16.66, "learning_rate": 8.335461875041645e-08, "loss": 3.8179, "step": 1500000 }, { "epoch": 16.66, "learning_rate": 8.334073695666659e-08, "loss": 3.8197, "step": 1500500 }, { "epoch": 16.67, "learning_rate": 8.332685516291673e-08, "loss": 3.8118, "step": 1501000 }, { "epoch": 16.67, "learning_rate": 8.331297336916686e-08, "loss": 3.8238, "step": 1501500 }, { "epoch": 16.68, "learning_rate": 8.3299091575417e-08, "loss": 3.8, "step": 1502000 }, { "epoch": 16.69, "learning_rate": 8.328520978166715e-08, "loss": 3.8048, "step": 1502500 }, { "epoch": 16.69, "learning_rate": 8.327132798791728e-08, "loss": 3.821, "step": 1503000 }, { "epoch": 16.7, "learning_rate": 8.325744619416742e-08, "loss": 3.8155, "step": 1503500 }, { "epoch": 16.7, "learning_rate": 8.324356440041756e-08, "loss": 3.819, "step": 1504000 }, { "epoch": 16.71, "learning_rate": 8.322968260666769e-08, "loss": 3.8134, "step": 1504500 }, { "epoch": 16.71, "learning_rate": 8.321580081291783e-08, "loss": 3.8152, "step": 1505000 }, { "epoch": 16.72, "learning_rate": 8.320191901916798e-08, "loss": 3.806, "step": 1505500 }, { "epoch": 16.72, "learning_rate": 8.318803722541812e-08, "loss": 3.8079, "step": 1506000 }, { "epoch": 16.73, "learning_rate": 8.317415543166826e-08, "loss": 3.8318, "step": 1506500 }, { "epoch": 16.74, "learning_rate": 8.31602736379184e-08, "loss": 3.8015, "step": 1507000 }, { "epoch": 16.74, "learning_rate": 8.314639184416853e-08, "loss": 3.8231, "step": 1507500 }, { "epoch": 16.75, "learning_rate": 8.313251005041867e-08, "loss": 3.8106, "step": 1508000 }, { "epoch": 16.75, "learning_rate": 8.311862825666882e-08, "loss": 3.8139, "step": 1508500 }, { "epoch": 16.76, "learning_rate": 8.310474646291894e-08, "loss": 3.8296, "step": 1509000 }, { "epoch": 16.76, "learning_rate": 8.309086466916909e-08, "loss": 3.8036, "step": 1509500 }, { "epoch": 16.77, "learning_rate": 8.307698287541922e-08, "loss": 3.8279, "step": 1510000 }, { "epoch": 16.77, "learning_rate": 8.306310108166936e-08, "loss": 3.8196, "step": 1510500 }, { "epoch": 16.78, "learning_rate": 8.30492192879195e-08, "loss": 3.8155, "step": 1511000 }, { "epoch": 16.79, "learning_rate": 8.303533749416964e-08, "loss": 3.81, "step": 1511500 }, { "epoch": 16.79, "learning_rate": 8.302145570041978e-08, "loss": 3.8034, "step": 1512000 }, { "epoch": 16.8, "learning_rate": 8.300757390666993e-08, "loss": 3.8207, "step": 1512500 }, { "epoch": 16.8, "learning_rate": 8.299369211292006e-08, "loss": 3.8107, "step": 1513000 }, { "epoch": 16.81, "learning_rate": 8.29798103191702e-08, "loss": 3.8125, "step": 1513500 }, { "epoch": 16.81, "learning_rate": 8.296592852542034e-08, "loss": 3.8347, "step": 1514000 }, { "epoch": 16.82, "learning_rate": 8.295204673167048e-08, "loss": 3.8297, "step": 1514500 }, { "epoch": 16.82, "learning_rate": 8.293816493792061e-08, "loss": 3.8204, "step": 1515000 }, { "epoch": 16.83, "learning_rate": 8.292428314417075e-08, "loss": 3.82, "step": 1515500 }, { "epoch": 16.84, "learning_rate": 8.291040135042088e-08, "loss": 3.8202, "step": 1516000 }, { "epoch": 16.84, "learning_rate": 8.289651955667103e-08, "loss": 3.8214, "step": 1516500 }, { "epoch": 16.85, "learning_rate": 8.288263776292117e-08, "loss": 3.7997, "step": 1517000 }, { "epoch": 16.85, "learning_rate": 8.286875596917131e-08, "loss": 3.8186, "step": 1517500 }, { "epoch": 16.86, "learning_rate": 8.285487417542145e-08, "loss": 3.81, "step": 1518000 }, { "epoch": 16.86, "learning_rate": 8.284099238167158e-08, "loss": 3.8207, "step": 1518500 }, { "epoch": 16.87, "learning_rate": 8.282711058792172e-08, "loss": 3.8071, "step": 1519000 }, { "epoch": 16.87, "learning_rate": 8.281322879417187e-08, "loss": 3.8247, "step": 1519500 }, { "epoch": 16.88, "learning_rate": 8.279934700042201e-08, "loss": 3.8202, "step": 1520000 }, { "epoch": 16.89, "learning_rate": 8.278546520667215e-08, "loss": 3.8027, "step": 1520500 }, { "epoch": 16.89, "learning_rate": 8.277158341292228e-08, "loss": 3.8041, "step": 1521000 }, { "epoch": 16.9, "learning_rate": 8.275770161917242e-08, "loss": 3.8283, "step": 1521500 }, { "epoch": 16.9, "learning_rate": 8.274381982542255e-08, "loss": 3.8106, "step": 1522000 }, { "epoch": 16.91, "learning_rate": 8.272993803167269e-08, "loss": 3.8153, "step": 1522500 }, { "epoch": 16.91, "learning_rate": 8.271605623792284e-08, "loss": 3.8071, "step": 1523000 }, { "epoch": 16.92, "learning_rate": 8.270217444417298e-08, "loss": 3.8048, "step": 1523500 }, { "epoch": 16.92, "learning_rate": 8.268829265042311e-08, "loss": 3.8246, "step": 1524000 }, { "epoch": 16.93, "learning_rate": 8.267441085667325e-08, "loss": 3.8208, "step": 1524500 }, { "epoch": 16.94, "learning_rate": 8.266052906292339e-08, "loss": 3.8201, "step": 1525000 }, { "epoch": 16.94, "learning_rate": 8.264664726917353e-08, "loss": 3.8069, "step": 1525500 }, { "epoch": 16.95, "learning_rate": 8.263276547542368e-08, "loss": 3.8019, "step": 1526000 }, { "epoch": 16.95, "learning_rate": 8.261888368167382e-08, "loss": 3.8163, "step": 1526500 }, { "epoch": 16.96, "learning_rate": 8.260500188792395e-08, "loss": 3.7999, "step": 1527000 }, { "epoch": 16.96, "learning_rate": 8.259112009417408e-08, "loss": 3.8106, "step": 1527500 }, { "epoch": 16.97, "learning_rate": 8.257723830042422e-08, "loss": 3.7933, "step": 1528000 }, { "epoch": 16.97, "learning_rate": 8.256335650667436e-08, "loss": 3.8071, "step": 1528500 }, { "epoch": 16.98, "learning_rate": 8.25494747129245e-08, "loss": 3.7952, "step": 1529000 }, { "epoch": 16.99, "learning_rate": 8.253559291917465e-08, "loss": 3.7952, "step": 1529500 }, { "epoch": 16.99, "learning_rate": 8.252171112542477e-08, "loss": 3.8094, "step": 1530000 }, { "epoch": 17.0, "learning_rate": 8.250782933167492e-08, "loss": 3.805, "step": 1530500 }, { "epoch": 17.0, "eval_loss": 3.8627829551696777, "eval_runtime": 6.3079, "eval_samples_per_second": 246.358, "step": 1530782 }, { "epoch": 17.0, "learning_rate": 8.249394753792506e-08, "loss": 3.8162, "step": 1531000 }, { "epoch": 17.01, "learning_rate": 8.24800657441752e-08, "loss": 3.8088, "step": 1531500 }, { "epoch": 17.01, "learning_rate": 8.246618395042534e-08, "loss": 3.8008, "step": 1532000 }, { "epoch": 17.02, "learning_rate": 8.245230215667547e-08, "loss": 3.8112, "step": 1532500 }, { "epoch": 17.02, "learning_rate": 8.243842036292561e-08, "loss": 3.8323, "step": 1533000 }, { "epoch": 17.03, "learning_rate": 8.242453856917574e-08, "loss": 3.7973, "step": 1533500 }, { "epoch": 17.04, "learning_rate": 8.241065677542589e-08, "loss": 3.7927, "step": 1534000 }, { "epoch": 17.04, "learning_rate": 8.239677498167603e-08, "loss": 3.8046, "step": 1534500 }, { "epoch": 17.05, "learning_rate": 8.238289318792617e-08, "loss": 3.794, "step": 1535000 }, { "epoch": 17.05, "learning_rate": 8.23690113941763e-08, "loss": 3.8122, "step": 1535500 }, { "epoch": 17.06, "learning_rate": 8.235512960042644e-08, "loss": 3.8133, "step": 1536000 }, { "epoch": 17.06, "learning_rate": 8.234124780667658e-08, "loss": 3.8241, "step": 1536500 }, { "epoch": 17.07, "learning_rate": 8.232736601292673e-08, "loss": 3.802, "step": 1537000 }, { "epoch": 17.07, "learning_rate": 8.231348421917687e-08, "loss": 3.8265, "step": 1537500 }, { "epoch": 17.08, "learning_rate": 8.2299602425427e-08, "loss": 3.8211, "step": 1538000 }, { "epoch": 17.09, "learning_rate": 8.228572063167714e-08, "loss": 3.7952, "step": 1538500 }, { "epoch": 17.09, "learning_rate": 8.227183883792728e-08, "loss": 3.8058, "step": 1539000 }, { "epoch": 17.1, "learning_rate": 8.225795704417741e-08, "loss": 3.7884, "step": 1539500 }, { "epoch": 17.1, "learning_rate": 8.224407525042755e-08, "loss": 3.8228, "step": 1540000 }, { "epoch": 17.11, "learning_rate": 8.22301934566777e-08, "loss": 3.8158, "step": 1540500 }, { "epoch": 17.11, "learning_rate": 8.221631166292782e-08, "loss": 3.8083, "step": 1541000 }, { "epoch": 17.12, "learning_rate": 8.220242986917797e-08, "loss": 3.8341, "step": 1541500 }, { "epoch": 17.12, "learning_rate": 8.218854807542811e-08, "loss": 3.7961, "step": 1542000 }, { "epoch": 17.13, "learning_rate": 8.217466628167825e-08, "loss": 3.8178, "step": 1542500 }, { "epoch": 17.14, "learning_rate": 8.21607844879284e-08, "loss": 3.802, "step": 1543000 }, { "epoch": 17.14, "learning_rate": 8.214690269417854e-08, "loss": 3.8048, "step": 1543500 }, { "epoch": 17.15, "learning_rate": 8.213302090042866e-08, "loss": 3.8061, "step": 1544000 }, { "epoch": 17.15, "learning_rate": 8.211913910667881e-08, "loss": 3.8264, "step": 1544500 }, { "epoch": 17.16, "learning_rate": 8.210525731292895e-08, "loss": 3.813, "step": 1545000 }, { "epoch": 17.16, "learning_rate": 8.209137551917908e-08, "loss": 3.7973, "step": 1545500 }, { "epoch": 17.17, "learning_rate": 8.207749372542922e-08, "loss": 3.8024, "step": 1546000 }, { "epoch": 17.17, "learning_rate": 8.206361193167935e-08, "loss": 3.8042, "step": 1546500 }, { "epoch": 17.18, "learning_rate": 8.204973013792949e-08, "loss": 3.803, "step": 1547000 }, { "epoch": 17.19, "learning_rate": 8.203584834417963e-08, "loss": 3.8072, "step": 1547500 }, { "epoch": 17.19, "learning_rate": 8.202196655042978e-08, "loss": 3.8096, "step": 1548000 }, { "epoch": 17.2, "learning_rate": 8.200808475667992e-08, "loss": 3.8117, "step": 1548500 }, { "epoch": 17.2, "learning_rate": 8.199420296293006e-08, "loss": 3.8039, "step": 1549000 }, { "epoch": 17.21, "learning_rate": 8.198032116918019e-08, "loss": 3.7965, "step": 1549500 }, { "epoch": 17.21, "learning_rate": 8.196643937543033e-08, "loss": 3.7996, "step": 1550000 }, { "epoch": 17.22, "learning_rate": 8.195255758168047e-08, "loss": 3.8054, "step": 1550500 }, { "epoch": 17.22, "learning_rate": 8.193867578793062e-08, "loss": 3.802, "step": 1551000 }, { "epoch": 17.23, "learning_rate": 8.192479399418075e-08, "loss": 3.7946, "step": 1551500 }, { "epoch": 17.24, "learning_rate": 8.191091220043089e-08, "loss": 3.8046, "step": 1552000 }, { "epoch": 17.24, "learning_rate": 8.189703040668102e-08, "loss": 3.8187, "step": 1552500 }, { "epoch": 17.25, "learning_rate": 8.188314861293116e-08, "loss": 3.8068, "step": 1553000 }, { "epoch": 17.25, "learning_rate": 8.18692668191813e-08, "loss": 3.8058, "step": 1553500 }, { "epoch": 17.26, "learning_rate": 8.185538502543144e-08, "loss": 3.8246, "step": 1554000 }, { "epoch": 17.26, "learning_rate": 8.184150323168159e-08, "loss": 3.8019, "step": 1554500 }, { "epoch": 17.27, "learning_rate": 8.182762143793172e-08, "loss": 3.8083, "step": 1555000 }, { "epoch": 17.27, "learning_rate": 8.181373964418186e-08, "loss": 3.8115, "step": 1555500 }, { "epoch": 17.28, "learning_rate": 8.1799857850432e-08, "loss": 3.8036, "step": 1556000 }, { "epoch": 17.29, "learning_rate": 8.178597605668214e-08, "loss": 3.7998, "step": 1556500 }, { "epoch": 17.29, "learning_rate": 8.177209426293228e-08, "loss": 3.794, "step": 1557000 }, { "epoch": 17.3, "learning_rate": 8.175821246918241e-08, "loss": 3.8139, "step": 1557500 }, { "epoch": 17.3, "learning_rate": 8.174433067543254e-08, "loss": 3.7824, "step": 1558000 }, { "epoch": 17.31, "learning_rate": 8.173044888168268e-08, "loss": 3.8173, "step": 1558500 }, { "epoch": 17.31, "learning_rate": 8.171656708793283e-08, "loss": 3.809, "step": 1559000 }, { "epoch": 17.32, "learning_rate": 8.170268529418297e-08, "loss": 3.8032, "step": 1559500 }, { "epoch": 17.32, "learning_rate": 8.168880350043311e-08, "loss": 3.8012, "step": 1560000 }, { "epoch": 17.33, "learning_rate": 8.167492170668324e-08, "loss": 3.8079, "step": 1560500 }, { "epoch": 17.34, "learning_rate": 8.166103991293338e-08, "loss": 3.8075, "step": 1561000 }, { "epoch": 17.34, "learning_rate": 8.164715811918353e-08, "loss": 3.7996, "step": 1561500 }, { "epoch": 17.35, "learning_rate": 8.163327632543367e-08, "loss": 3.825, "step": 1562000 }, { "epoch": 17.35, "learning_rate": 8.161939453168381e-08, "loss": 3.8108, "step": 1562500 }, { "epoch": 17.36, "learning_rate": 8.160551273793395e-08, "loss": 3.8177, "step": 1563000 }, { "epoch": 17.36, "learning_rate": 8.159163094418408e-08, "loss": 3.8176, "step": 1563500 }, { "epoch": 17.37, "learning_rate": 8.157774915043421e-08, "loss": 3.8287, "step": 1564000 }, { "epoch": 17.37, "learning_rate": 8.156386735668435e-08, "loss": 3.8034, "step": 1564500 }, { "epoch": 17.38, "learning_rate": 8.15499855629345e-08, "loss": 3.8146, "step": 1565000 }, { "epoch": 17.39, "learning_rate": 8.153610376918464e-08, "loss": 3.809, "step": 1565500 }, { "epoch": 17.39, "learning_rate": 8.152222197543478e-08, "loss": 3.8231, "step": 1566000 }, { "epoch": 17.4, "learning_rate": 8.150834018168491e-08, "loss": 3.8085, "step": 1566500 }, { "epoch": 17.4, "learning_rate": 8.149445838793505e-08, "loss": 3.8029, "step": 1567000 }, { "epoch": 17.41, "learning_rate": 8.148057659418519e-08, "loss": 3.821, "step": 1567500 }, { "epoch": 17.41, "learning_rate": 8.146669480043533e-08, "loss": 3.8029, "step": 1568000 }, { "epoch": 17.42, "learning_rate": 8.145281300668548e-08, "loss": 3.807, "step": 1568500 }, { "epoch": 17.42, "learning_rate": 8.14389312129356e-08, "loss": 3.7985, "step": 1569000 }, { "epoch": 17.43, "learning_rate": 8.142504941918575e-08, "loss": 3.8087, "step": 1569500 }, { "epoch": 17.44, "learning_rate": 8.141116762543588e-08, "loss": 3.8044, "step": 1570000 }, { "epoch": 17.44, "learning_rate": 8.139728583168602e-08, "loss": 3.8027, "step": 1570500 }, { "epoch": 17.45, "learning_rate": 8.138340403793616e-08, "loss": 3.7967, "step": 1571000 }, { "epoch": 17.45, "learning_rate": 8.13695222441863e-08, "loss": 3.8133, "step": 1571500 }, { "epoch": 17.46, "learning_rate": 8.135564045043643e-08, "loss": 3.8169, "step": 1572000 }, { "epoch": 17.46, "learning_rate": 8.134175865668658e-08, "loss": 3.8119, "step": 1572500 }, { "epoch": 17.47, "learning_rate": 8.132787686293672e-08, "loss": 3.8137, "step": 1573000 }, { "epoch": 17.47, "learning_rate": 8.131399506918686e-08, "loss": 3.8217, "step": 1573500 }, { "epoch": 17.48, "learning_rate": 8.1300113275437e-08, "loss": 3.8088, "step": 1574000 }, { "epoch": 17.49, "learning_rate": 8.128623148168713e-08, "loss": 3.8072, "step": 1574500 }, { "epoch": 17.49, "learning_rate": 8.127234968793727e-08, "loss": 3.8164, "step": 1575000 }, { "epoch": 17.5, "learning_rate": 8.125846789418742e-08, "loss": 3.8114, "step": 1575500 }, { "epoch": 17.5, "learning_rate": 8.124458610043754e-08, "loss": 3.8284, "step": 1576000 }, { "epoch": 17.51, "learning_rate": 8.123070430668769e-08, "loss": 3.8031, "step": 1576500 }, { "epoch": 17.51, "learning_rate": 8.121682251293783e-08, "loss": 3.8156, "step": 1577000 }, { "epoch": 17.52, "learning_rate": 8.120294071918796e-08, "loss": 3.8122, "step": 1577500 }, { "epoch": 17.52, "learning_rate": 8.11890589254381e-08, "loss": 3.806, "step": 1578000 }, { "epoch": 17.53, "learning_rate": 8.117517713168824e-08, "loss": 3.8084, "step": 1578500 }, { "epoch": 17.54, "learning_rate": 8.116129533793839e-08, "loss": 3.8027, "step": 1579000 }, { "epoch": 17.54, "learning_rate": 8.114741354418853e-08, "loss": 3.826, "step": 1579500 }, { "epoch": 17.55, "learning_rate": 8.113353175043867e-08, "loss": 3.8197, "step": 1580000 }, { "epoch": 17.55, "learning_rate": 8.11196499566888e-08, "loss": 3.7976, "step": 1580500 }, { "epoch": 17.56, "learning_rate": 8.110576816293894e-08, "loss": 3.8124, "step": 1581000 }, { "epoch": 17.56, "learning_rate": 8.109188636918908e-08, "loss": 3.7982, "step": 1581500 }, { "epoch": 17.57, "learning_rate": 8.107800457543921e-08, "loss": 3.8033, "step": 1582000 }, { "epoch": 17.57, "learning_rate": 8.106412278168935e-08, "loss": 3.8034, "step": 1582500 }, { "epoch": 17.58, "learning_rate": 8.105024098793948e-08, "loss": 3.8005, "step": 1583000 }, { "epoch": 17.59, "learning_rate": 8.103635919418963e-08, "loss": 3.8113, "step": 1583500 }, { "epoch": 17.59, "learning_rate": 8.102247740043977e-08, "loss": 3.8051, "step": 1584000 }, { "epoch": 17.6, "learning_rate": 8.100859560668991e-08, "loss": 3.8114, "step": 1584500 }, { "epoch": 17.6, "learning_rate": 8.099471381294005e-08, "loss": 3.7973, "step": 1585000 }, { "epoch": 17.61, "learning_rate": 8.09808320191902e-08, "loss": 3.8058, "step": 1585500 }, { "epoch": 17.61, "learning_rate": 8.096695022544032e-08, "loss": 3.8052, "step": 1586000 }, { "epoch": 17.62, "learning_rate": 8.095306843169047e-08, "loss": 3.8029, "step": 1586500 }, { "epoch": 17.62, "learning_rate": 8.093918663794061e-08, "loss": 3.8095, "step": 1587000 }, { "epoch": 17.63, "learning_rate": 8.092530484419075e-08, "loss": 3.7888, "step": 1587500 }, { "epoch": 17.64, "learning_rate": 8.091142305044088e-08, "loss": 3.8118, "step": 1588000 }, { "epoch": 17.64, "learning_rate": 8.089754125669102e-08, "loss": 3.8101, "step": 1588500 }, { "epoch": 17.65, "learning_rate": 8.088365946294115e-08, "loss": 3.8108, "step": 1589000 }, { "epoch": 17.65, "learning_rate": 8.086977766919129e-08, "loss": 3.8253, "step": 1589500 }, { "epoch": 17.66, "learning_rate": 8.085589587544144e-08, "loss": 3.7959, "step": 1590000 }, { "epoch": 17.66, "learning_rate": 8.084201408169158e-08, "loss": 3.8004, "step": 1590500 }, { "epoch": 17.67, "learning_rate": 8.082813228794172e-08, "loss": 3.8229, "step": 1591000 }, { "epoch": 17.67, "learning_rate": 8.081425049419185e-08, "loss": 3.7977, "step": 1591500 }, { "epoch": 17.68, "learning_rate": 8.080036870044199e-08, "loss": 3.8166, "step": 1592000 }, { "epoch": 17.69, "learning_rate": 8.078648690669213e-08, "loss": 3.8166, "step": 1592500 }, { "epoch": 17.69, "learning_rate": 8.077260511294228e-08, "loss": 3.8313, "step": 1593000 }, { "epoch": 17.7, "learning_rate": 8.075872331919242e-08, "loss": 3.8007, "step": 1593500 }, { "epoch": 17.7, "learning_rate": 8.074484152544255e-08, "loss": 3.8039, "step": 1594000 }, { "epoch": 17.71, "learning_rate": 8.073095973169268e-08, "loss": 3.8128, "step": 1594500 }, { "epoch": 17.71, "learning_rate": 8.071707793794282e-08, "loss": 3.7996, "step": 1595000 }, { "epoch": 17.72, "learning_rate": 8.070319614419296e-08, "loss": 3.8031, "step": 1595500 }, { "epoch": 17.72, "learning_rate": 8.06893143504431e-08, "loss": 3.8277, "step": 1596000 }, { "epoch": 17.73, "learning_rate": 8.067543255669325e-08, "loss": 3.8008, "step": 1596500 }, { "epoch": 17.74, "learning_rate": 8.066155076294337e-08, "loss": 3.8132, "step": 1597000 }, { "epoch": 17.74, "learning_rate": 8.064766896919352e-08, "loss": 3.8232, "step": 1597500 }, { "epoch": 17.75, "learning_rate": 8.063378717544366e-08, "loss": 3.8208, "step": 1598000 }, { "epoch": 17.75, "learning_rate": 8.06199053816938e-08, "loss": 3.8144, "step": 1598500 }, { "epoch": 17.76, "learning_rate": 8.060602358794394e-08, "loss": 3.825, "step": 1599000 }, { "epoch": 17.76, "learning_rate": 8.059214179419409e-08, "loss": 3.8091, "step": 1599500 }, { "epoch": 17.77, "learning_rate": 8.057826000044421e-08, "loss": 3.8149, "step": 1600000 }, { "epoch": 17.77, "learning_rate": 8.056437820669434e-08, "loss": 3.8123, "step": 1600500 }, { "epoch": 17.78, "learning_rate": 8.055049641294449e-08, "loss": 3.8083, "step": 1601000 }, { "epoch": 17.79, "learning_rate": 8.053661461919463e-08, "loss": 3.8047, "step": 1601500 }, { "epoch": 17.79, "learning_rate": 8.052273282544477e-08, "loss": 3.8127, "step": 1602000 }, { "epoch": 17.8, "learning_rate": 8.050885103169491e-08, "loss": 3.7996, "step": 1602500 }, { "epoch": 17.8, "learning_rate": 8.049496923794504e-08, "loss": 3.8156, "step": 1603000 }, { "epoch": 17.81, "learning_rate": 8.048108744419518e-08, "loss": 3.8025, "step": 1603500 }, { "epoch": 17.81, "learning_rate": 8.046720565044533e-08, "loss": 3.8005, "step": 1604000 }, { "epoch": 17.82, "learning_rate": 8.045332385669547e-08, "loss": 3.8118, "step": 1604500 }, { "epoch": 17.82, "learning_rate": 8.043944206294561e-08, "loss": 3.8133, "step": 1605000 }, { "epoch": 17.83, "learning_rate": 8.042556026919574e-08, "loss": 3.8172, "step": 1605500 }, { "epoch": 17.84, "learning_rate": 8.041167847544588e-08, "loss": 3.7863, "step": 1606000 }, { "epoch": 17.84, "learning_rate": 8.039779668169601e-08, "loss": 3.7791, "step": 1606500 }, { "epoch": 17.85, "learning_rate": 8.038391488794615e-08, "loss": 3.7922, "step": 1607000 }, { "epoch": 17.85, "learning_rate": 8.03700330941963e-08, "loss": 3.7973, "step": 1607500 }, { "epoch": 17.86, "learning_rate": 8.035615130044644e-08, "loss": 3.8094, "step": 1608000 }, { "epoch": 17.86, "learning_rate": 8.034226950669657e-08, "loss": 3.7928, "step": 1608500 }, { "epoch": 17.87, "learning_rate": 8.032838771294671e-08, "loss": 3.8039, "step": 1609000 }, { "epoch": 17.87, "learning_rate": 8.031450591919685e-08, "loss": 3.7949, "step": 1609500 }, { "epoch": 17.88, "learning_rate": 8.0300624125447e-08, "loss": 3.7983, "step": 1610000 }, { "epoch": 17.89, "learning_rate": 8.028674233169714e-08, "loss": 3.8129, "step": 1610500 }, { "epoch": 17.89, "learning_rate": 8.027286053794727e-08, "loss": 3.8007, "step": 1611000 }, { "epoch": 17.9, "learning_rate": 8.025897874419741e-08, "loss": 3.8029, "step": 1611500 }, { "epoch": 17.9, "learning_rate": 8.024509695044755e-08, "loss": 3.8091, "step": 1612000 }, { "epoch": 17.91, "learning_rate": 8.023121515669768e-08, "loss": 3.8038, "step": 1612500 }, { "epoch": 17.91, "learning_rate": 8.021733336294782e-08, "loss": 3.806, "step": 1613000 }, { "epoch": 17.92, "learning_rate": 8.020345156919796e-08, "loss": 3.8024, "step": 1613500 }, { "epoch": 17.92, "learning_rate": 8.018956977544809e-08, "loss": 3.801, "step": 1614000 }, { "epoch": 17.93, "learning_rate": 8.017568798169823e-08, "loss": 3.8097, "step": 1614500 }, { "epoch": 17.94, "learning_rate": 8.016180618794838e-08, "loss": 3.7985, "step": 1615000 }, { "epoch": 17.94, "learning_rate": 8.014792439419852e-08, "loss": 3.8051, "step": 1615500 }, { "epoch": 17.95, "learning_rate": 8.013404260044866e-08, "loss": 3.8034, "step": 1616000 }, { "epoch": 17.95, "learning_rate": 8.01201608066988e-08, "loss": 3.8052, "step": 1616500 }, { "epoch": 17.96, "learning_rate": 8.010627901294893e-08, "loss": 3.8057, "step": 1617000 }, { "epoch": 17.96, "learning_rate": 8.009239721919907e-08, "loss": 3.7846, "step": 1617500 }, { "epoch": 17.97, "learning_rate": 8.007851542544922e-08, "loss": 3.7851, "step": 1618000 }, { "epoch": 17.97, "learning_rate": 8.006463363169935e-08, "loss": 3.8106, "step": 1618500 }, { "epoch": 17.98, "learning_rate": 8.005075183794949e-08, "loss": 3.8231, "step": 1619000 }, { "epoch": 17.99, "learning_rate": 8.003687004419962e-08, "loss": 3.8266, "step": 1619500 }, { "epoch": 17.99, "learning_rate": 8.002298825044976e-08, "loss": 3.8091, "step": 1620000 }, { "epoch": 18.0, "learning_rate": 8.00091064566999e-08, "loss": 3.8298, "step": 1620500 }, { "epoch": 18.0, "eval_loss": 3.8592498302459717, "eval_runtime": 6.3075, "eval_samples_per_second": 246.375, "step": 1620828 }, { "epoch": 18.0, "learning_rate": 7.999522466295004e-08, "loss": 3.8243, "step": 1621000 }, { "epoch": 18.01, "learning_rate": 7.998134286920019e-08, "loss": 3.8152, "step": 1621500 }, { "epoch": 18.01, "learning_rate": 7.996746107545033e-08, "loss": 3.793, "step": 1622000 }, { "epoch": 18.02, "learning_rate": 7.995357928170046e-08, "loss": 3.8098, "step": 1622500 }, { "epoch": 18.02, "learning_rate": 7.99396974879506e-08, "loss": 3.8123, "step": 1623000 }, { "epoch": 18.03, "learning_rate": 7.992581569420074e-08, "loss": 3.7995, "step": 1623500 }, { "epoch": 18.04, "learning_rate": 7.991193390045088e-08, "loss": 3.8055, "step": 1624000 }, { "epoch": 18.04, "learning_rate": 7.989805210670101e-08, "loss": 3.8155, "step": 1624500 }, { "epoch": 18.05, "learning_rate": 7.988417031295116e-08, "loss": 3.7985, "step": 1625000 }, { "epoch": 18.05, "learning_rate": 7.987028851920128e-08, "loss": 3.7892, "step": 1625500 }, { "epoch": 18.06, "learning_rate": 7.985640672545143e-08, "loss": 3.8237, "step": 1626000 }, { "epoch": 18.06, "learning_rate": 7.984252493170157e-08, "loss": 3.7959, "step": 1626500 }, { "epoch": 18.07, "learning_rate": 7.982864313795171e-08, "loss": 3.7915, "step": 1627000 }, { "epoch": 18.07, "learning_rate": 7.981476134420185e-08, "loss": 3.7881, "step": 1627500 }, { "epoch": 18.08, "learning_rate": 7.980087955045198e-08, "loss": 3.8044, "step": 1628000 }, { "epoch": 18.09, "learning_rate": 7.978699775670213e-08, "loss": 3.7891, "step": 1628500 }, { "epoch": 18.09, "learning_rate": 7.977311596295227e-08, "loss": 3.7869, "step": 1629000 }, { "epoch": 18.1, "learning_rate": 7.975923416920241e-08, "loss": 3.8062, "step": 1629500 }, { "epoch": 18.1, "learning_rate": 7.974535237545255e-08, "loss": 3.8081, "step": 1630000 }, { "epoch": 18.11, "learning_rate": 7.973147058170268e-08, "loss": 3.8021, "step": 1630500 }, { "epoch": 18.11, "learning_rate": 7.971758878795281e-08, "loss": 3.7914, "step": 1631000 }, { "epoch": 18.12, "learning_rate": 7.970370699420295e-08, "loss": 3.7964, "step": 1631500 }, { "epoch": 18.12, "learning_rate": 7.96898252004531e-08, "loss": 3.7964, "step": 1632000 }, { "epoch": 18.13, "learning_rate": 7.967594340670324e-08, "loss": 3.814, "step": 1632500 }, { "epoch": 18.14, "learning_rate": 7.966206161295338e-08, "loss": 3.8009, "step": 1633000 }, { "epoch": 18.14, "learning_rate": 7.964817981920351e-08, "loss": 3.8109, "step": 1633500 }, { "epoch": 18.15, "learning_rate": 7.963429802545365e-08, "loss": 3.8077, "step": 1634000 }, { "epoch": 18.15, "learning_rate": 7.962041623170379e-08, "loss": 3.8147, "step": 1634500 }, { "epoch": 18.16, "learning_rate": 7.960653443795393e-08, "loss": 3.8093, "step": 1635000 }, { "epoch": 18.16, "learning_rate": 7.959265264420408e-08, "loss": 3.8039, "step": 1635500 }, { "epoch": 18.17, "learning_rate": 7.957877085045422e-08, "loss": 3.8128, "step": 1636000 }, { "epoch": 18.17, "learning_rate": 7.956488905670435e-08, "loss": 3.7914, "step": 1636500 }, { "epoch": 18.18, "learning_rate": 7.955100726295448e-08, "loss": 3.798, "step": 1637000 }, { "epoch": 18.19, "learning_rate": 7.953712546920462e-08, "loss": 3.8156, "step": 1637500 }, { "epoch": 18.19, "learning_rate": 7.952324367545476e-08, "loss": 3.8045, "step": 1638000 }, { "epoch": 18.2, "learning_rate": 7.95093618817049e-08, "loss": 3.798, "step": 1638500 }, { "epoch": 18.2, "learning_rate": 7.949548008795505e-08, "loss": 3.7958, "step": 1639000 }, { "epoch": 18.21, "learning_rate": 7.948159829420518e-08, "loss": 3.7975, "step": 1639500 }, { "epoch": 18.21, "learning_rate": 7.946771650045532e-08, "loss": 3.8075, "step": 1640000 }, { "epoch": 18.22, "learning_rate": 7.945383470670546e-08, "loss": 3.7894, "step": 1640500 }, { "epoch": 18.22, "learning_rate": 7.94399529129556e-08, "loss": 3.8022, "step": 1641000 }, { "epoch": 18.23, "learning_rate": 7.942607111920574e-08, "loss": 3.8065, "step": 1641500 }, { "epoch": 18.24, "learning_rate": 7.941218932545587e-08, "loss": 3.8134, "step": 1642000 }, { "epoch": 18.24, "learning_rate": 7.939830753170602e-08, "loss": 3.7995, "step": 1642500 }, { "epoch": 18.25, "learning_rate": 7.938442573795614e-08, "loss": 3.8211, "step": 1643000 }, { "epoch": 18.25, "learning_rate": 7.937054394420629e-08, "loss": 3.8057, "step": 1643500 }, { "epoch": 18.26, "learning_rate": 7.935666215045643e-08, "loss": 3.8048, "step": 1644000 }, { "epoch": 18.26, "learning_rate": 7.934278035670657e-08, "loss": 3.8267, "step": 1644500 }, { "epoch": 18.27, "learning_rate": 7.93288985629567e-08, "loss": 3.8134, "step": 1645000 }, { "epoch": 18.27, "learning_rate": 7.931501676920684e-08, "loss": 3.7961, "step": 1645500 }, { "epoch": 18.28, "learning_rate": 7.930113497545699e-08, "loss": 3.8012, "step": 1646000 }, { "epoch": 18.29, "learning_rate": 7.928725318170713e-08, "loss": 3.8117, "step": 1646500 }, { "epoch": 18.29, "learning_rate": 7.927337138795727e-08, "loss": 3.8043, "step": 1647000 }, { "epoch": 18.3, "learning_rate": 7.92594895942074e-08, "loss": 3.8062, "step": 1647500 }, { "epoch": 18.3, "learning_rate": 7.924560780045754e-08, "loss": 3.7995, "step": 1648000 }, { "epoch": 18.31, "learning_rate": 7.923172600670768e-08, "loss": 3.7896, "step": 1648500 }, { "epoch": 18.31, "learning_rate": 7.921784421295781e-08, "loss": 3.7983, "step": 1649000 }, { "epoch": 18.32, "learning_rate": 7.920396241920795e-08, "loss": 3.8059, "step": 1649500 }, { "epoch": 18.32, "learning_rate": 7.91900806254581e-08, "loss": 3.7998, "step": 1650000 }, { "epoch": 18.33, "learning_rate": 7.917619883170823e-08, "loss": 3.8103, "step": 1650500 }, { "epoch": 18.34, "learning_rate": 7.916231703795837e-08, "loss": 3.8165, "step": 1651000 }, { "epoch": 18.34, "learning_rate": 7.914843524420851e-08, "loss": 3.8103, "step": 1651500 }, { "epoch": 18.35, "learning_rate": 7.913455345045865e-08, "loss": 3.7898, "step": 1652000 }, { "epoch": 18.35, "learning_rate": 7.91206716567088e-08, "loss": 3.8264, "step": 1652500 }, { "epoch": 18.36, "learning_rate": 7.910678986295894e-08, "loss": 3.801, "step": 1653000 }, { "epoch": 18.36, "learning_rate": 7.909290806920907e-08, "loss": 3.8075, "step": 1653500 }, { "epoch": 18.37, "learning_rate": 7.907902627545921e-08, "loss": 3.8058, "step": 1654000 }, { "epoch": 18.37, "learning_rate": 7.906514448170935e-08, "loss": 3.7893, "step": 1654500 }, { "epoch": 18.38, "learning_rate": 7.905126268795948e-08, "loss": 3.7998, "step": 1655000 }, { "epoch": 18.39, "learning_rate": 7.903738089420962e-08, "loss": 3.8315, "step": 1655500 }, { "epoch": 18.39, "learning_rate": 7.902349910045975e-08, "loss": 3.7957, "step": 1656000 }, { "epoch": 18.4, "learning_rate": 7.90096173067099e-08, "loss": 3.816, "step": 1656500 }, { "epoch": 18.4, "learning_rate": 7.899573551296004e-08, "loss": 3.7955, "step": 1657000 }, { "epoch": 18.41, "learning_rate": 7.898185371921018e-08, "loss": 3.8179, "step": 1657500 }, { "epoch": 18.41, "learning_rate": 7.896797192546032e-08, "loss": 3.7936, "step": 1658000 }, { "epoch": 18.42, "learning_rate": 7.895409013171046e-08, "loss": 3.8086, "step": 1658500 }, { "epoch": 18.42, "learning_rate": 7.894020833796059e-08, "loss": 3.8047, "step": 1659000 }, { "epoch": 18.43, "learning_rate": 7.892632654421073e-08, "loss": 3.7983, "step": 1659500 }, { "epoch": 18.44, "learning_rate": 7.891244475046088e-08, "loss": 3.8145, "step": 1660000 }, { "epoch": 18.44, "learning_rate": 7.889856295671102e-08, "loss": 3.7944, "step": 1660500 }, { "epoch": 18.45, "learning_rate": 7.888468116296115e-08, "loss": 3.8025, "step": 1661000 }, { "epoch": 18.45, "learning_rate": 7.887079936921129e-08, "loss": 3.79, "step": 1661500 }, { "epoch": 18.46, "learning_rate": 7.885691757546142e-08, "loss": 3.8295, "step": 1662000 }, { "epoch": 18.46, "learning_rate": 7.884303578171156e-08, "loss": 3.8068, "step": 1662500 }, { "epoch": 18.47, "learning_rate": 7.88291539879617e-08, "loss": 3.811, "step": 1663000 }, { "epoch": 18.47, "learning_rate": 7.881527219421185e-08, "loss": 3.7964, "step": 1663500 }, { "epoch": 18.48, "learning_rate": 7.880139040046199e-08, "loss": 3.8171, "step": 1664000 }, { "epoch": 18.48, "learning_rate": 7.878750860671212e-08, "loss": 3.8098, "step": 1664500 }, { "epoch": 18.49, "learning_rate": 7.877362681296226e-08, "loss": 3.7987, "step": 1665000 }, { "epoch": 18.5, "learning_rate": 7.87597450192124e-08, "loss": 3.8104, "step": 1665500 }, { "epoch": 18.5, "learning_rate": 7.874586322546254e-08, "loss": 3.8168, "step": 1666000 }, { "epoch": 18.51, "learning_rate": 7.873198143171269e-08, "loss": 3.8133, "step": 1666500 }, { "epoch": 18.51, "learning_rate": 7.871809963796281e-08, "loss": 3.799, "step": 1667000 }, { "epoch": 18.52, "learning_rate": 7.870421784421294e-08, "loss": 3.8096, "step": 1667500 }, { "epoch": 18.52, "learning_rate": 7.869033605046309e-08, "loss": 3.8316, "step": 1668000 }, { "epoch": 18.53, "learning_rate": 7.867645425671323e-08, "loss": 3.7886, "step": 1668500 }, { "epoch": 18.53, "learning_rate": 7.866257246296337e-08, "loss": 3.8021, "step": 1669000 }, { "epoch": 18.54, "learning_rate": 7.864869066921351e-08, "loss": 3.7834, "step": 1669500 }, { "epoch": 18.55, "learning_rate": 7.863480887546364e-08, "loss": 3.8008, "step": 1670000 }, { "epoch": 18.55, "learning_rate": 7.862092708171378e-08, "loss": 3.8158, "step": 1670500 }, { "epoch": 18.56, "learning_rate": 7.860704528796393e-08, "loss": 3.7952, "step": 1671000 }, { "epoch": 18.56, "learning_rate": 7.859316349421407e-08, "loss": 3.8093, "step": 1671500 }, { "epoch": 18.57, "learning_rate": 7.857928170046421e-08, "loss": 3.8114, "step": 1672000 }, { "epoch": 18.57, "learning_rate": 7.856539990671435e-08, "loss": 3.7972, "step": 1672500 }, { "epoch": 18.58, "learning_rate": 7.855151811296448e-08, "loss": 3.7824, "step": 1673000 }, { "epoch": 18.58, "learning_rate": 7.853763631921461e-08, "loss": 3.7884, "step": 1673500 }, { "epoch": 18.59, "learning_rate": 7.852375452546475e-08, "loss": 3.8032, "step": 1674000 }, { "epoch": 18.6, "learning_rate": 7.85098727317149e-08, "loss": 3.8057, "step": 1674500 }, { "epoch": 18.6, "learning_rate": 7.849599093796504e-08, "loss": 3.8021, "step": 1675000 }, { "epoch": 18.61, "learning_rate": 7.848210914421518e-08, "loss": 3.8104, "step": 1675500 }, { "epoch": 18.61, "learning_rate": 7.846822735046531e-08, "loss": 3.8176, "step": 1676000 }, { "epoch": 18.62, "learning_rate": 7.845434555671545e-08, "loss": 3.7965, "step": 1676500 }, { "epoch": 18.62, "learning_rate": 7.84404637629656e-08, "loss": 3.7893, "step": 1677000 }, { "epoch": 18.63, "learning_rate": 7.842658196921574e-08, "loss": 3.8028, "step": 1677500 }, { "epoch": 18.63, "learning_rate": 7.841270017546588e-08, "loss": 3.8143, "step": 1678000 }, { "epoch": 18.64, "learning_rate": 7.839881838171601e-08, "loss": 3.7986, "step": 1678500 }, { "epoch": 18.65, "learning_rate": 7.838493658796615e-08, "loss": 3.8016, "step": 1679000 }, { "epoch": 18.65, "learning_rate": 7.837105479421628e-08, "loss": 3.7878, "step": 1679500 }, { "epoch": 18.66, "learning_rate": 7.835717300046642e-08, "loss": 3.8143, "step": 1680000 }, { "epoch": 18.66, "learning_rate": 7.834329120671656e-08, "loss": 3.8011, "step": 1680500 }, { "epoch": 18.67, "learning_rate": 7.83294094129667e-08, "loss": 3.8107, "step": 1681000 }, { "epoch": 18.67, "learning_rate": 7.831552761921683e-08, "loss": 3.8064, "step": 1681500 }, { "epoch": 18.68, "learning_rate": 7.830164582546698e-08, "loss": 3.8115, "step": 1682000 }, { "epoch": 18.68, "learning_rate": 7.828776403171712e-08, "loss": 3.7995, "step": 1682500 }, { "epoch": 18.69, "learning_rate": 7.827388223796726e-08, "loss": 3.7899, "step": 1683000 }, { "epoch": 18.7, "learning_rate": 7.82600004442174e-08, "loss": 3.8011, "step": 1683500 }, { "epoch": 18.7, "learning_rate": 7.824611865046755e-08, "loss": 3.8036, "step": 1684000 }, { "epoch": 18.71, "learning_rate": 7.823223685671767e-08, "loss": 3.7953, "step": 1684500 }, { "epoch": 18.71, "learning_rate": 7.821835506296782e-08, "loss": 3.8062, "step": 1685000 }, { "epoch": 18.72, "learning_rate": 7.820447326921795e-08, "loss": 3.8018, "step": 1685500 }, { "epoch": 18.72, "learning_rate": 7.819059147546809e-08, "loss": 3.8173, "step": 1686000 }, { "epoch": 18.73, "learning_rate": 7.817670968171823e-08, "loss": 3.7973, "step": 1686500 }, { "epoch": 18.73, "learning_rate": 7.816282788796836e-08, "loss": 3.7905, "step": 1687000 }, { "epoch": 18.74, "learning_rate": 7.81489460942185e-08, "loss": 3.7987, "step": 1687500 }, { "epoch": 18.75, "learning_rate": 7.813506430046864e-08, "loss": 3.7968, "step": 1688000 }, { "epoch": 18.75, "learning_rate": 7.812118250671879e-08, "loss": 3.8173, "step": 1688500 }, { "epoch": 18.76, "learning_rate": 7.810730071296893e-08, "loss": 3.8063, "step": 1689000 }, { "epoch": 18.76, "learning_rate": 7.809341891921907e-08, "loss": 3.7957, "step": 1689500 }, { "epoch": 18.77, "learning_rate": 7.80795371254692e-08, "loss": 3.8068, "step": 1690000 }, { "epoch": 18.77, "learning_rate": 7.806565533171934e-08, "loss": 3.78, "step": 1690500 }, { "epoch": 18.78, "learning_rate": 7.805177353796948e-08, "loss": 3.8095, "step": 1691000 }, { "epoch": 18.78, "learning_rate": 7.803789174421961e-08, "loss": 3.8058, "step": 1691500 }, { "epoch": 18.79, "learning_rate": 7.802400995046976e-08, "loss": 3.7942, "step": 1692000 }, { "epoch": 18.8, "learning_rate": 7.801012815671988e-08, "loss": 3.8032, "step": 1692500 }, { "epoch": 18.8, "learning_rate": 7.799624636297003e-08, "loss": 3.7896, "step": 1693000 }, { "epoch": 18.81, "learning_rate": 7.798236456922017e-08, "loss": 3.7838, "step": 1693500 }, { "epoch": 18.81, "learning_rate": 7.796848277547031e-08, "loss": 3.7953, "step": 1694000 }, { "epoch": 18.82, "learning_rate": 7.795460098172045e-08, "loss": 3.7927, "step": 1694500 }, { "epoch": 18.82, "learning_rate": 7.79407191879706e-08, "loss": 3.7847, "step": 1695000 }, { "epoch": 18.83, "learning_rate": 7.792683739422073e-08, "loss": 3.8211, "step": 1695500 }, { "epoch": 18.83, "learning_rate": 7.791295560047087e-08, "loss": 3.8017, "step": 1696000 }, { "epoch": 18.84, "learning_rate": 7.789907380672101e-08, "loss": 3.7961, "step": 1696500 }, { "epoch": 18.85, "learning_rate": 7.788519201297115e-08, "loss": 3.8013, "step": 1697000 }, { "epoch": 18.85, "learning_rate": 7.787131021922128e-08, "loss": 3.8001, "step": 1697500 }, { "epoch": 18.86, "learning_rate": 7.785742842547142e-08, "loss": 3.814, "step": 1698000 }, { "epoch": 18.86, "learning_rate": 7.784354663172155e-08, "loss": 3.7919, "step": 1698500 }, { "epoch": 18.87, "learning_rate": 7.78296648379717e-08, "loss": 3.794, "step": 1699000 }, { "epoch": 18.87, "learning_rate": 7.781578304422184e-08, "loss": 3.7969, "step": 1699500 }, { "epoch": 18.88, "learning_rate": 7.780190125047198e-08, "loss": 3.7963, "step": 1700000 }, { "epoch": 18.88, "learning_rate": 7.778801945672212e-08, "loss": 3.7987, "step": 1700500 }, { "epoch": 18.89, "learning_rate": 7.777413766297225e-08, "loss": 3.7847, "step": 1701000 }, { "epoch": 18.9, "learning_rate": 7.776025586922239e-08, "loss": 3.8126, "step": 1701500 }, { "epoch": 18.9, "learning_rate": 7.774637407547254e-08, "loss": 3.8, "step": 1702000 }, { "epoch": 18.91, "learning_rate": 7.773249228172268e-08, "loss": 3.8206, "step": 1702500 }, { "epoch": 18.91, "learning_rate": 7.771861048797282e-08, "loss": 3.7828, "step": 1703000 }, { "epoch": 18.92, "learning_rate": 7.770472869422295e-08, "loss": 3.8022, "step": 1703500 }, { "epoch": 18.92, "learning_rate": 7.769084690047308e-08, "loss": 3.7976, "step": 1704000 }, { "epoch": 18.93, "learning_rate": 7.767696510672322e-08, "loss": 3.7949, "step": 1704500 }, { "epoch": 18.93, "learning_rate": 7.766308331297336e-08, "loss": 3.7969, "step": 1705000 }, { "epoch": 18.94, "learning_rate": 7.76492015192235e-08, "loss": 3.8113, "step": 1705500 }, { "epoch": 18.95, "learning_rate": 7.763531972547365e-08, "loss": 3.791, "step": 1706000 }, { "epoch": 18.95, "learning_rate": 7.762143793172378e-08, "loss": 3.7971, "step": 1706500 }, { "epoch": 18.96, "learning_rate": 7.760755613797392e-08, "loss": 3.7941, "step": 1707000 }, { "epoch": 18.96, "learning_rate": 7.759367434422406e-08, "loss": 3.8058, "step": 1707500 }, { "epoch": 18.97, "learning_rate": 7.75797925504742e-08, "loss": 3.8023, "step": 1708000 }, { "epoch": 18.97, "learning_rate": 7.756591075672434e-08, "loss": 3.8226, "step": 1708500 }, { "epoch": 18.98, "learning_rate": 7.755202896297449e-08, "loss": 3.7875, "step": 1709000 }, { "epoch": 18.98, "learning_rate": 7.753814716922462e-08, "loss": 3.8119, "step": 1709500 }, { "epoch": 18.99, "learning_rate": 7.752426537547475e-08, "loss": 3.8151, "step": 1710000 }, { "epoch": 19.0, "learning_rate": 7.751038358172489e-08, "loss": 3.8161, "step": 1710500 }, { "epoch": 19.0, "eval_loss": 3.8565142154693604, "eval_runtime": 6.3064, "eval_samples_per_second": 246.416, "step": 1710874 }, { "epoch": 19.0, "learning_rate": 7.749650178797503e-08, "loss": 3.8229, "step": 1711000 }, { "epoch": 19.01, "learning_rate": 7.748261999422517e-08, "loss": 3.794, "step": 1711500 }, { "epoch": 19.01, "learning_rate": 7.746873820047531e-08, "loss": 3.7888, "step": 1712000 }, { "epoch": 19.02, "learning_rate": 7.745485640672544e-08, "loss": 3.7998, "step": 1712500 }, { "epoch": 19.02, "learning_rate": 7.744097461297559e-08, "loss": 3.7976, "step": 1713000 }, { "epoch": 19.03, "learning_rate": 7.742709281922573e-08, "loss": 3.7674, "step": 1713500 }, { "epoch": 19.03, "learning_rate": 7.741321102547587e-08, "loss": 3.7792, "step": 1714000 }, { "epoch": 19.04, "learning_rate": 7.739932923172601e-08, "loss": 3.7915, "step": 1714500 }, { "epoch": 19.05, "learning_rate": 7.738544743797614e-08, "loss": 3.8065, "step": 1715000 }, { "epoch": 19.05, "learning_rate": 7.737156564422628e-08, "loss": 3.8095, "step": 1715500 }, { "epoch": 19.06, "learning_rate": 7.735768385047641e-08, "loss": 3.816, "step": 1716000 }, { "epoch": 19.06, "learning_rate": 7.734380205672655e-08, "loss": 3.8047, "step": 1716500 }, { "epoch": 19.07, "learning_rate": 7.73299202629767e-08, "loss": 3.7947, "step": 1717000 }, { "epoch": 19.07, "learning_rate": 7.731603846922684e-08, "loss": 3.8103, "step": 1717500 }, { "epoch": 19.08, "learning_rate": 7.730215667547697e-08, "loss": 3.7911, "step": 1718000 }, { "epoch": 19.08, "learning_rate": 7.728827488172711e-08, "loss": 3.8011, "step": 1718500 }, { "epoch": 19.09, "learning_rate": 7.727439308797725e-08, "loss": 3.8143, "step": 1719000 }, { "epoch": 19.1, "learning_rate": 7.72605112942274e-08, "loss": 3.8125, "step": 1719500 }, { "epoch": 19.1, "learning_rate": 7.724662950047754e-08, "loss": 3.8094, "step": 1720000 }, { "epoch": 19.11, "learning_rate": 7.723274770672768e-08, "loss": 3.8021, "step": 1720500 }, { "epoch": 19.11, "learning_rate": 7.721886591297781e-08, "loss": 3.8011, "step": 1721000 }, { "epoch": 19.12, "learning_rate": 7.720498411922795e-08, "loss": 3.7851, "step": 1721500 }, { "epoch": 19.12, "learning_rate": 7.719110232547808e-08, "loss": 3.8019, "step": 1722000 }, { "epoch": 19.13, "learning_rate": 7.717722053172822e-08, "loss": 3.8182, "step": 1722500 }, { "epoch": 19.13, "learning_rate": 7.716333873797836e-08, "loss": 3.8107, "step": 1723000 }, { "epoch": 19.14, "learning_rate": 7.71494569442285e-08, "loss": 3.7883, "step": 1723500 }, { "epoch": 19.15, "learning_rate": 7.713557515047864e-08, "loss": 3.7994, "step": 1724000 }, { "epoch": 19.15, "learning_rate": 7.712169335672878e-08, "loss": 3.7832, "step": 1724500 }, { "epoch": 19.16, "learning_rate": 7.710781156297892e-08, "loss": 3.7814, "step": 1725000 }, { "epoch": 19.16, "learning_rate": 7.709392976922906e-08, "loss": 3.8007, "step": 1725500 }, { "epoch": 19.17, "learning_rate": 7.70800479754792e-08, "loss": 3.7963, "step": 1726000 }, { "epoch": 19.17, "learning_rate": 7.706616618172933e-08, "loss": 3.8004, "step": 1726500 }, { "epoch": 19.18, "learning_rate": 7.705228438797948e-08, "loss": 3.8011, "step": 1727000 }, { "epoch": 19.18, "learning_rate": 7.703840259422962e-08, "loss": 3.794, "step": 1727500 }, { "epoch": 19.19, "learning_rate": 7.702452080047975e-08, "loss": 3.7903, "step": 1728000 }, { "epoch": 19.2, "learning_rate": 7.701063900672989e-08, "loss": 3.8178, "step": 1728500 }, { "epoch": 19.2, "learning_rate": 7.699675721298002e-08, "loss": 3.7936, "step": 1729000 }, { "epoch": 19.21, "learning_rate": 7.698287541923016e-08, "loss": 3.7773, "step": 1729500 }, { "epoch": 19.21, "learning_rate": 7.69689936254803e-08, "loss": 3.8125, "step": 1730000 }, { "epoch": 19.22, "learning_rate": 7.695511183173045e-08, "loss": 3.8203, "step": 1730500 }, { "epoch": 19.22, "learning_rate": 7.694123003798059e-08, "loss": 3.7956, "step": 1731000 }, { "epoch": 19.23, "learning_rate": 7.692734824423073e-08, "loss": 3.7872, "step": 1731500 }, { "epoch": 19.23, "learning_rate": 7.691346645048086e-08, "loss": 3.8037, "step": 1732000 }, { "epoch": 19.24, "learning_rate": 7.6899584656731e-08, "loss": 3.7933, "step": 1732500 }, { "epoch": 19.25, "learning_rate": 7.688570286298114e-08, "loss": 3.8028, "step": 1733000 }, { "epoch": 19.25, "learning_rate": 7.687182106923129e-08, "loss": 3.7937, "step": 1733500 }, { "epoch": 19.26, "learning_rate": 7.685793927548141e-08, "loss": 3.7934, "step": 1734000 }, { "epoch": 19.26, "learning_rate": 7.684405748173156e-08, "loss": 3.7865, "step": 1734500 }, { "epoch": 19.27, "learning_rate": 7.683017568798169e-08, "loss": 3.7982, "step": 1735000 }, { "epoch": 19.27, "learning_rate": 7.681629389423183e-08, "loss": 3.7828, "step": 1735500 }, { "epoch": 19.28, "learning_rate": 7.680241210048197e-08, "loss": 3.7969, "step": 1736000 }, { "epoch": 19.28, "learning_rate": 7.678853030673211e-08, "loss": 3.7867, "step": 1736500 }, { "epoch": 19.29, "learning_rate": 7.677464851298226e-08, "loss": 3.7937, "step": 1737000 }, { "epoch": 19.3, "learning_rate": 7.676076671923238e-08, "loss": 3.8099, "step": 1737500 }, { "epoch": 19.3, "learning_rate": 7.674688492548253e-08, "loss": 3.8038, "step": 1738000 }, { "epoch": 19.31, "learning_rate": 7.673300313173267e-08, "loss": 3.805, "step": 1738500 }, { "epoch": 19.31, "learning_rate": 7.671912133798281e-08, "loss": 3.7951, "step": 1739000 }, { "epoch": 19.32, "learning_rate": 7.670523954423295e-08, "loss": 3.7824, "step": 1739500 }, { "epoch": 19.32, "learning_rate": 7.669135775048308e-08, "loss": 3.8023, "step": 1740000 }, { "epoch": 19.33, "learning_rate": 7.667747595673321e-08, "loss": 3.8166, "step": 1740500 }, { "epoch": 19.33, "learning_rate": 7.666359416298335e-08, "loss": 3.8183, "step": 1741000 }, { "epoch": 19.34, "learning_rate": 7.66497123692335e-08, "loss": 3.8044, "step": 1741500 }, { "epoch": 19.35, "learning_rate": 7.663583057548364e-08, "loss": 3.795, "step": 1742000 }, { "epoch": 19.35, "learning_rate": 7.662194878173378e-08, "loss": 3.8058, "step": 1742500 }, { "epoch": 19.36, "learning_rate": 7.660806698798391e-08, "loss": 3.7886, "step": 1743000 }, { "epoch": 19.36, "learning_rate": 7.659418519423405e-08, "loss": 3.7869, "step": 1743500 }, { "epoch": 19.37, "learning_rate": 7.65803034004842e-08, "loss": 3.8041, "step": 1744000 }, { "epoch": 19.37, "learning_rate": 7.656642160673434e-08, "loss": 3.7995, "step": 1744500 }, { "epoch": 19.38, "learning_rate": 7.655253981298448e-08, "loss": 3.8083, "step": 1745000 }, { "epoch": 19.38, "learning_rate": 7.653865801923462e-08, "loss": 3.7982, "step": 1745500 }, { "epoch": 19.39, "learning_rate": 7.652477622548475e-08, "loss": 3.8015, "step": 1746000 }, { "epoch": 19.4, "learning_rate": 7.651089443173488e-08, "loss": 3.7959, "step": 1746500 }, { "epoch": 19.4, "learning_rate": 7.649701263798502e-08, "loss": 3.8234, "step": 1747000 }, { "epoch": 19.41, "learning_rate": 7.648313084423516e-08, "loss": 3.824, "step": 1747500 }, { "epoch": 19.41, "learning_rate": 7.64692490504853e-08, "loss": 3.7907, "step": 1748000 }, { "epoch": 19.42, "learning_rate": 7.645536725673545e-08, "loss": 3.7896, "step": 1748500 }, { "epoch": 19.42, "learning_rate": 7.644148546298558e-08, "loss": 3.8168, "step": 1749000 }, { "epoch": 19.43, "learning_rate": 7.642760366923572e-08, "loss": 3.8147, "step": 1749500 }, { "epoch": 19.43, "learning_rate": 7.641372187548586e-08, "loss": 3.8147, "step": 1750000 }, { "epoch": 19.44, "learning_rate": 7.6399840081736e-08, "loss": 3.7868, "step": 1750500 }, { "epoch": 19.45, "learning_rate": 7.638595828798615e-08, "loss": 3.7938, "step": 1751000 }, { "epoch": 19.45, "learning_rate": 7.637207649423628e-08, "loss": 3.8054, "step": 1751500 }, { "epoch": 19.46, "learning_rate": 7.635819470048642e-08, "loss": 3.8005, "step": 1752000 }, { "epoch": 19.46, "learning_rate": 7.634431290673655e-08, "loss": 3.7959, "step": 1752500 }, { "epoch": 19.47, "learning_rate": 7.633043111298669e-08, "loss": 3.7931, "step": 1753000 }, { "epoch": 19.47, "learning_rate": 7.631654931923683e-08, "loss": 3.7825, "step": 1753500 }, { "epoch": 19.48, "learning_rate": 7.630266752548697e-08, "loss": 3.8054, "step": 1754000 }, { "epoch": 19.48, "learning_rate": 7.62887857317371e-08, "loss": 3.7934, "step": 1754500 }, { "epoch": 19.49, "learning_rate": 7.627490393798724e-08, "loss": 3.798, "step": 1755000 }, { "epoch": 19.5, "learning_rate": 7.626102214423739e-08, "loss": 3.8198, "step": 1755500 }, { "epoch": 19.5, "learning_rate": 7.624714035048753e-08, "loss": 3.7971, "step": 1756000 }, { "epoch": 19.51, "learning_rate": 7.623325855673767e-08, "loss": 3.7956, "step": 1756500 }, { "epoch": 19.51, "learning_rate": 7.621937676298781e-08, "loss": 3.7888, "step": 1757000 }, { "epoch": 19.52, "learning_rate": 7.620549496923794e-08, "loss": 3.7896, "step": 1757500 }, { "epoch": 19.52, "learning_rate": 7.619161317548808e-08, "loss": 3.799, "step": 1758000 }, { "epoch": 19.53, "learning_rate": 7.617773138173821e-08, "loss": 3.7932, "step": 1758500 }, { "epoch": 19.53, "learning_rate": 7.616384958798836e-08, "loss": 3.8061, "step": 1759000 }, { "epoch": 19.54, "learning_rate": 7.61499677942385e-08, "loss": 3.8021, "step": 1759500 }, { "epoch": 19.55, "learning_rate": 7.613608600048863e-08, "loss": 3.7909, "step": 1760000 }, { "epoch": 19.55, "learning_rate": 7.612220420673877e-08, "loss": 3.8025, "step": 1760500 }, { "epoch": 19.56, "learning_rate": 7.610832241298891e-08, "loss": 3.7997, "step": 1761000 }, { "epoch": 19.56, "learning_rate": 7.609444061923905e-08, "loss": 3.7918, "step": 1761500 }, { "epoch": 19.57, "learning_rate": 7.60805588254892e-08, "loss": 3.8004, "step": 1762000 }, { "epoch": 19.57, "learning_rate": 7.606667703173934e-08, "loss": 3.7896, "step": 1762500 }, { "epoch": 19.58, "learning_rate": 7.605279523798947e-08, "loss": 3.7981, "step": 1763000 }, { "epoch": 19.58, "learning_rate": 7.603891344423961e-08, "loss": 3.8126, "step": 1763500 }, { "epoch": 19.59, "learning_rate": 7.602503165048975e-08, "loss": 3.7757, "step": 1764000 }, { "epoch": 19.6, "learning_rate": 7.601114985673988e-08, "loss": 3.8005, "step": 1764500 }, { "epoch": 19.6, "learning_rate": 7.599726806299002e-08, "loss": 3.7864, "step": 1765000 }, { "epoch": 19.61, "learning_rate": 7.598338626924015e-08, "loss": 3.8066, "step": 1765500 }, { "epoch": 19.61, "learning_rate": 7.59695044754903e-08, "loss": 3.7919, "step": 1766000 }, { "epoch": 19.62, "learning_rate": 7.595562268174044e-08, "loss": 3.8104, "step": 1766500 }, { "epoch": 19.62, "learning_rate": 7.594174088799058e-08, "loss": 3.7901, "step": 1767000 }, { "epoch": 19.63, "learning_rate": 7.592785909424072e-08, "loss": 3.7801, "step": 1767500 }, { "epoch": 19.63, "learning_rate": 7.591397730049086e-08, "loss": 3.7929, "step": 1768000 }, { "epoch": 19.64, "learning_rate": 7.590009550674099e-08, "loss": 3.8095, "step": 1768500 }, { "epoch": 19.65, "learning_rate": 7.588621371299114e-08, "loss": 3.7961, "step": 1769000 }, { "epoch": 19.65, "learning_rate": 7.587233191924128e-08, "loss": 3.8034, "step": 1769500 }, { "epoch": 19.66, "learning_rate": 7.585845012549142e-08, "loss": 3.7942, "step": 1770000 }, { "epoch": 19.66, "learning_rate": 7.584456833174155e-08, "loss": 3.8079, "step": 1770500 }, { "epoch": 19.67, "learning_rate": 7.583068653799169e-08, "loss": 3.7797, "step": 1771000 }, { "epoch": 19.67, "learning_rate": 7.581680474424182e-08, "loss": 3.8036, "step": 1771500 }, { "epoch": 19.68, "learning_rate": 7.580292295049196e-08, "loss": 3.797, "step": 1772000 }, { "epoch": 19.68, "learning_rate": 7.57890411567421e-08, "loss": 3.8003, "step": 1772500 }, { "epoch": 19.69, "learning_rate": 7.577515936299225e-08, "loss": 3.7965, "step": 1773000 }, { "epoch": 19.7, "learning_rate": 7.576127756924239e-08, "loss": 3.7857, "step": 1773500 }, { "epoch": 19.7, "learning_rate": 7.574739577549252e-08, "loss": 3.8118, "step": 1774000 }, { "epoch": 19.71, "learning_rate": 7.573351398174266e-08, "loss": 3.805, "step": 1774500 }, { "epoch": 19.71, "learning_rate": 7.57196321879928e-08, "loss": 3.7956, "step": 1775000 }, { "epoch": 19.72, "learning_rate": 7.570575039424294e-08, "loss": 3.811, "step": 1775500 }, { "epoch": 19.72, "learning_rate": 7.569186860049309e-08, "loss": 3.8026, "step": 1776000 }, { "epoch": 19.73, "learning_rate": 7.567798680674322e-08, "loss": 3.7898, "step": 1776500 }, { "epoch": 19.73, "learning_rate": 7.566410501299335e-08, "loss": 3.7926, "step": 1777000 }, { "epoch": 19.74, "learning_rate": 7.565022321924349e-08, "loss": 3.7924, "step": 1777500 }, { "epoch": 19.75, "learning_rate": 7.563634142549363e-08, "loss": 3.7859, "step": 1778000 }, { "epoch": 19.75, "learning_rate": 7.562245963174377e-08, "loss": 3.8176, "step": 1778500 }, { "epoch": 19.76, "learning_rate": 7.560857783799391e-08, "loss": 3.7954, "step": 1779000 }, { "epoch": 19.76, "learning_rate": 7.559469604424406e-08, "loss": 3.7931, "step": 1779500 }, { "epoch": 19.77, "learning_rate": 7.558081425049419e-08, "loss": 3.8269, "step": 1780000 }, { "epoch": 19.77, "learning_rate": 7.556693245674433e-08, "loss": 3.7931, "step": 1780500 }, { "epoch": 19.78, "learning_rate": 7.555305066299447e-08, "loss": 3.8076, "step": 1781000 }, { "epoch": 19.78, "learning_rate": 7.553916886924461e-08, "loss": 3.7931, "step": 1781500 }, { "epoch": 19.79, "learning_rate": 7.552528707549474e-08, "loss": 3.7941, "step": 1782000 }, { "epoch": 19.8, "learning_rate": 7.551140528174488e-08, "loss": 3.7918, "step": 1782500 }, { "epoch": 19.8, "learning_rate": 7.549752348799501e-08, "loss": 3.7983, "step": 1783000 }, { "epoch": 19.81, "learning_rate": 7.548364169424516e-08, "loss": 3.7864, "step": 1783500 }, { "epoch": 19.81, "learning_rate": 7.54697599004953e-08, "loss": 3.7954, "step": 1784000 }, { "epoch": 19.82, "learning_rate": 7.545587810674544e-08, "loss": 3.7967, "step": 1784500 }, { "epoch": 19.82, "learning_rate": 7.544199631299558e-08, "loss": 3.787, "step": 1785000 }, { "epoch": 19.83, "learning_rate": 7.542811451924571e-08, "loss": 3.7962, "step": 1785500 }, { "epoch": 19.83, "learning_rate": 7.541423272549585e-08, "loss": 3.7912, "step": 1786000 }, { "epoch": 19.84, "learning_rate": 7.5400350931746e-08, "loss": 3.7921, "step": 1786500 }, { "epoch": 19.85, "learning_rate": 7.538646913799614e-08, "loss": 3.8113, "step": 1787000 }, { "epoch": 19.85, "learning_rate": 7.537258734424628e-08, "loss": 3.809, "step": 1787500 }, { "epoch": 19.86, "learning_rate": 7.535870555049641e-08, "loss": 3.8026, "step": 1788000 }, { "epoch": 19.86, "learning_rate": 7.534482375674655e-08, "loss": 3.7974, "step": 1788500 }, { "epoch": 19.87, "learning_rate": 7.533094196299668e-08, "loss": 3.7963, "step": 1789000 }, { "epoch": 19.87, "learning_rate": 7.531706016924682e-08, "loss": 3.8083, "step": 1789500 }, { "epoch": 19.88, "learning_rate": 7.530317837549696e-08, "loss": 3.7901, "step": 1790000 }, { "epoch": 19.88, "learning_rate": 7.528929658174711e-08, "loss": 3.8087, "step": 1790500 }, { "epoch": 19.89, "learning_rate": 7.527541478799724e-08, "loss": 3.8044, "step": 1791000 }, { "epoch": 19.9, "learning_rate": 7.526153299424738e-08, "loss": 3.8059, "step": 1791500 }, { "epoch": 19.9, "learning_rate": 7.524765120049752e-08, "loss": 3.8079, "step": 1792000 }, { "epoch": 19.91, "learning_rate": 7.523376940674766e-08, "loss": 3.812, "step": 1792500 }, { "epoch": 19.91, "learning_rate": 7.52198876129978e-08, "loss": 3.8006, "step": 1793000 }, { "epoch": 19.92, "learning_rate": 7.520600581924795e-08, "loss": 3.7945, "step": 1793500 }, { "epoch": 19.92, "learning_rate": 7.519212402549808e-08, "loss": 3.8061, "step": 1794000 }, { "epoch": 19.93, "learning_rate": 7.517824223174822e-08, "loss": 3.791, "step": 1794500 }, { "epoch": 19.93, "learning_rate": 7.516436043799835e-08, "loss": 3.7976, "step": 1795000 }, { "epoch": 19.94, "learning_rate": 7.515047864424849e-08, "loss": 3.7774, "step": 1795500 }, { "epoch": 19.95, "learning_rate": 7.513659685049863e-08, "loss": 3.7975, "step": 1796000 }, { "epoch": 19.95, "learning_rate": 7.512271505674876e-08, "loss": 3.7811, "step": 1796500 }, { "epoch": 19.96, "learning_rate": 7.51088332629989e-08, "loss": 3.8031, "step": 1797000 }, { "epoch": 19.96, "learning_rate": 7.509495146924905e-08, "loss": 3.785, "step": 1797500 }, { "epoch": 19.97, "learning_rate": 7.508106967549919e-08, "loss": 3.8107, "step": 1798000 }, { "epoch": 19.97, "learning_rate": 7.506718788174933e-08, "loss": 3.7846, "step": 1798500 }, { "epoch": 19.98, "learning_rate": 7.505330608799947e-08, "loss": 3.7925, "step": 1799000 }, { "epoch": 19.98, "learning_rate": 7.50394242942496e-08, "loss": 3.786, "step": 1799500 }, { "epoch": 19.99, "learning_rate": 7.502554250049974e-08, "loss": 3.8075, "step": 1800000 }, { "epoch": 20.0, "learning_rate": 7.501166070674989e-08, "loss": 3.8053, "step": 1800500 }, { "epoch": 20.0, "eval_loss": 3.853641986846924, "eval_runtime": 6.3154, "eval_samples_per_second": 246.067, "step": 1800920 }, { "epoch": 20.0, "learning_rate": 7.499777891300002e-08, "loss": 3.7879, "step": 1801000 }, { "epoch": 20.01, "learning_rate": 7.498389711925016e-08, "loss": 3.8016, "step": 1801500 }, { "epoch": 20.01, "learning_rate": 7.497001532550029e-08, "loss": 3.7886, "step": 1802000 }, { "epoch": 20.02, "learning_rate": 7.495613353175043e-08, "loss": 3.7989, "step": 1802500 }, { "epoch": 20.02, "learning_rate": 7.494225173800057e-08, "loss": 3.7949, "step": 1803000 }, { "epoch": 20.03, "learning_rate": 7.492836994425071e-08, "loss": 3.8031, "step": 1803500 }, { "epoch": 20.03, "learning_rate": 7.491448815050086e-08, "loss": 3.804, "step": 1804000 }, { "epoch": 20.04, "learning_rate": 7.4900606356751e-08, "loss": 3.8101, "step": 1804500 }, { "epoch": 20.05, "learning_rate": 7.488672456300113e-08, "loss": 3.8003, "step": 1805000 }, { "epoch": 20.05, "learning_rate": 7.487284276925127e-08, "loss": 3.8068, "step": 1805500 }, { "epoch": 20.06, "learning_rate": 7.485896097550141e-08, "loss": 3.7904, "step": 1806000 }, { "epoch": 20.06, "learning_rate": 7.484507918175155e-08, "loss": 3.7845, "step": 1806500 }, { "epoch": 20.07, "learning_rate": 7.483119738800168e-08, "loss": 3.8028, "step": 1807000 }, { "epoch": 20.07, "learning_rate": 7.481731559425182e-08, "loss": 3.7913, "step": 1807500 }, { "epoch": 20.08, "learning_rate": 7.480343380050195e-08, "loss": 3.7935, "step": 1808000 }, { "epoch": 20.08, "learning_rate": 7.47895520067521e-08, "loss": 3.8113, "step": 1808500 }, { "epoch": 20.09, "learning_rate": 7.477567021300224e-08, "loss": 3.7969, "step": 1809000 }, { "epoch": 20.1, "learning_rate": 7.476178841925238e-08, "loss": 3.8077, "step": 1809500 }, { "epoch": 20.1, "learning_rate": 7.474790662550252e-08, "loss": 3.7805, "step": 1810000 }, { "epoch": 20.11, "learning_rate": 7.473402483175265e-08, "loss": 3.8041, "step": 1810500 }, { "epoch": 20.11, "learning_rate": 7.47201430380028e-08, "loss": 3.7961, "step": 1811000 }, { "epoch": 20.12, "learning_rate": 7.470626124425294e-08, "loss": 3.7882, "step": 1811500 }, { "epoch": 20.12, "learning_rate": 7.469237945050308e-08, "loss": 3.8015, "step": 1812000 }, { "epoch": 20.13, "learning_rate": 7.467849765675321e-08, "loss": 3.7987, "step": 1812500 }, { "epoch": 20.13, "learning_rate": 7.466461586300335e-08, "loss": 3.7985, "step": 1813000 }, { "epoch": 20.14, "learning_rate": 7.465073406925348e-08, "loss": 3.7907, "step": 1813500 }, { "epoch": 20.15, "learning_rate": 7.463685227550362e-08, "loss": 3.7879, "step": 1814000 }, { "epoch": 20.15, "learning_rate": 7.462297048175376e-08, "loss": 3.8134, "step": 1814500 }, { "epoch": 20.16, "learning_rate": 7.46090886880039e-08, "loss": 3.7943, "step": 1815000 }, { "epoch": 20.16, "learning_rate": 7.459520689425405e-08, "loss": 3.784, "step": 1815500 }, { "epoch": 20.17, "learning_rate": 7.458132510050419e-08, "loss": 3.7972, "step": 1816000 }, { "epoch": 20.17, "learning_rate": 7.456744330675432e-08, "loss": 3.7978, "step": 1816500 }, { "epoch": 20.18, "learning_rate": 7.455356151300446e-08, "loss": 3.7801, "step": 1817000 }, { "epoch": 20.18, "learning_rate": 7.45396797192546e-08, "loss": 3.8055, "step": 1817500 }, { "epoch": 20.19, "learning_rate": 7.452579792550475e-08, "loss": 3.7792, "step": 1818000 }, { "epoch": 20.2, "learning_rate": 7.451191613175488e-08, "loss": 3.8062, "step": 1818500 }, { "epoch": 20.2, "learning_rate": 7.449803433800502e-08, "loss": 3.7984, "step": 1819000 }, { "epoch": 20.21, "learning_rate": 7.448415254425515e-08, "loss": 3.7905, "step": 1819500 }, { "epoch": 20.21, "learning_rate": 7.447027075050529e-08, "loss": 3.7896, "step": 1820000 }, { "epoch": 20.22, "learning_rate": 7.445638895675543e-08, "loss": 3.8079, "step": 1820500 }, { "epoch": 20.22, "learning_rate": 7.444250716300557e-08, "loss": 3.7922, "step": 1821000 }, { "epoch": 20.23, "learning_rate": 7.442862536925572e-08, "loss": 3.8041, "step": 1821500 }, { "epoch": 20.23, "learning_rate": 7.441474357550584e-08, "loss": 3.7892, "step": 1822000 }, { "epoch": 20.24, "learning_rate": 7.440086178175599e-08, "loss": 3.7927, "step": 1822500 }, { "epoch": 20.25, "learning_rate": 7.438697998800613e-08, "loss": 3.7934, "step": 1823000 }, { "epoch": 20.25, "learning_rate": 7.437309819425627e-08, "loss": 3.7822, "step": 1823500 }, { "epoch": 20.26, "learning_rate": 7.435921640050641e-08, "loss": 3.7961, "step": 1824000 }, { "epoch": 20.26, "learning_rate": 7.434533460675654e-08, "loss": 3.7783, "step": 1824500 }, { "epoch": 20.27, "learning_rate": 7.433145281300668e-08, "loss": 3.7934, "step": 1825000 }, { "epoch": 20.27, "learning_rate": 7.431757101925681e-08, "loss": 3.8011, "step": 1825500 }, { "epoch": 20.28, "learning_rate": 7.430368922550696e-08, "loss": 3.7755, "step": 1826000 }, { "epoch": 20.28, "learning_rate": 7.42898074317571e-08, "loss": 3.7925, "step": 1826500 }, { "epoch": 20.29, "learning_rate": 7.427592563800724e-08, "loss": 3.8185, "step": 1827000 }, { "epoch": 20.3, "learning_rate": 7.426204384425737e-08, "loss": 3.7931, "step": 1827500 }, { "epoch": 20.3, "learning_rate": 7.424816205050751e-08, "loss": 3.7789, "step": 1828000 }, { "epoch": 20.31, "learning_rate": 7.423428025675765e-08, "loss": 3.8054, "step": 1828500 }, { "epoch": 20.31, "learning_rate": 7.42203984630078e-08, "loss": 3.7812, "step": 1829000 }, { "epoch": 20.32, "learning_rate": 7.420651666925794e-08, "loss": 3.7771, "step": 1829500 }, { "epoch": 20.32, "learning_rate": 7.419263487550808e-08, "loss": 3.7851, "step": 1830000 }, { "epoch": 20.33, "learning_rate": 7.417875308175821e-08, "loss": 3.7804, "step": 1830500 }, { "epoch": 20.33, "learning_rate": 7.416487128800835e-08, "loss": 3.7709, "step": 1831000 }, { "epoch": 20.34, "learning_rate": 7.415098949425848e-08, "loss": 3.8039, "step": 1831500 }, { "epoch": 20.35, "learning_rate": 7.413710770050862e-08, "loss": 3.7956, "step": 1832000 }, { "epoch": 20.35, "learning_rate": 7.412322590675877e-08, "loss": 3.795, "step": 1832500 }, { "epoch": 20.36, "learning_rate": 7.41093441130089e-08, "loss": 3.8189, "step": 1833000 }, { "epoch": 20.36, "learning_rate": 7.409546231925904e-08, "loss": 3.8009, "step": 1833500 }, { "epoch": 20.37, "learning_rate": 7.408158052550918e-08, "loss": 3.7999, "step": 1834000 }, { "epoch": 20.37, "learning_rate": 7.406769873175932e-08, "loss": 3.801, "step": 1834500 }, { "epoch": 20.38, "learning_rate": 7.405381693800946e-08, "loss": 3.783, "step": 1835000 }, { "epoch": 20.38, "learning_rate": 7.40399351442596e-08, "loss": 3.7943, "step": 1835500 }, { "epoch": 20.39, "learning_rate": 7.402605335050974e-08, "loss": 3.7877, "step": 1836000 }, { "epoch": 20.4, "learning_rate": 7.401217155675988e-08, "loss": 3.8005, "step": 1836500 }, { "epoch": 20.4, "learning_rate": 7.399828976301002e-08, "loss": 3.7856, "step": 1837000 }, { "epoch": 20.41, "learning_rate": 7.398440796926015e-08, "loss": 3.791, "step": 1837500 }, { "epoch": 20.41, "learning_rate": 7.397052617551029e-08, "loss": 3.7982, "step": 1838000 }, { "epoch": 20.42, "learning_rate": 7.395664438176043e-08, "loss": 3.7989, "step": 1838500 }, { "epoch": 20.42, "learning_rate": 7.394276258801056e-08, "loss": 3.7937, "step": 1839000 }, { "epoch": 20.43, "learning_rate": 7.39288807942607e-08, "loss": 3.7971, "step": 1839500 }, { "epoch": 20.43, "learning_rate": 7.391499900051085e-08, "loss": 3.7732, "step": 1840000 }, { "epoch": 20.44, "learning_rate": 7.390111720676099e-08, "loss": 3.7934, "step": 1840500 }, { "epoch": 20.45, "learning_rate": 7.388723541301113e-08, "loss": 3.8004, "step": 1841000 }, { "epoch": 20.45, "learning_rate": 7.387335361926126e-08, "loss": 3.7911, "step": 1841500 }, { "epoch": 20.46, "learning_rate": 7.38594718255114e-08, "loss": 3.7887, "step": 1842000 }, { "epoch": 20.46, "learning_rate": 7.384559003176155e-08, "loss": 3.7934, "step": 1842500 }, { "epoch": 20.47, "learning_rate": 7.383170823801167e-08, "loss": 3.7929, "step": 1843000 }, { "epoch": 20.47, "learning_rate": 7.381782644426182e-08, "loss": 3.7775, "step": 1843500 }, { "epoch": 20.48, "learning_rate": 7.380394465051196e-08, "loss": 3.7854, "step": 1844000 }, { "epoch": 20.48, "learning_rate": 7.379006285676209e-08, "loss": 3.7935, "step": 1844500 }, { "epoch": 20.49, "learning_rate": 7.377618106301223e-08, "loss": 3.7867, "step": 1845000 }, { "epoch": 20.5, "learning_rate": 7.376229926926237e-08, "loss": 3.7832, "step": 1845500 }, { "epoch": 20.5, "learning_rate": 7.374841747551251e-08, "loss": 3.8162, "step": 1846000 }, { "epoch": 20.51, "learning_rate": 7.373453568176266e-08, "loss": 3.7905, "step": 1846500 }, { "epoch": 20.51, "learning_rate": 7.372065388801279e-08, "loss": 3.8053, "step": 1847000 }, { "epoch": 20.52, "learning_rate": 7.370677209426293e-08, "loss": 3.7959, "step": 1847500 }, { "epoch": 20.52, "learning_rate": 7.369289030051307e-08, "loss": 3.7998, "step": 1848000 }, { "epoch": 20.53, "learning_rate": 7.367900850676321e-08, "loss": 3.7926, "step": 1848500 }, { "epoch": 20.53, "learning_rate": 7.366512671301334e-08, "loss": 3.7766, "step": 1849000 }, { "epoch": 20.54, "learning_rate": 7.365124491926348e-08, "loss": 3.791, "step": 1849500 }, { "epoch": 20.55, "learning_rate": 7.363736312551361e-08, "loss": 3.8024, "step": 1850000 }, { "epoch": 20.55, "learning_rate": 7.362348133176376e-08, "loss": 3.8076, "step": 1850500 }, { "epoch": 20.56, "learning_rate": 7.36095995380139e-08, "loss": 3.7737, "step": 1851000 }, { "epoch": 20.56, "learning_rate": 7.359571774426404e-08, "loss": 3.7721, "step": 1851500 }, { "epoch": 20.57, "learning_rate": 7.358183595051418e-08, "loss": 3.8007, "step": 1852000 }, { "epoch": 20.57, "learning_rate": 7.356795415676432e-08, "loss": 3.7948, "step": 1852500 }, { "epoch": 20.58, "learning_rate": 7.355407236301445e-08, "loss": 3.7961, "step": 1853000 }, { "epoch": 20.58, "learning_rate": 7.35401905692646e-08, "loss": 3.7918, "step": 1853500 }, { "epoch": 20.59, "learning_rate": 7.352630877551474e-08, "loss": 3.8145, "step": 1854000 }, { "epoch": 20.6, "learning_rate": 7.351242698176488e-08, "loss": 3.7967, "step": 1854500 }, { "epoch": 20.6, "learning_rate": 7.349854518801501e-08, "loss": 3.8052, "step": 1855000 }, { "epoch": 20.61, "learning_rate": 7.348466339426515e-08, "loss": 3.8081, "step": 1855500 }, { "epoch": 20.61, "learning_rate": 7.347078160051528e-08, "loss": 3.7957, "step": 1856000 }, { "epoch": 20.62, "learning_rate": 7.345689980676542e-08, "loss": 3.779, "step": 1856500 }, { "epoch": 20.62, "learning_rate": 7.344301801301556e-08, "loss": 3.7836, "step": 1857000 }, { "epoch": 20.63, "learning_rate": 7.342913621926571e-08, "loss": 3.8091, "step": 1857500 }, { "epoch": 20.63, "learning_rate": 7.341525442551585e-08, "loss": 3.8083, "step": 1858000 }, { "epoch": 20.64, "learning_rate": 7.340137263176598e-08, "loss": 3.8133, "step": 1858500 }, { "epoch": 20.65, "learning_rate": 7.338749083801612e-08, "loss": 3.7899, "step": 1859000 }, { "epoch": 20.65, "learning_rate": 7.337360904426626e-08, "loss": 3.7923, "step": 1859500 }, { "epoch": 20.66, "learning_rate": 7.33597272505164e-08, "loss": 3.7969, "step": 1860000 }, { "epoch": 20.66, "learning_rate": 7.334584545676655e-08, "loss": 3.7909, "step": 1860500 }, { "epoch": 20.67, "learning_rate": 7.333196366301668e-08, "loss": 3.7936, "step": 1861000 }, { "epoch": 20.67, "learning_rate": 7.331808186926682e-08, "loss": 3.7971, "step": 1861500 }, { "epoch": 20.68, "learning_rate": 7.330420007551695e-08, "loss": 3.792, "step": 1862000 }, { "epoch": 20.68, "learning_rate": 7.329031828176709e-08, "loss": 3.7835, "step": 1862500 }, { "epoch": 20.69, "learning_rate": 7.327643648801723e-08, "loss": 3.8016, "step": 1863000 }, { "epoch": 20.69, "learning_rate": 7.326255469426737e-08, "loss": 3.8168, "step": 1863500 }, { "epoch": 20.7, "learning_rate": 7.32486729005175e-08, "loss": 3.8032, "step": 1864000 }, { "epoch": 20.71, "learning_rate": 7.323479110676765e-08, "loss": 3.8184, "step": 1864500 }, { "epoch": 20.71, "learning_rate": 7.322090931301779e-08, "loss": 3.7962, "step": 1865000 }, { "epoch": 20.72, "learning_rate": 7.320702751926793e-08, "loss": 3.7996, "step": 1865500 }, { "epoch": 20.72, "learning_rate": 7.319314572551807e-08, "loss": 3.7789, "step": 1866000 }, { "epoch": 20.73, "learning_rate": 7.317926393176821e-08, "loss": 3.7911, "step": 1866500 }, { "epoch": 20.73, "learning_rate": 7.316538213801834e-08, "loss": 3.805, "step": 1867000 }, { "epoch": 20.74, "learning_rate": 7.315150034426849e-08, "loss": 3.79, "step": 1867500 }, { "epoch": 20.74, "learning_rate": 7.313761855051862e-08, "loss": 3.7989, "step": 1868000 }, { "epoch": 20.75, "learning_rate": 7.312373675676876e-08, "loss": 3.7835, "step": 1868500 }, { "epoch": 20.76, "learning_rate": 7.31098549630189e-08, "loss": 3.8064, "step": 1869000 }, { "epoch": 20.76, "learning_rate": 7.309597316926903e-08, "loss": 3.7765, "step": 1869500 }, { "epoch": 20.77, "learning_rate": 7.308209137551917e-08, "loss": 3.7731, "step": 1870000 }, { "epoch": 20.77, "learning_rate": 7.306820958176931e-08, "loss": 3.8029, "step": 1870500 }, { "epoch": 20.78, "learning_rate": 7.305432778801946e-08, "loss": 3.7952, "step": 1871000 }, { "epoch": 20.78, "learning_rate": 7.30404459942696e-08, "loss": 3.7889, "step": 1871500 }, { "epoch": 20.79, "learning_rate": 7.302656420051974e-08, "loss": 3.8002, "step": 1872000 }, { "epoch": 20.79, "learning_rate": 7.301268240676987e-08, "loss": 3.7875, "step": 1872500 }, { "epoch": 20.8, "learning_rate": 7.299880061302001e-08, "loss": 3.8035, "step": 1873000 }, { "epoch": 20.81, "learning_rate": 7.298491881927014e-08, "loss": 3.7853, "step": 1873500 }, { "epoch": 20.81, "learning_rate": 7.297103702552028e-08, "loss": 3.7991, "step": 1874000 }, { "epoch": 20.82, "learning_rate": 7.295715523177043e-08, "loss": 3.7909, "step": 1874500 }, { "epoch": 20.82, "learning_rate": 7.294327343802057e-08, "loss": 3.7939, "step": 1875000 }, { "epoch": 20.83, "learning_rate": 7.29293916442707e-08, "loss": 3.7988, "step": 1875500 }, { "epoch": 20.83, "learning_rate": 7.291550985052084e-08, "loss": 3.7876, "step": 1876000 }, { "epoch": 20.84, "learning_rate": 7.290162805677098e-08, "loss": 3.7942, "step": 1876500 }, { "epoch": 20.84, "learning_rate": 7.288774626302112e-08, "loss": 3.8014, "step": 1877000 }, { "epoch": 20.85, "learning_rate": 7.287386446927127e-08, "loss": 3.7714, "step": 1877500 }, { "epoch": 20.86, "learning_rate": 7.28599826755214e-08, "loss": 3.7876, "step": 1878000 }, { "epoch": 20.86, "learning_rate": 7.284610088177154e-08, "loss": 3.7936, "step": 1878500 }, { "epoch": 20.87, "learning_rate": 7.283221908802168e-08, "loss": 3.788, "step": 1879000 }, { "epoch": 20.87, "learning_rate": 7.281833729427181e-08, "loss": 3.8013, "step": 1879500 }, { "epoch": 20.88, "learning_rate": 7.280445550052195e-08, "loss": 3.7853, "step": 1880000 }, { "epoch": 20.88, "learning_rate": 7.279057370677209e-08, "loss": 3.776, "step": 1880500 }, { "epoch": 20.89, "learning_rate": 7.277669191302222e-08, "loss": 3.801, "step": 1881000 }, { "epoch": 20.89, "learning_rate": 7.276281011927236e-08, "loss": 3.795, "step": 1881500 }, { "epoch": 20.9, "learning_rate": 7.27489283255225e-08, "loss": 3.8004, "step": 1882000 }, { "epoch": 20.91, "learning_rate": 7.273504653177265e-08, "loss": 3.7954, "step": 1882500 }, { "epoch": 20.91, "learning_rate": 7.272116473802279e-08, "loss": 3.806, "step": 1883000 }, { "epoch": 20.92, "learning_rate": 7.270728294427292e-08, "loss": 3.7901, "step": 1883500 }, { "epoch": 20.92, "learning_rate": 7.269340115052306e-08, "loss": 3.7892, "step": 1884000 }, { "epoch": 20.93, "learning_rate": 7.26795193567732e-08, "loss": 3.7932, "step": 1884500 }, { "epoch": 20.93, "learning_rate": 7.266563756302335e-08, "loss": 3.7816, "step": 1885000 }, { "epoch": 20.94, "learning_rate": 7.265175576927348e-08, "loss": 3.7863, "step": 1885500 }, { "epoch": 20.94, "learning_rate": 7.263787397552362e-08, "loss": 3.7845, "step": 1886000 }, { "epoch": 20.95, "learning_rate": 7.262399218177375e-08, "loss": 3.7904, "step": 1886500 }, { "epoch": 20.96, "learning_rate": 7.261011038802389e-08, "loss": 3.7817, "step": 1887000 }, { "epoch": 20.96, "learning_rate": 7.259622859427403e-08, "loss": 3.7957, "step": 1887500 }, { "epoch": 20.97, "learning_rate": 7.258234680052417e-08, "loss": 3.8018, "step": 1888000 }, { "epoch": 20.97, "learning_rate": 7.256846500677432e-08, "loss": 3.8093, "step": 1888500 }, { "epoch": 20.98, "learning_rate": 7.255458321302446e-08, "loss": 3.8011, "step": 1889000 }, { "epoch": 20.98, "learning_rate": 7.254070141927459e-08, "loss": 3.8015, "step": 1889500 }, { "epoch": 20.99, "learning_rate": 7.252681962552473e-08, "loss": 3.7908, "step": 1890000 }, { "epoch": 20.99, "learning_rate": 7.251293783177487e-08, "loss": 3.7689, "step": 1890500 }, { "epoch": 21.0, "eval_loss": 3.8509271144866943, "eval_runtime": 6.3078, "eval_samples_per_second": 246.362, "step": 1890966 }, { "epoch": 21.0, "learning_rate": 7.249905603802501e-08, "loss": 3.7892, "step": 1891000 }, { "epoch": 21.01, "learning_rate": 7.248517424427514e-08, "loss": 3.7717, "step": 1891500 }, { "epoch": 21.01, "learning_rate": 7.247129245052529e-08, "loss": 3.812, "step": 1892000 }, { "epoch": 21.02, "learning_rate": 7.245741065677541e-08, "loss": 3.8008, "step": 1892500 }, { "epoch": 21.02, "learning_rate": 7.244352886302556e-08, "loss": 3.8002, "step": 1893000 }, { "epoch": 21.03, "learning_rate": 7.24296470692757e-08, "loss": 3.7934, "step": 1893500 }, { "epoch": 21.03, "learning_rate": 7.241576527552584e-08, "loss": 3.7885, "step": 1894000 }, { "epoch": 21.04, "learning_rate": 7.240188348177598e-08, "loss": 3.796, "step": 1894500 }, { "epoch": 21.04, "learning_rate": 7.238800168802611e-08, "loss": 3.784, "step": 1895000 }, { "epoch": 21.05, "learning_rate": 7.237411989427625e-08, "loss": 3.8008, "step": 1895500 }, { "epoch": 21.06, "learning_rate": 7.23602381005264e-08, "loss": 3.7905, "step": 1896000 }, { "epoch": 21.06, "learning_rate": 7.234635630677654e-08, "loss": 3.8028, "step": 1896500 }, { "epoch": 21.07, "learning_rate": 7.233247451302668e-08, "loss": 3.7682, "step": 1897000 }, { "epoch": 21.07, "learning_rate": 7.231859271927681e-08, "loss": 3.801, "step": 1897500 }, { "epoch": 21.08, "learning_rate": 7.230471092552695e-08, "loss": 3.7704, "step": 1898000 }, { "epoch": 21.08, "learning_rate": 7.229082913177708e-08, "loss": 3.7966, "step": 1898500 }, { "epoch": 21.09, "learning_rate": 7.227694733802722e-08, "loss": 3.7775, "step": 1899000 }, { "epoch": 21.09, "learning_rate": 7.226306554427737e-08, "loss": 3.7871, "step": 1899500 }, { "epoch": 21.1, "learning_rate": 7.224918375052751e-08, "loss": 3.7854, "step": 1900000 }, { "epoch": 21.11, "learning_rate": 7.223530195677764e-08, "loss": 3.7765, "step": 1900500 }, { "epoch": 21.11, "learning_rate": 7.222142016302778e-08, "loss": 3.8002, "step": 1901000 }, { "epoch": 21.12, "learning_rate": 7.220753836927792e-08, "loss": 3.7949, "step": 1901500 }, { "epoch": 21.12, "learning_rate": 7.219365657552806e-08, "loss": 3.7935, "step": 1902000 }, { "epoch": 21.13, "learning_rate": 7.21797747817782e-08, "loss": 3.8165, "step": 1902500 }, { "epoch": 21.13, "learning_rate": 7.216589298802835e-08, "loss": 3.7842, "step": 1903000 }, { "epoch": 21.14, "learning_rate": 7.215201119427848e-08, "loss": 3.8086, "step": 1903500 }, { "epoch": 21.14, "learning_rate": 7.213812940052861e-08, "loss": 3.7924, "step": 1904000 }, { "epoch": 21.15, "learning_rate": 7.212424760677875e-08, "loss": 3.7946, "step": 1904500 }, { "epoch": 21.16, "learning_rate": 7.211036581302889e-08, "loss": 3.8132, "step": 1905000 }, { "epoch": 21.16, "learning_rate": 7.209648401927903e-08, "loss": 3.8024, "step": 1905500 }, { "epoch": 21.17, "learning_rate": 7.208260222552916e-08, "loss": 3.7942, "step": 1906000 }, { "epoch": 21.17, "learning_rate": 7.20687204317793e-08, "loss": 3.8022, "step": 1906500 }, { "epoch": 21.18, "learning_rate": 7.205483863802945e-08, "loss": 3.7964, "step": 1907000 }, { "epoch": 21.18, "learning_rate": 7.204095684427959e-08, "loss": 3.7667, "step": 1907500 }, { "epoch": 21.19, "learning_rate": 7.202707505052973e-08, "loss": 3.7926, "step": 1908000 }, { "epoch": 21.19, "learning_rate": 7.201319325677987e-08, "loss": 3.7961, "step": 1908500 }, { "epoch": 21.2, "learning_rate": 7.199931146303e-08, "loss": 3.8026, "step": 1909000 }, { "epoch": 21.21, "learning_rate": 7.198542966928015e-08, "loss": 3.7988, "step": 1909500 }, { "epoch": 21.21, "learning_rate": 7.197154787553027e-08, "loss": 3.7841, "step": 1910000 }, { "epoch": 21.22, "learning_rate": 7.195766608178042e-08, "loss": 3.7996, "step": 1910500 }, { "epoch": 21.22, "learning_rate": 7.194378428803056e-08, "loss": 3.797, "step": 1911000 }, { "epoch": 21.23, "learning_rate": 7.19299024942807e-08, "loss": 3.7973, "step": 1911500 }, { "epoch": 21.23, "learning_rate": 7.191602070053083e-08, "loss": 3.8092, "step": 1912000 }, { "epoch": 21.24, "learning_rate": 7.190213890678097e-08, "loss": 3.7942, "step": 1912500 }, { "epoch": 21.24, "learning_rate": 7.188825711303111e-08, "loss": 3.7885, "step": 1913000 }, { "epoch": 21.25, "learning_rate": 7.187437531928126e-08, "loss": 3.7892, "step": 1913500 }, { "epoch": 21.26, "learning_rate": 7.18604935255314e-08, "loss": 3.7898, "step": 1914000 }, { "epoch": 21.26, "learning_rate": 7.184661173178153e-08, "loss": 3.7825, "step": 1914500 }, { "epoch": 21.27, "learning_rate": 7.183272993803167e-08, "loss": 3.7905, "step": 1915000 }, { "epoch": 21.27, "learning_rate": 7.181884814428181e-08, "loss": 3.7796, "step": 1915500 }, { "epoch": 21.28, "learning_rate": 7.180496635053194e-08, "loss": 3.8, "step": 1916000 }, { "epoch": 21.28, "learning_rate": 7.179108455678208e-08, "loss": 3.7928, "step": 1916500 }, { "epoch": 21.29, "learning_rate": 7.177720276303223e-08, "loss": 3.7943, "step": 1917000 }, { "epoch": 21.29, "learning_rate": 7.176332096928236e-08, "loss": 3.7971, "step": 1917500 }, { "epoch": 21.3, "learning_rate": 7.17494391755325e-08, "loss": 3.7886, "step": 1918000 }, { "epoch": 21.31, "learning_rate": 7.173555738178264e-08, "loss": 3.7806, "step": 1918500 }, { "epoch": 21.31, "learning_rate": 7.172167558803278e-08, "loss": 3.7901, "step": 1919000 }, { "epoch": 21.32, "learning_rate": 7.170779379428292e-08, "loss": 3.7816, "step": 1919500 }, { "epoch": 21.32, "learning_rate": 7.169391200053305e-08, "loss": 3.8032, "step": 1920000 }, { "epoch": 21.33, "learning_rate": 7.16800302067832e-08, "loss": 3.7795, "step": 1920500 }, { "epoch": 21.33, "learning_rate": 7.166614841303334e-08, "loss": 3.7688, "step": 1921000 }, { "epoch": 21.34, "learning_rate": 7.165226661928348e-08, "loss": 3.7695, "step": 1921500 }, { "epoch": 21.34, "learning_rate": 7.163838482553361e-08, "loss": 3.8006, "step": 1922000 }, { "epoch": 21.35, "learning_rate": 7.162450303178375e-08, "loss": 3.8097, "step": 1922500 }, { "epoch": 21.36, "learning_rate": 7.161062123803388e-08, "loss": 3.7796, "step": 1923000 }, { "epoch": 21.36, "learning_rate": 7.159673944428402e-08, "loss": 3.7949, "step": 1923500 }, { "epoch": 21.37, "learning_rate": 7.158285765053417e-08, "loss": 3.776, "step": 1924000 }, { "epoch": 21.37, "learning_rate": 7.156897585678431e-08, "loss": 3.78, "step": 1924500 }, { "epoch": 21.38, "learning_rate": 7.155509406303445e-08, "loss": 3.7884, "step": 1925000 }, { "epoch": 21.38, "learning_rate": 7.154121226928459e-08, "loss": 3.803, "step": 1925500 }, { "epoch": 21.39, "learning_rate": 7.152733047553472e-08, "loss": 3.786, "step": 1926000 }, { "epoch": 21.39, "learning_rate": 7.151344868178486e-08, "loss": 3.7773, "step": 1926500 }, { "epoch": 21.4, "learning_rate": 7.1499566888035e-08, "loss": 3.798, "step": 1927000 }, { "epoch": 21.41, "learning_rate": 7.148568509428515e-08, "loss": 3.7925, "step": 1927500 }, { "epoch": 21.41, "learning_rate": 7.147180330053528e-08, "loss": 3.7903, "step": 1928000 }, { "epoch": 21.42, "learning_rate": 7.145792150678542e-08, "loss": 3.7757, "step": 1928500 }, { "epoch": 21.42, "learning_rate": 7.144403971303555e-08, "loss": 3.7916, "step": 1929000 }, { "epoch": 21.43, "learning_rate": 7.143015791928569e-08, "loss": 3.7897, "step": 1929500 }, { "epoch": 21.43, "learning_rate": 7.141627612553583e-08, "loss": 3.789, "step": 1930000 }, { "epoch": 21.44, "learning_rate": 7.140239433178597e-08, "loss": 3.8001, "step": 1930500 }, { "epoch": 21.44, "learning_rate": 7.138851253803612e-08, "loss": 3.7804, "step": 1931000 }, { "epoch": 21.45, "learning_rate": 7.137463074428625e-08, "loss": 3.7953, "step": 1931500 }, { "epoch": 21.46, "learning_rate": 7.136074895053639e-08, "loss": 3.7868, "step": 1932000 }, { "epoch": 21.46, "learning_rate": 7.134686715678653e-08, "loss": 3.7864, "step": 1932500 }, { "epoch": 21.47, "learning_rate": 7.133298536303667e-08, "loss": 3.8067, "step": 1933000 }, { "epoch": 21.47, "learning_rate": 7.131910356928682e-08, "loss": 3.7949, "step": 1933500 }, { "epoch": 21.48, "learning_rate": 7.130522177553694e-08, "loss": 3.8007, "step": 1934000 }, { "epoch": 21.48, "learning_rate": 7.129133998178707e-08, "loss": 3.7754, "step": 1934500 }, { "epoch": 21.49, "learning_rate": 7.127745818803722e-08, "loss": 3.8005, "step": 1935000 }, { "epoch": 21.49, "learning_rate": 7.126357639428736e-08, "loss": 3.7777, "step": 1935500 }, { "epoch": 21.5, "learning_rate": 7.12496946005375e-08, "loss": 3.7963, "step": 1936000 }, { "epoch": 21.51, "learning_rate": 7.123581280678764e-08, "loss": 3.7851, "step": 1936500 }, { "epoch": 21.51, "learning_rate": 7.122193101303777e-08, "loss": 3.7674, "step": 1937000 }, { "epoch": 21.52, "learning_rate": 7.120804921928791e-08, "loss": 3.8032, "step": 1937500 }, { "epoch": 21.52, "learning_rate": 7.119416742553806e-08, "loss": 3.7782, "step": 1938000 }, { "epoch": 21.53, "learning_rate": 7.11802856317882e-08, "loss": 3.7864, "step": 1938500 }, { "epoch": 21.53, "learning_rate": 7.116640383803834e-08, "loss": 3.789, "step": 1939000 }, { "epoch": 21.54, "learning_rate": 7.115252204428848e-08, "loss": 3.7701, "step": 1939500 }, { "epoch": 21.54, "learning_rate": 7.113864025053861e-08, "loss": 3.7921, "step": 1940000 }, { "epoch": 21.55, "learning_rate": 7.112475845678874e-08, "loss": 3.7854, "step": 1940500 }, { "epoch": 21.56, "learning_rate": 7.111087666303888e-08, "loss": 3.7882, "step": 1941000 }, { "epoch": 21.56, "learning_rate": 7.109699486928903e-08, "loss": 3.7841, "step": 1941500 }, { "epoch": 21.57, "learning_rate": 7.108311307553917e-08, "loss": 3.7935, "step": 1942000 }, { "epoch": 21.57, "learning_rate": 7.10692312817893e-08, "loss": 3.7878, "step": 1942500 }, { "epoch": 21.58, "learning_rate": 7.105534948803944e-08, "loss": 3.787, "step": 1943000 }, { "epoch": 21.58, "learning_rate": 7.104146769428958e-08, "loss": 3.8077, "step": 1943500 }, { "epoch": 21.59, "learning_rate": 7.102758590053972e-08, "loss": 3.7806, "step": 1944000 }, { "epoch": 21.59, "learning_rate": 7.101370410678987e-08, "loss": 3.7905, "step": 1944500 }, { "epoch": 21.6, "learning_rate": 7.099982231304001e-08, "loss": 3.7657, "step": 1945000 }, { "epoch": 21.61, "learning_rate": 7.098594051929014e-08, "loss": 3.796, "step": 1945500 }, { "epoch": 21.61, "learning_rate": 7.097205872554028e-08, "loss": 3.7807, "step": 1946000 }, { "epoch": 21.62, "learning_rate": 7.095817693179041e-08, "loss": 3.7795, "step": 1946500 }, { "epoch": 21.62, "learning_rate": 7.094429513804055e-08, "loss": 3.8025, "step": 1947000 }, { "epoch": 21.63, "learning_rate": 7.093041334429069e-08, "loss": 3.8012, "step": 1947500 }, { "epoch": 21.63, "learning_rate": 7.091653155054083e-08, "loss": 3.79, "step": 1948000 }, { "epoch": 21.64, "learning_rate": 7.090264975679096e-08, "loss": 3.7857, "step": 1948500 }, { "epoch": 21.64, "learning_rate": 7.08887679630411e-08, "loss": 3.7686, "step": 1949000 }, { "epoch": 21.65, "learning_rate": 7.087488616929125e-08, "loss": 3.7948, "step": 1949500 }, { "epoch": 21.66, "learning_rate": 7.086100437554139e-08, "loss": 3.7862, "step": 1950000 }, { "epoch": 21.66, "learning_rate": 7.084712258179153e-08, "loss": 3.7964, "step": 1950500 }, { "epoch": 21.67, "learning_rate": 7.083324078804166e-08, "loss": 3.7797, "step": 1951000 }, { "epoch": 21.67, "learning_rate": 7.08193589942918e-08, "loss": 3.8038, "step": 1951500 }, { "epoch": 21.68, "learning_rate": 7.080547720054195e-08, "loss": 3.7925, "step": 1952000 }, { "epoch": 21.68, "learning_rate": 7.079159540679208e-08, "loss": 3.7775, "step": 1952500 }, { "epoch": 21.69, "learning_rate": 7.077771361304222e-08, "loss": 3.788, "step": 1953000 }, { "epoch": 21.69, "learning_rate": 7.076383181929236e-08, "loss": 3.7817, "step": 1953500 }, { "epoch": 21.7, "learning_rate": 7.074995002554249e-08, "loss": 3.7919, "step": 1954000 }, { "epoch": 21.71, "learning_rate": 7.073606823179263e-08, "loss": 3.761, "step": 1954500 }, { "epoch": 21.71, "learning_rate": 7.072218643804277e-08, "loss": 3.8048, "step": 1955000 }, { "epoch": 21.72, "learning_rate": 7.070830464429292e-08, "loss": 3.801, "step": 1955500 }, { "epoch": 21.72, "learning_rate": 7.069442285054306e-08, "loss": 3.7954, "step": 1956000 }, { "epoch": 21.73, "learning_rate": 7.068054105679319e-08, "loss": 3.7729, "step": 1956500 }, { "epoch": 21.73, "learning_rate": 7.066665926304333e-08, "loss": 3.7781, "step": 1957000 }, { "epoch": 21.74, "learning_rate": 7.065277746929347e-08, "loss": 3.7812, "step": 1957500 }, { "epoch": 21.74, "learning_rate": 7.063889567554361e-08, "loss": 3.7885, "step": 1958000 }, { "epoch": 21.75, "learning_rate": 7.062501388179374e-08, "loss": 3.7942, "step": 1958500 }, { "epoch": 21.76, "learning_rate": 7.061113208804389e-08, "loss": 3.7899, "step": 1959000 }, { "epoch": 21.76, "learning_rate": 7.059725029429401e-08, "loss": 3.7894, "step": 1959500 }, { "epoch": 21.77, "learning_rate": 7.058336850054416e-08, "loss": 3.8002, "step": 1960000 }, { "epoch": 21.77, "learning_rate": 7.05694867067943e-08, "loss": 3.7883, "step": 1960500 }, { "epoch": 21.78, "learning_rate": 7.055560491304444e-08, "loss": 3.8024, "step": 1961000 }, { "epoch": 21.78, "learning_rate": 7.054172311929458e-08, "loss": 3.8021, "step": 1961500 }, { "epoch": 21.79, "learning_rate": 7.052784132554473e-08, "loss": 3.7879, "step": 1962000 }, { "epoch": 21.79, "learning_rate": 7.051395953179485e-08, "loss": 3.7841, "step": 1962500 }, { "epoch": 21.8, "learning_rate": 7.0500077738045e-08, "loss": 3.8009, "step": 1963000 }, { "epoch": 21.81, "learning_rate": 7.048619594429514e-08, "loss": 3.8139, "step": 1963500 }, { "epoch": 21.81, "learning_rate": 7.047231415054528e-08, "loss": 3.7907, "step": 1964000 }, { "epoch": 21.82, "learning_rate": 7.045843235679541e-08, "loss": 3.7904, "step": 1964500 }, { "epoch": 21.82, "learning_rate": 7.044455056304554e-08, "loss": 3.7757, "step": 1965000 }, { "epoch": 21.83, "learning_rate": 7.043066876929568e-08, "loss": 3.7891, "step": 1965500 }, { "epoch": 21.83, "learning_rate": 7.041678697554582e-08, "loss": 3.7992, "step": 1966000 }, { "epoch": 21.84, "learning_rate": 7.040290518179597e-08, "loss": 3.7669, "step": 1966500 }, { "epoch": 21.84, "learning_rate": 7.038902338804611e-08, "loss": 3.809, "step": 1967000 }, { "epoch": 21.85, "learning_rate": 7.037514159429625e-08, "loss": 3.7909, "step": 1967500 }, { "epoch": 21.86, "learning_rate": 7.036125980054638e-08, "loss": 3.7907, "step": 1968000 }, { "epoch": 21.86, "learning_rate": 7.034737800679652e-08, "loss": 3.7807, "step": 1968500 }, { "epoch": 21.87, "learning_rate": 7.033349621304666e-08, "loss": 3.787, "step": 1969000 }, { "epoch": 21.87, "learning_rate": 7.031961441929681e-08, "loss": 3.7761, "step": 1969500 }, { "epoch": 21.88, "learning_rate": 7.030573262554695e-08, "loss": 3.779, "step": 1970000 }, { "epoch": 21.88, "learning_rate": 7.029185083179708e-08, "loss": 3.8079, "step": 1970500 }, { "epoch": 21.89, "learning_rate": 7.027796903804721e-08, "loss": 3.7868, "step": 1971000 }, { "epoch": 21.89, "learning_rate": 7.026408724429735e-08, "loss": 3.7868, "step": 1971500 }, { "epoch": 21.9, "learning_rate": 7.025020545054749e-08, "loss": 3.7996, "step": 1972000 }, { "epoch": 21.91, "learning_rate": 7.023632365679763e-08, "loss": 3.7828, "step": 1972500 }, { "epoch": 21.91, "learning_rate": 7.022244186304778e-08, "loss": 3.7873, "step": 1973000 }, { "epoch": 21.92, "learning_rate": 7.02085600692979e-08, "loss": 3.7874, "step": 1973500 }, { "epoch": 21.92, "learning_rate": 7.019467827554805e-08, "loss": 3.7938, "step": 1974000 }, { "epoch": 21.93, "learning_rate": 7.018079648179819e-08, "loss": 3.7869, "step": 1974500 }, { "epoch": 21.93, "learning_rate": 7.016691468804833e-08, "loss": 3.798, "step": 1975000 }, { "epoch": 21.94, "learning_rate": 7.015303289429847e-08, "loss": 3.7919, "step": 1975500 }, { "epoch": 21.94, "learning_rate": 7.013915110054862e-08, "loss": 3.7861, "step": 1976000 }, { "epoch": 21.95, "learning_rate": 7.012526930679875e-08, "loss": 3.795, "step": 1976500 }, { "epoch": 21.96, "learning_rate": 7.011138751304887e-08, "loss": 3.7829, "step": 1977000 }, { "epoch": 21.96, "learning_rate": 7.009750571929902e-08, "loss": 3.8138, "step": 1977500 }, { "epoch": 21.97, "learning_rate": 7.008362392554916e-08, "loss": 3.7756, "step": 1978000 }, { "epoch": 21.97, "learning_rate": 7.00697421317993e-08, "loss": 3.7906, "step": 1978500 }, { "epoch": 21.98, "learning_rate": 7.005586033804943e-08, "loss": 3.79, "step": 1979000 }, { "epoch": 21.98, "learning_rate": 7.004197854429957e-08, "loss": 3.8046, "step": 1979500 }, { "epoch": 21.99, "learning_rate": 7.002809675054971e-08, "loss": 3.7864, "step": 1980000 }, { "epoch": 21.99, "learning_rate": 7.001421495679986e-08, "loss": 3.7936, "step": 1980500 }, { "epoch": 22.0, "learning_rate": 7.000033316305e-08, "loss": 3.7851, "step": 1981000 }, { "epoch": 22.0, "eval_loss": 3.8484787940979004, "eval_runtime": 6.3174, "eval_samples_per_second": 245.986, "step": 1981012 }, { "epoch": 22.01, "learning_rate": 6.998645136930014e-08, "loss": 3.7871, "step": 1981500 }, { "epoch": 22.01, "learning_rate": 6.997256957555027e-08, "loss": 3.7924, "step": 1982000 }, { "epoch": 22.02, "learning_rate": 6.995868778180041e-08, "loss": 3.7953, "step": 1982500 }, { "epoch": 22.02, "learning_rate": 6.994480598805054e-08, "loss": 3.7907, "step": 1983000 }, { "epoch": 22.03, "learning_rate": 6.993092419430068e-08, "loss": 3.7666, "step": 1983500 }, { "epoch": 22.03, "learning_rate": 6.991704240055083e-08, "loss": 3.7855, "step": 1984000 }, { "epoch": 22.04, "learning_rate": 6.990316060680097e-08, "loss": 3.7738, "step": 1984500 }, { "epoch": 22.04, "learning_rate": 6.98892788130511e-08, "loss": 3.7925, "step": 1985000 }, { "epoch": 22.05, "learning_rate": 6.987539701930124e-08, "loss": 3.7909, "step": 1985500 }, { "epoch": 22.06, "learning_rate": 6.986151522555138e-08, "loss": 3.8002, "step": 1986000 }, { "epoch": 22.06, "learning_rate": 6.984763343180152e-08, "loss": 3.7835, "step": 1986500 }, { "epoch": 22.07, "learning_rate": 6.983375163805167e-08, "loss": 3.7926, "step": 1987000 }, { "epoch": 22.07, "learning_rate": 6.98198698443018e-08, "loss": 3.7992, "step": 1987500 }, { "epoch": 22.08, "learning_rate": 6.980598805055194e-08, "loss": 3.7971, "step": 1988000 }, { "epoch": 22.08, "learning_rate": 6.979210625680208e-08, "loss": 3.7985, "step": 1988500 }, { "epoch": 22.09, "learning_rate": 6.977822446305221e-08, "loss": 3.7874, "step": 1989000 }, { "epoch": 22.09, "learning_rate": 6.976434266930235e-08, "loss": 3.7873, "step": 1989500 }, { "epoch": 22.1, "learning_rate": 6.97504608755525e-08, "loss": 3.7859, "step": 1990000 }, { "epoch": 22.11, "learning_rate": 6.973657908180262e-08, "loss": 3.8092, "step": 1990500 }, { "epoch": 22.11, "learning_rate": 6.972269728805277e-08, "loss": 3.7872, "step": 1991000 }, { "epoch": 22.12, "learning_rate": 6.970881549430291e-08, "loss": 3.7697, "step": 1991500 }, { "epoch": 22.12, "learning_rate": 6.969493370055305e-08, "loss": 3.7745, "step": 1992000 }, { "epoch": 22.13, "learning_rate": 6.968105190680319e-08, "loss": 3.7949, "step": 1992500 }, { "epoch": 22.13, "learning_rate": 6.966717011305333e-08, "loss": 3.7931, "step": 1993000 }, { "epoch": 22.14, "learning_rate": 6.965328831930346e-08, "loss": 3.7968, "step": 1993500 }, { "epoch": 22.14, "learning_rate": 6.96394065255536e-08, "loss": 3.7767, "step": 1994000 }, { "epoch": 22.15, "learning_rate": 6.962552473180375e-08, "loss": 3.8058, "step": 1994500 }, { "epoch": 22.16, "learning_rate": 6.961164293805388e-08, "loss": 3.7957, "step": 1995000 }, { "epoch": 22.16, "learning_rate": 6.959776114430402e-08, "loss": 3.7939, "step": 1995500 }, { "epoch": 22.17, "learning_rate": 6.958387935055415e-08, "loss": 3.7841, "step": 1996000 }, { "epoch": 22.17, "learning_rate": 6.956999755680429e-08, "loss": 3.7881, "step": 1996500 }, { "epoch": 22.18, "learning_rate": 6.955611576305443e-08, "loss": 3.7743, "step": 1997000 }, { "epoch": 22.18, "learning_rate": 6.954223396930457e-08, "loss": 3.8086, "step": 1997500 }, { "epoch": 22.19, "learning_rate": 6.952835217555472e-08, "loss": 3.7994, "step": 1998000 }, { "epoch": 22.19, "learning_rate": 6.951447038180486e-08, "loss": 3.8022, "step": 1998500 }, { "epoch": 22.2, "learning_rate": 6.950058858805499e-08, "loss": 3.7674, "step": 1999000 }, { "epoch": 22.21, "learning_rate": 6.948670679430513e-08, "loss": 3.7945, "step": 1999500 }, { "epoch": 22.21, "learning_rate": 6.947282500055527e-08, "loss": 3.7764, "step": 2000000 }, { "epoch": 22.22, "learning_rate": 6.945894320680542e-08, "loss": 3.7923, "step": 2000500 }, { "epoch": 22.22, "learning_rate": 6.944506141305554e-08, "loss": 3.7838, "step": 2001000 }, { "epoch": 22.23, "learning_rate": 6.943117961930567e-08, "loss": 3.7779, "step": 2001500 }, { "epoch": 22.23, "learning_rate": 6.941729782555582e-08, "loss": 3.7668, "step": 2002000 }, { "epoch": 22.24, "learning_rate": 6.940341603180596e-08, "loss": 3.7736, "step": 2002500 }, { "epoch": 22.24, "learning_rate": 6.93895342380561e-08, "loss": 3.7946, "step": 2003000 }, { "epoch": 22.25, "learning_rate": 6.937565244430624e-08, "loss": 3.8017, "step": 2003500 }, { "epoch": 22.26, "learning_rate": 6.936177065055638e-08, "loss": 3.79, "step": 2004000 }, { "epoch": 22.26, "learning_rate": 6.934788885680651e-08, "loss": 3.7758, "step": 2004500 }, { "epoch": 22.27, "learning_rate": 6.933400706305666e-08, "loss": 3.7753, "step": 2005000 }, { "epoch": 22.27, "learning_rate": 6.93201252693068e-08, "loss": 3.7827, "step": 2005500 }, { "epoch": 22.28, "learning_rate": 6.930624347555694e-08, "loss": 3.7772, "step": 2006000 }, { "epoch": 22.28, "learning_rate": 6.929236168180708e-08, "loss": 3.7935, "step": 2006500 }, { "epoch": 22.29, "learning_rate": 6.927847988805721e-08, "loss": 3.7779, "step": 2007000 }, { "epoch": 22.29, "learning_rate": 6.926459809430734e-08, "loss": 3.796, "step": 2007500 }, { "epoch": 22.3, "learning_rate": 6.925071630055748e-08, "loss": 3.762, "step": 2008000 }, { "epoch": 22.31, "learning_rate": 6.923683450680763e-08, "loss": 3.7926, "step": 2008500 }, { "epoch": 22.31, "learning_rate": 6.922295271305777e-08, "loss": 3.7823, "step": 2009000 }, { "epoch": 22.32, "learning_rate": 6.920907091930791e-08, "loss": 3.802, "step": 2009500 }, { "epoch": 22.32, "learning_rate": 6.919518912555804e-08, "loss": 3.7753, "step": 2010000 }, { "epoch": 22.33, "learning_rate": 6.918130733180818e-08, "loss": 3.7734, "step": 2010500 }, { "epoch": 22.33, "learning_rate": 6.916742553805832e-08, "loss": 3.7947, "step": 2011000 }, { "epoch": 22.34, "learning_rate": 6.915354374430847e-08, "loss": 3.7967, "step": 2011500 }, { "epoch": 22.34, "learning_rate": 6.913966195055861e-08, "loss": 3.7954, "step": 2012000 }, { "epoch": 22.35, "learning_rate": 6.912578015680875e-08, "loss": 3.7812, "step": 2012500 }, { "epoch": 22.36, "learning_rate": 6.911189836305888e-08, "loss": 3.7872, "step": 2013000 }, { "epoch": 22.36, "learning_rate": 6.909801656930901e-08, "loss": 3.7915, "step": 2013500 }, { "epoch": 22.37, "learning_rate": 6.908413477555915e-08, "loss": 3.7899, "step": 2014000 }, { "epoch": 22.37, "learning_rate": 6.907025298180929e-08, "loss": 3.7875, "step": 2014500 }, { "epoch": 22.38, "learning_rate": 6.905637118805944e-08, "loss": 3.7868, "step": 2015000 }, { "epoch": 22.38, "learning_rate": 6.904248939430956e-08, "loss": 3.8169, "step": 2015500 }, { "epoch": 22.39, "learning_rate": 6.90286076005597e-08, "loss": 3.7773, "step": 2016000 }, { "epoch": 22.39, "learning_rate": 6.901472580680985e-08, "loss": 3.7923, "step": 2016500 }, { "epoch": 22.4, "learning_rate": 6.900084401305999e-08, "loss": 3.7977, "step": 2017000 }, { "epoch": 22.41, "learning_rate": 6.898696221931013e-08, "loss": 3.7897, "step": 2017500 }, { "epoch": 22.41, "learning_rate": 6.897308042556028e-08, "loss": 3.7929, "step": 2018000 }, { "epoch": 22.42, "learning_rate": 6.89591986318104e-08, "loss": 3.8075, "step": 2018500 }, { "epoch": 22.42, "learning_rate": 6.894531683806055e-08, "loss": 3.7867, "step": 2019000 }, { "epoch": 22.43, "learning_rate": 6.893143504431068e-08, "loss": 3.7928, "step": 2019500 }, { "epoch": 22.43, "learning_rate": 6.891755325056082e-08, "loss": 3.7794, "step": 2020000 }, { "epoch": 22.44, "learning_rate": 6.890367145681096e-08, "loss": 3.7838, "step": 2020500 }, { "epoch": 22.44, "learning_rate": 6.88897896630611e-08, "loss": 3.7674, "step": 2021000 }, { "epoch": 22.45, "learning_rate": 6.887590786931123e-08, "loss": 3.7865, "step": 2021500 }, { "epoch": 22.46, "learning_rate": 6.886202607556137e-08, "loss": 3.7721, "step": 2022000 }, { "epoch": 22.46, "learning_rate": 6.884814428181152e-08, "loss": 3.7869, "step": 2022500 }, { "epoch": 22.47, "learning_rate": 6.883426248806166e-08, "loss": 3.785, "step": 2023000 }, { "epoch": 22.47, "learning_rate": 6.88203806943118e-08, "loss": 3.7929, "step": 2023500 }, { "epoch": 22.48, "learning_rate": 6.880649890056193e-08, "loss": 3.7761, "step": 2024000 }, { "epoch": 22.48, "learning_rate": 6.879261710681207e-08, "loss": 3.7886, "step": 2024500 }, { "epoch": 22.49, "learning_rate": 6.877873531306221e-08, "loss": 3.7965, "step": 2025000 }, { "epoch": 22.49, "learning_rate": 6.876485351931234e-08, "loss": 3.7879, "step": 2025500 }, { "epoch": 22.5, "learning_rate": 6.875097172556249e-08, "loss": 3.7966, "step": 2026000 }, { "epoch": 22.51, "learning_rate": 6.873708993181263e-08, "loss": 3.7729, "step": 2026500 }, { "epoch": 22.51, "learning_rate": 6.872320813806276e-08, "loss": 3.7856, "step": 2027000 }, { "epoch": 22.52, "learning_rate": 6.87093263443129e-08, "loss": 3.7729, "step": 2027500 }, { "epoch": 22.52, "learning_rate": 6.869544455056304e-08, "loss": 3.7935, "step": 2028000 }, { "epoch": 22.53, "learning_rate": 6.868156275681318e-08, "loss": 3.7977, "step": 2028500 }, { "epoch": 22.53, "learning_rate": 6.866768096306333e-08, "loss": 3.7955, "step": 2029000 }, { "epoch": 22.54, "learning_rate": 6.865379916931347e-08, "loss": 3.7826, "step": 2029500 }, { "epoch": 22.54, "learning_rate": 6.86399173755636e-08, "loss": 3.7743, "step": 2030000 }, { "epoch": 22.55, "learning_rate": 6.862603558181374e-08, "loss": 3.8023, "step": 2030500 }, { "epoch": 22.56, "learning_rate": 6.861215378806388e-08, "loss": 3.7918, "step": 2031000 }, { "epoch": 22.56, "learning_rate": 6.859827199431401e-08, "loss": 3.7863, "step": 2031500 }, { "epoch": 22.57, "learning_rate": 6.858439020056415e-08, "loss": 3.7613, "step": 2032000 }, { "epoch": 22.57, "learning_rate": 6.857050840681428e-08, "loss": 3.7797, "step": 2032500 }, { "epoch": 22.58, "learning_rate": 6.855662661306442e-08, "loss": 3.798, "step": 2033000 }, { "epoch": 22.58, "learning_rate": 6.854274481931457e-08, "loss": 3.7839, "step": 2033500 }, { "epoch": 22.59, "learning_rate": 6.852886302556471e-08, "loss": 3.7743, "step": 2034000 }, { "epoch": 22.59, "learning_rate": 6.851498123181485e-08, "loss": 3.7922, "step": 2034500 }, { "epoch": 22.6, "learning_rate": 6.850109943806499e-08, "loss": 3.7975, "step": 2035000 }, { "epoch": 22.61, "learning_rate": 6.848721764431512e-08, "loss": 3.8052, "step": 2035500 }, { "epoch": 22.61, "learning_rate": 6.847333585056526e-08, "loss": 3.7909, "step": 2036000 }, { "epoch": 22.62, "learning_rate": 6.845945405681541e-08, "loss": 3.7927, "step": 2036500 }, { "epoch": 22.62, "learning_rate": 6.844557226306555e-08, "loss": 3.7935, "step": 2037000 }, { "epoch": 22.63, "learning_rate": 6.843169046931568e-08, "loss": 3.7803, "step": 2037500 }, { "epoch": 22.63, "learning_rate": 6.841780867556581e-08, "loss": 3.7838, "step": 2038000 }, { "epoch": 22.64, "learning_rate": 6.840392688181595e-08, "loss": 3.7787, "step": 2038500 }, { "epoch": 22.64, "learning_rate": 6.839004508806609e-08, "loss": 3.7879, "step": 2039000 }, { "epoch": 22.65, "learning_rate": 6.837616329431623e-08, "loss": 3.7746, "step": 2039500 }, { "epoch": 22.66, "learning_rate": 6.836228150056638e-08, "loss": 3.784, "step": 2040000 }, { "epoch": 22.66, "learning_rate": 6.834839970681652e-08, "loss": 3.7763, "step": 2040500 }, { "epoch": 22.67, "learning_rate": 6.833451791306665e-08, "loss": 3.7761, "step": 2041000 }, { "epoch": 22.67, "learning_rate": 6.832063611931679e-08, "loss": 3.7993, "step": 2041500 }, { "epoch": 22.68, "learning_rate": 6.830675432556693e-08, "loss": 3.8021, "step": 2042000 }, { "epoch": 22.68, "learning_rate": 6.829287253181707e-08, "loss": 3.7867, "step": 2042500 }, { "epoch": 22.69, "learning_rate": 6.827899073806722e-08, "loss": 3.7863, "step": 2043000 }, { "epoch": 22.69, "learning_rate": 6.826510894431735e-08, "loss": 3.789, "step": 2043500 }, { "epoch": 22.7, "learning_rate": 6.825122715056747e-08, "loss": 3.7828, "step": 2044000 }, { "epoch": 22.71, "learning_rate": 6.823734535681762e-08, "loss": 3.7976, "step": 2044500 }, { "epoch": 22.71, "learning_rate": 6.822346356306776e-08, "loss": 3.7772, "step": 2045000 }, { "epoch": 22.72, "learning_rate": 6.82095817693179e-08, "loss": 3.7953, "step": 2045500 }, { "epoch": 22.72, "learning_rate": 6.819569997556804e-08, "loss": 3.7684, "step": 2046000 }, { "epoch": 22.73, "learning_rate": 6.818181818181817e-08, "loss": 3.7981, "step": 2046500 }, { "epoch": 22.73, "learning_rate": 6.816793638806831e-08, "loss": 3.7904, "step": 2047000 }, { "epoch": 22.74, "learning_rate": 6.815405459431846e-08, "loss": 3.7839, "step": 2047500 }, { "epoch": 22.74, "learning_rate": 6.81401728005686e-08, "loss": 3.7794, "step": 2048000 }, { "epoch": 22.75, "learning_rate": 6.812629100681874e-08, "loss": 3.78, "step": 2048500 }, { "epoch": 22.76, "learning_rate": 6.811240921306888e-08, "loss": 3.7841, "step": 2049000 }, { "epoch": 22.76, "learning_rate": 6.809852741931901e-08, "loss": 3.7841, "step": 2049500 }, { "epoch": 22.77, "learning_rate": 6.808464562556914e-08, "loss": 3.7819, "step": 2050000 }, { "epoch": 22.77, "learning_rate": 6.807076383181928e-08, "loss": 3.786, "step": 2050500 }, { "epoch": 22.78, "learning_rate": 6.805688203806943e-08, "loss": 3.8044, "step": 2051000 }, { "epoch": 22.78, "learning_rate": 6.804300024431957e-08, "loss": 3.7804, "step": 2051500 }, { "epoch": 22.79, "learning_rate": 6.80291184505697e-08, "loss": 3.7735, "step": 2052000 }, { "epoch": 22.79, "learning_rate": 6.801523665681984e-08, "loss": 3.8017, "step": 2052500 }, { "epoch": 22.8, "learning_rate": 6.800135486306998e-08, "loss": 3.7766, "step": 2053000 }, { "epoch": 22.81, "learning_rate": 6.798747306932012e-08, "loss": 3.7809, "step": 2053500 }, { "epoch": 22.81, "learning_rate": 6.797359127557027e-08, "loss": 3.7856, "step": 2054000 }, { "epoch": 22.82, "learning_rate": 6.795970948182041e-08, "loss": 3.7727, "step": 2054500 }, { "epoch": 22.82, "learning_rate": 6.794582768807054e-08, "loss": 3.798, "step": 2055000 }, { "epoch": 22.83, "learning_rate": 6.793194589432068e-08, "loss": 3.7815, "step": 2055500 }, { "epoch": 22.83, "learning_rate": 6.791806410057081e-08, "loss": 3.7905, "step": 2056000 }, { "epoch": 22.84, "learning_rate": 6.790418230682095e-08, "loss": 3.7875, "step": 2056500 }, { "epoch": 22.84, "learning_rate": 6.78903005130711e-08, "loss": 3.7908, "step": 2057000 }, { "epoch": 22.85, "learning_rate": 6.787641871932124e-08, "loss": 3.7915, "step": 2057500 }, { "epoch": 22.85, "learning_rate": 6.786253692557137e-08, "loss": 3.7781, "step": 2058000 }, { "epoch": 22.86, "learning_rate": 6.784865513182151e-08, "loss": 3.7852, "step": 2058500 }, { "epoch": 22.87, "learning_rate": 6.783477333807165e-08, "loss": 3.796, "step": 2059000 }, { "epoch": 22.87, "learning_rate": 6.782089154432179e-08, "loss": 3.7695, "step": 2059500 }, { "epoch": 22.88, "learning_rate": 6.780700975057193e-08, "loss": 3.7731, "step": 2060000 }, { "epoch": 22.88, "learning_rate": 6.779312795682206e-08, "loss": 3.7799, "step": 2060500 }, { "epoch": 22.89, "learning_rate": 6.77792461630722e-08, "loss": 3.7956, "step": 2061000 }, { "epoch": 22.89, "learning_rate": 6.776536436932235e-08, "loss": 3.7715, "step": 2061500 }, { "epoch": 22.9, "learning_rate": 6.775148257557248e-08, "loss": 3.7685, "step": 2062000 }, { "epoch": 22.9, "learning_rate": 6.773760078182262e-08, "loss": 3.7821, "step": 2062500 }, { "epoch": 22.91, "learning_rate": 6.772371898807276e-08, "loss": 3.7799, "step": 2063000 }, { "epoch": 22.92, "learning_rate": 6.770983719432289e-08, "loss": 3.7798, "step": 2063500 }, { "epoch": 22.92, "learning_rate": 6.769595540057303e-08, "loss": 3.8001, "step": 2064000 }, { "epoch": 22.93, "learning_rate": 6.768207360682318e-08, "loss": 3.761, "step": 2064500 }, { "epoch": 22.93, "learning_rate": 6.766819181307332e-08, "loss": 3.7929, "step": 2065000 }, { "epoch": 22.94, "learning_rate": 6.765431001932346e-08, "loss": 3.7812, "step": 2065500 }, { "epoch": 22.94, "learning_rate": 6.76404282255736e-08, "loss": 3.7751, "step": 2066000 }, { "epoch": 22.95, "learning_rate": 6.762654643182373e-08, "loss": 3.778, "step": 2066500 }, { "epoch": 22.95, "learning_rate": 6.761266463807387e-08, "loss": 3.7746, "step": 2067000 }, { "epoch": 22.96, "learning_rate": 6.759878284432402e-08, "loss": 3.7818, "step": 2067500 }, { "epoch": 22.97, "learning_rate": 6.758490105057414e-08, "loss": 3.7983, "step": 2068000 }, { "epoch": 22.97, "learning_rate": 6.757101925682429e-08, "loss": 3.7863, "step": 2068500 }, { "epoch": 22.98, "learning_rate": 6.755713746307442e-08, "loss": 3.7871, "step": 2069000 }, { "epoch": 22.98, "learning_rate": 6.754325566932456e-08, "loss": 3.8034, "step": 2069500 }, { "epoch": 22.99, "learning_rate": 6.75293738755747e-08, "loss": 3.7894, "step": 2070000 }, { "epoch": 22.99, "learning_rate": 6.751549208182484e-08, "loss": 3.789, "step": 2070500 }, { "epoch": 23.0, "learning_rate": 6.750161028807498e-08, "loss": 3.7924, "step": 2071000 }, { "epoch": 23.0, "eval_loss": 3.8470423221588135, "eval_runtime": 6.312, "eval_samples_per_second": 246.197, "step": 2071058 }, { "epoch": 23.0, "learning_rate": 6.748772849432513e-08, "loss": 3.7791, "step": 2071500 }, { "epoch": 23.01, "learning_rate": 6.747384670057526e-08, "loss": 3.788, "step": 2072000 }, { "epoch": 23.02, "learning_rate": 6.74599649068254e-08, "loss": 3.7828, "step": 2072500 }, { "epoch": 23.02, "learning_rate": 6.744608311307554e-08, "loss": 3.781, "step": 2073000 }, { "epoch": 23.03, "learning_rate": 6.743220131932568e-08, "loss": 3.7739, "step": 2073500 }, { "epoch": 23.03, "learning_rate": 6.741831952557581e-08, "loss": 3.7775, "step": 2074000 }, { "epoch": 23.04, "learning_rate": 6.740443773182594e-08, "loss": 3.7998, "step": 2074500 }, { "epoch": 23.04, "learning_rate": 6.739055593807608e-08, "loss": 3.7705, "step": 2075000 }, { "epoch": 23.05, "learning_rate": 6.737667414432623e-08, "loss": 3.7864, "step": 2075500 }, { "epoch": 23.05, "learning_rate": 6.736279235057637e-08, "loss": 3.7928, "step": 2076000 }, { "epoch": 23.06, "learning_rate": 6.734891055682651e-08, "loss": 3.7908, "step": 2076500 }, { "epoch": 23.07, "learning_rate": 6.733502876307665e-08, "loss": 3.778, "step": 2077000 }, { "epoch": 23.07, "learning_rate": 6.732114696932678e-08, "loss": 3.7924, "step": 2077500 }, { "epoch": 23.08, "learning_rate": 6.730726517557692e-08, "loss": 3.79, "step": 2078000 }, { "epoch": 23.08, "learning_rate": 6.729338338182707e-08, "loss": 3.781, "step": 2078500 }, { "epoch": 23.09, "learning_rate": 6.727950158807721e-08, "loss": 3.7905, "step": 2079000 }, { "epoch": 23.09, "learning_rate": 6.726561979432735e-08, "loss": 3.7688, "step": 2079500 }, { "epoch": 23.1, "learning_rate": 6.725173800057748e-08, "loss": 3.7778, "step": 2080000 }, { "epoch": 23.1, "learning_rate": 6.723785620682761e-08, "loss": 3.7955, "step": 2080500 }, { "epoch": 23.11, "learning_rate": 6.722397441307775e-08, "loss": 3.7835, "step": 2081000 }, { "epoch": 23.12, "learning_rate": 6.721009261932789e-08, "loss": 3.7987, "step": 2081500 }, { "epoch": 23.12, "learning_rate": 6.719621082557804e-08, "loss": 3.7814, "step": 2082000 }, { "epoch": 23.13, "learning_rate": 6.718232903182818e-08, "loss": 3.7963, "step": 2082500 }, { "epoch": 23.13, "learning_rate": 6.71684472380783e-08, "loss": 3.7843, "step": 2083000 }, { "epoch": 23.14, "learning_rate": 6.715456544432845e-08, "loss": 3.7916, "step": 2083500 }, { "epoch": 23.14, "learning_rate": 6.714068365057859e-08, "loss": 3.7684, "step": 2084000 }, { "epoch": 23.15, "learning_rate": 6.712680185682873e-08, "loss": 3.784, "step": 2084500 }, { "epoch": 23.15, "learning_rate": 6.711292006307888e-08, "loss": 3.7906, "step": 2085000 }, { "epoch": 23.16, "learning_rate": 6.709903826932902e-08, "loss": 3.8045, "step": 2085500 }, { "epoch": 23.17, "learning_rate": 6.708515647557915e-08, "loss": 3.7896, "step": 2086000 }, { "epoch": 23.17, "learning_rate": 6.707127468182928e-08, "loss": 3.7908, "step": 2086500 }, { "epoch": 23.18, "learning_rate": 6.705739288807942e-08, "loss": 3.7766, "step": 2087000 }, { "epoch": 23.18, "learning_rate": 6.704351109432956e-08, "loss": 3.7789, "step": 2087500 }, { "epoch": 23.19, "learning_rate": 6.70296293005797e-08, "loss": 3.7694, "step": 2088000 }, { "epoch": 23.19, "learning_rate": 6.701574750682984e-08, "loss": 3.7662, "step": 2088500 }, { "epoch": 23.2, "learning_rate": 6.700186571307997e-08, "loss": 3.7678, "step": 2089000 }, { "epoch": 23.2, "learning_rate": 6.698798391933012e-08, "loss": 3.7835, "step": 2089500 }, { "epoch": 23.21, "learning_rate": 6.697410212558026e-08, "loss": 3.7938, "step": 2090000 }, { "epoch": 23.22, "learning_rate": 6.69602203318304e-08, "loss": 3.8115, "step": 2090500 }, { "epoch": 23.22, "learning_rate": 6.694633853808054e-08, "loss": 3.7777, "step": 2091000 }, { "epoch": 23.23, "learning_rate": 6.693245674433067e-08, "loss": 3.7758, "step": 2091500 }, { "epoch": 23.23, "learning_rate": 6.691857495058081e-08, "loss": 3.8004, "step": 2092000 }, { "epoch": 23.24, "learning_rate": 6.690469315683094e-08, "loss": 3.7832, "step": 2092500 }, { "epoch": 23.24, "learning_rate": 6.689081136308109e-08, "loss": 3.788, "step": 2093000 }, { "epoch": 23.25, "learning_rate": 6.687692956933123e-08, "loss": 3.7774, "step": 2093500 }, { "epoch": 23.25, "learning_rate": 6.686304777558137e-08, "loss": 3.786, "step": 2094000 }, { "epoch": 23.26, "learning_rate": 6.68491659818315e-08, "loss": 3.7752, "step": 2094500 }, { "epoch": 23.27, "learning_rate": 6.683528418808164e-08, "loss": 3.7946, "step": 2095000 }, { "epoch": 23.27, "learning_rate": 6.682140239433178e-08, "loss": 3.7814, "step": 2095500 }, { "epoch": 23.28, "learning_rate": 6.680752060058193e-08, "loss": 3.7875, "step": 2096000 }, { "epoch": 23.28, "learning_rate": 6.679363880683207e-08, "loss": 3.7734, "step": 2096500 }, { "epoch": 23.29, "learning_rate": 6.67797570130822e-08, "loss": 3.7906, "step": 2097000 }, { "epoch": 23.29, "learning_rate": 6.676587521933234e-08, "loss": 3.7844, "step": 2097500 }, { "epoch": 23.3, "learning_rate": 6.675199342558248e-08, "loss": 3.7729, "step": 2098000 }, { "epoch": 23.3, "learning_rate": 6.673811163183261e-08, "loss": 3.7776, "step": 2098500 }, { "epoch": 23.31, "learning_rate": 6.672422983808275e-08, "loss": 3.7633, "step": 2099000 }, { "epoch": 23.32, "learning_rate": 6.67103480443329e-08, "loss": 3.7919, "step": 2099500 }, { "epoch": 23.32, "learning_rate": 6.669646625058302e-08, "loss": 3.7902, "step": 2100000 }, { "epoch": 23.33, "learning_rate": 6.668258445683317e-08, "loss": 3.7825, "step": 2100500 }, { "epoch": 23.33, "learning_rate": 6.666870266308331e-08, "loss": 3.779, "step": 2101000 }, { "epoch": 23.34, "learning_rate": 6.665482086933345e-08, "loss": 3.7957, "step": 2101500 }, { "epoch": 23.34, "learning_rate": 6.66409390755836e-08, "loss": 3.7888, "step": 2102000 }, { "epoch": 23.35, "learning_rate": 6.662705728183374e-08, "loss": 3.7831, "step": 2102500 }, { "epoch": 23.35, "learning_rate": 6.661317548808386e-08, "loss": 3.7662, "step": 2103000 }, { "epoch": 23.36, "learning_rate": 6.659929369433401e-08, "loss": 3.7787, "step": 2103500 }, { "epoch": 23.37, "learning_rate": 6.658541190058415e-08, "loss": 3.7825, "step": 2104000 }, { "epoch": 23.37, "learning_rate": 6.657153010683428e-08, "loss": 3.7757, "step": 2104500 }, { "epoch": 23.38, "learning_rate": 6.655764831308442e-08, "loss": 3.7915, "step": 2105000 }, { "epoch": 23.38, "learning_rate": 6.654376651933455e-08, "loss": 3.7791, "step": 2105500 }, { "epoch": 23.39, "learning_rate": 6.652988472558469e-08, "loss": 3.7841, "step": 2106000 }, { "epoch": 23.39, "learning_rate": 6.651600293183483e-08, "loss": 3.7806, "step": 2106500 }, { "epoch": 23.4, "learning_rate": 6.650212113808498e-08, "loss": 3.7761, "step": 2107000 }, { "epoch": 23.4, "learning_rate": 6.648823934433512e-08, "loss": 3.7916, "step": 2107500 }, { "epoch": 23.41, "learning_rate": 6.647435755058526e-08, "loss": 3.7705, "step": 2108000 }, { "epoch": 23.42, "learning_rate": 6.646047575683539e-08, "loss": 3.7778, "step": 2108500 }, { "epoch": 23.42, "learning_rate": 6.644659396308553e-08, "loss": 3.7875, "step": 2109000 }, { "epoch": 23.43, "learning_rate": 6.643271216933567e-08, "loss": 3.794, "step": 2109500 }, { "epoch": 23.43, "learning_rate": 6.641883037558582e-08, "loss": 3.7824, "step": 2110000 }, { "epoch": 23.44, "learning_rate": 6.640494858183595e-08, "loss": 3.7773, "step": 2110500 }, { "epoch": 23.44, "learning_rate": 6.639106678808607e-08, "loss": 3.7975, "step": 2111000 }, { "epoch": 23.45, "learning_rate": 6.637718499433622e-08, "loss": 3.8004, "step": 2111500 }, { "epoch": 23.45, "learning_rate": 6.636330320058636e-08, "loss": 3.7899, "step": 2112000 }, { "epoch": 23.46, "learning_rate": 6.63494214068365e-08, "loss": 3.7871, "step": 2112500 }, { "epoch": 23.47, "learning_rate": 6.633553961308664e-08, "loss": 3.7853, "step": 2113000 }, { "epoch": 23.47, "learning_rate": 6.632165781933679e-08, "loss": 3.7827, "step": 2113500 }, { "epoch": 23.48, "learning_rate": 6.630777602558692e-08, "loss": 3.7907, "step": 2114000 }, { "epoch": 23.48, "learning_rate": 6.629389423183706e-08, "loss": 3.7943, "step": 2114500 }, { "epoch": 23.49, "learning_rate": 6.62800124380872e-08, "loss": 3.793, "step": 2115000 }, { "epoch": 23.49, "learning_rate": 6.626613064433734e-08, "loss": 3.7769, "step": 2115500 }, { "epoch": 23.5, "learning_rate": 6.625224885058748e-08, "loss": 3.7895, "step": 2116000 }, { "epoch": 23.5, "learning_rate": 6.623836705683761e-08, "loss": 3.7738, "step": 2116500 }, { "epoch": 23.51, "learning_rate": 6.622448526308774e-08, "loss": 3.7806, "step": 2117000 }, { "epoch": 23.52, "learning_rate": 6.621060346933788e-08, "loss": 3.7863, "step": 2117500 }, { "epoch": 23.52, "learning_rate": 6.619672167558803e-08, "loss": 3.7868, "step": 2118000 }, { "epoch": 23.53, "learning_rate": 6.618283988183817e-08, "loss": 3.7666, "step": 2118500 }, { "epoch": 23.53, "learning_rate": 6.616895808808831e-08, "loss": 3.8007, "step": 2119000 }, { "epoch": 23.54, "learning_rate": 6.615507629433844e-08, "loss": 3.7921, "step": 2119500 }, { "epoch": 23.54, "learning_rate": 6.614119450058858e-08, "loss": 3.7645, "step": 2120000 }, { "epoch": 23.55, "learning_rate": 6.612731270683872e-08, "loss": 3.7755, "step": 2120500 }, { "epoch": 23.55, "learning_rate": 6.611343091308887e-08, "loss": 3.7808, "step": 2121000 }, { "epoch": 23.56, "learning_rate": 6.609954911933901e-08, "loss": 3.7815, "step": 2121500 }, { "epoch": 23.57, "learning_rate": 6.608566732558914e-08, "loss": 3.8045, "step": 2122000 }, { "epoch": 23.57, "learning_rate": 6.607178553183928e-08, "loss": 3.7643, "step": 2122500 }, { "epoch": 23.58, "learning_rate": 6.605790373808941e-08, "loss": 3.7958, "step": 2123000 }, { "epoch": 23.58, "learning_rate": 6.604402194433955e-08, "loss": 3.8002, "step": 2123500 }, { "epoch": 23.59, "learning_rate": 6.60301401505897e-08, "loss": 3.7913, "step": 2124000 }, { "epoch": 23.59, "learning_rate": 6.601625835683984e-08, "loss": 3.7967, "step": 2124500 }, { "epoch": 23.6, "learning_rate": 6.600237656308998e-08, "loss": 3.7851, "step": 2125000 }, { "epoch": 23.6, "learning_rate": 6.598849476934011e-08, "loss": 3.7898, "step": 2125500 }, { "epoch": 23.61, "learning_rate": 6.597461297559025e-08, "loss": 3.7706, "step": 2126000 }, { "epoch": 23.62, "learning_rate": 6.596073118184039e-08, "loss": 3.7717, "step": 2126500 }, { "epoch": 23.62, "learning_rate": 6.594684938809053e-08, "loss": 3.793, "step": 2127000 }, { "epoch": 23.63, "learning_rate": 6.593296759434068e-08, "loss": 3.7624, "step": 2127500 }, { "epoch": 23.63, "learning_rate": 6.59190858005908e-08, "loss": 3.8061, "step": 2128000 }, { "epoch": 23.64, "learning_rate": 6.590520400684095e-08, "loss": 3.7868, "step": 2128500 }, { "epoch": 23.64, "learning_rate": 6.589132221309108e-08, "loss": 3.7649, "step": 2129000 }, { "epoch": 23.65, "learning_rate": 6.587744041934122e-08, "loss": 3.7737, "step": 2129500 }, { "epoch": 23.65, "learning_rate": 6.586355862559136e-08, "loss": 3.7802, "step": 2130000 }, { "epoch": 23.66, "learning_rate": 6.58496768318415e-08, "loss": 3.777, "step": 2130500 }, { "epoch": 23.67, "learning_rate": 6.583579503809163e-08, "loss": 3.7966, "step": 2131000 }, { "epoch": 23.67, "learning_rate": 6.582191324434178e-08, "loss": 3.784, "step": 2131500 }, { "epoch": 23.68, "learning_rate": 6.580803145059192e-08, "loss": 3.7752, "step": 2132000 }, { "epoch": 23.68, "learning_rate": 6.579414965684206e-08, "loss": 3.7828, "step": 2132500 }, { "epoch": 23.69, "learning_rate": 6.57802678630922e-08, "loss": 3.7681, "step": 2133000 }, { "epoch": 23.69, "learning_rate": 6.576638606934233e-08, "loss": 3.7744, "step": 2133500 }, { "epoch": 23.7, "learning_rate": 6.575250427559247e-08, "loss": 3.7893, "step": 2134000 }, { "epoch": 23.7, "learning_rate": 6.573862248184262e-08, "loss": 3.77, "step": 2134500 }, { "epoch": 23.71, "learning_rate": 6.572474068809274e-08, "loss": 3.7745, "step": 2135000 }, { "epoch": 23.72, "learning_rate": 6.571085889434289e-08, "loss": 3.786, "step": 2135500 }, { "epoch": 23.72, "learning_rate": 6.569697710059303e-08, "loss": 3.7672, "step": 2136000 }, { "epoch": 23.73, "learning_rate": 6.568309530684316e-08, "loss": 3.8036, "step": 2136500 }, { "epoch": 23.73, "learning_rate": 6.56692135130933e-08, "loss": 3.7874, "step": 2137000 }, { "epoch": 23.74, "learning_rate": 6.565533171934344e-08, "loss": 3.765, "step": 2137500 }, { "epoch": 23.74, "learning_rate": 6.564144992559358e-08, "loss": 3.7837, "step": 2138000 }, { "epoch": 23.75, "learning_rate": 6.562756813184373e-08, "loss": 3.7807, "step": 2138500 }, { "epoch": 23.75, "learning_rate": 6.561368633809387e-08, "loss": 3.7927, "step": 2139000 }, { "epoch": 23.76, "learning_rate": 6.5599804544344e-08, "loss": 3.768, "step": 2139500 }, { "epoch": 23.77, "learning_rate": 6.558592275059414e-08, "loss": 3.7887, "step": 2140000 }, { "epoch": 23.77, "learning_rate": 6.557204095684428e-08, "loss": 3.775, "step": 2140500 }, { "epoch": 23.78, "learning_rate": 6.555815916309441e-08, "loss": 3.7894, "step": 2141000 }, { "epoch": 23.78, "learning_rate": 6.554427736934455e-08, "loss": 3.7793, "step": 2141500 }, { "epoch": 23.79, "learning_rate": 6.553039557559468e-08, "loss": 3.7877, "step": 2142000 }, { "epoch": 23.79, "learning_rate": 6.551651378184483e-08, "loss": 3.7821, "step": 2142500 }, { "epoch": 23.8, "learning_rate": 6.550263198809497e-08, "loss": 3.7771, "step": 2143000 }, { "epoch": 23.8, "learning_rate": 6.548875019434511e-08, "loss": 3.7745, "step": 2143500 }, { "epoch": 23.81, "learning_rate": 6.547486840059525e-08, "loss": 3.7832, "step": 2144000 }, { "epoch": 23.82, "learning_rate": 6.54609866068454e-08, "loss": 3.7857, "step": 2144500 }, { "epoch": 23.82, "learning_rate": 6.544710481309552e-08, "loss": 3.7874, "step": 2145000 }, { "epoch": 23.83, "learning_rate": 6.543322301934567e-08, "loss": 3.7822, "step": 2145500 }, { "epoch": 23.83, "learning_rate": 6.541934122559581e-08, "loss": 3.789, "step": 2146000 }, { "epoch": 23.84, "learning_rate": 6.540545943184595e-08, "loss": 3.7924, "step": 2146500 }, { "epoch": 23.84, "learning_rate": 6.539157763809608e-08, "loss": 3.7907, "step": 2147000 }, { "epoch": 23.85, "learning_rate": 6.537769584434621e-08, "loss": 3.7795, "step": 2147500 }, { "epoch": 23.85, "learning_rate": 6.536381405059635e-08, "loss": 3.7665, "step": 2148000 }, { "epoch": 23.86, "learning_rate": 6.534993225684649e-08, "loss": 3.7786, "step": 2148500 }, { "epoch": 23.87, "learning_rate": 6.533605046309664e-08, "loss": 3.7822, "step": 2149000 }, { "epoch": 23.87, "learning_rate": 6.532216866934678e-08, "loss": 3.7764, "step": 2149500 }, { "epoch": 23.88, "learning_rate": 6.530828687559692e-08, "loss": 3.7871, "step": 2150000 }, { "epoch": 23.88, "learning_rate": 6.529440508184705e-08, "loss": 3.7928, "step": 2150500 }, { "epoch": 23.89, "learning_rate": 6.528052328809719e-08, "loss": 3.787, "step": 2151000 }, { "epoch": 23.89, "learning_rate": 6.526664149434733e-08, "loss": 3.7874, "step": 2151500 }, { "epoch": 23.9, "learning_rate": 6.525275970059748e-08, "loss": 3.7823, "step": 2152000 }, { "epoch": 23.9, "learning_rate": 6.52388779068476e-08, "loss": 3.7776, "step": 2152500 }, { "epoch": 23.91, "learning_rate": 6.522499611309775e-08, "loss": 3.7992, "step": 2153000 }, { "epoch": 23.92, "learning_rate": 6.521111431934788e-08, "loss": 3.7724, "step": 2153500 }, { "epoch": 23.92, "learning_rate": 6.519723252559802e-08, "loss": 3.7752, "step": 2154000 }, { "epoch": 23.93, "learning_rate": 6.518335073184816e-08, "loss": 3.7777, "step": 2154500 }, { "epoch": 23.93, "learning_rate": 6.51694689380983e-08, "loss": 3.7808, "step": 2155000 }, { "epoch": 23.94, "learning_rate": 6.515558714434845e-08, "loss": 3.7812, "step": 2155500 }, { "epoch": 23.94, "learning_rate": 6.514170535059857e-08, "loss": 3.7915, "step": 2156000 }, { "epoch": 23.95, "learning_rate": 6.512782355684872e-08, "loss": 3.7822, "step": 2156500 }, { "epoch": 23.95, "learning_rate": 6.511394176309886e-08, "loss": 3.7834, "step": 2157000 }, { "epoch": 23.96, "learning_rate": 6.5100059969349e-08, "loss": 3.7774, "step": 2157500 }, { "epoch": 23.97, "learning_rate": 6.508617817559914e-08, "loss": 3.7848, "step": 2158000 }, { "epoch": 23.97, "learning_rate": 6.507229638184927e-08, "loss": 3.7703, "step": 2158500 }, { "epoch": 23.98, "learning_rate": 6.505841458809941e-08, "loss": 3.7652, "step": 2159000 }, { "epoch": 23.98, "learning_rate": 6.504453279434954e-08, "loss": 3.7697, "step": 2159500 }, { "epoch": 23.99, "learning_rate": 6.503065100059969e-08, "loss": 3.7894, "step": 2160000 }, { "epoch": 23.99, "learning_rate": 6.501676920684983e-08, "loss": 3.7768, "step": 2160500 }, { "epoch": 24.0, "learning_rate": 6.500288741309997e-08, "loss": 3.7882, "step": 2161000 }, { "epoch": 24.0, "eval_loss": 3.845104455947876, "eval_runtime": 6.3115, "eval_samples_per_second": 246.219, "step": 2161104 }, { "epoch": 24.0, "learning_rate": 6.498900561935011e-08, "loss": 3.799, "step": 2161500 }, { "epoch": 24.01, "learning_rate": 6.497512382560024e-08, "loss": 3.8033, "step": 2162000 }, { "epoch": 24.02, "learning_rate": 6.496124203185038e-08, "loss": 3.7733, "step": 2162500 }, { "epoch": 24.02, "learning_rate": 6.494736023810053e-08, "loss": 3.7673, "step": 2163000 }, { "epoch": 24.03, "learning_rate": 6.493347844435067e-08, "loss": 3.7686, "step": 2163500 }, { "epoch": 24.03, "learning_rate": 6.491959665060081e-08, "loss": 3.7849, "step": 2164000 }, { "epoch": 24.04, "learning_rate": 6.490571485685094e-08, "loss": 3.7871, "step": 2164500 }, { "epoch": 24.04, "learning_rate": 6.489183306310108e-08, "loss": 3.7774, "step": 2165000 }, { "epoch": 24.05, "learning_rate": 6.487795126935121e-08, "loss": 3.7842, "step": 2165500 }, { "epoch": 24.05, "learning_rate": 6.486406947560135e-08, "loss": 3.781, "step": 2166000 }, { "epoch": 24.06, "learning_rate": 6.48501876818515e-08, "loss": 3.7829, "step": 2166500 }, { "epoch": 24.07, "learning_rate": 6.483630588810164e-08, "loss": 3.7887, "step": 2167000 }, { "epoch": 24.07, "learning_rate": 6.482242409435177e-08, "loss": 3.8049, "step": 2167500 }, { "epoch": 24.08, "learning_rate": 6.480854230060191e-08, "loss": 3.773, "step": 2168000 }, { "epoch": 24.08, "learning_rate": 6.479466050685205e-08, "loss": 3.7746, "step": 2168500 }, { "epoch": 24.09, "learning_rate": 6.47807787131022e-08, "loss": 3.7972, "step": 2169000 }, { "epoch": 24.09, "learning_rate": 6.476689691935234e-08, "loss": 3.792, "step": 2169500 }, { "epoch": 24.1, "learning_rate": 6.475301512560246e-08, "loss": 3.7721, "step": 2170000 }, { "epoch": 24.1, "learning_rate": 6.473913333185261e-08, "loss": 3.7841, "step": 2170500 }, { "epoch": 24.11, "learning_rate": 6.472525153810275e-08, "loss": 3.7675, "step": 2171000 }, { "epoch": 24.12, "learning_rate": 6.471136974435288e-08, "loss": 3.7843, "step": 2171500 }, { "epoch": 24.12, "learning_rate": 6.469748795060302e-08, "loss": 3.7789, "step": 2172000 }, { "epoch": 24.13, "learning_rate": 6.468360615685316e-08, "loss": 3.7668, "step": 2172500 }, { "epoch": 24.13, "learning_rate": 6.466972436310329e-08, "loss": 3.7814, "step": 2173000 }, { "epoch": 24.14, "learning_rate": 6.465584256935343e-08, "loss": 3.7736, "step": 2173500 }, { "epoch": 24.14, "learning_rate": 6.464196077560358e-08, "loss": 3.7837, "step": 2174000 }, { "epoch": 24.15, "learning_rate": 6.462807898185372e-08, "loss": 3.806, "step": 2174500 }, { "epoch": 24.15, "learning_rate": 6.461419718810386e-08, "loss": 3.7826, "step": 2175000 }, { "epoch": 24.16, "learning_rate": 6.4600315394354e-08, "loss": 3.7926, "step": 2175500 }, { "epoch": 24.17, "learning_rate": 6.458643360060413e-08, "loss": 3.7778, "step": 2176000 }, { "epoch": 24.17, "learning_rate": 6.457255180685427e-08, "loss": 3.7669, "step": 2176500 }, { "epoch": 24.18, "learning_rate": 6.455867001310442e-08, "loss": 3.7813, "step": 2177000 }, { "epoch": 24.18, "learning_rate": 6.454478821935455e-08, "loss": 3.7727, "step": 2177500 }, { "epoch": 24.19, "learning_rate": 6.453090642560469e-08, "loss": 3.7971, "step": 2178000 }, { "epoch": 24.19, "learning_rate": 6.451702463185482e-08, "loss": 3.7803, "step": 2178500 }, { "epoch": 24.2, "learning_rate": 6.450314283810496e-08, "loss": 3.7864, "step": 2179000 }, { "epoch": 24.2, "learning_rate": 6.44892610443551e-08, "loss": 3.7819, "step": 2179500 }, { "epoch": 24.21, "learning_rate": 6.447537925060524e-08, "loss": 3.7894, "step": 2180000 }, { "epoch": 24.22, "learning_rate": 6.446149745685539e-08, "loss": 3.7786, "step": 2180500 }, { "epoch": 24.22, "learning_rate": 6.444761566310553e-08, "loss": 3.7788, "step": 2181000 }, { "epoch": 24.23, "learning_rate": 6.443373386935566e-08, "loss": 3.7876, "step": 2181500 }, { "epoch": 24.23, "learning_rate": 6.44198520756058e-08, "loss": 3.7829, "step": 2182000 }, { "epoch": 24.24, "learning_rate": 6.440597028185594e-08, "loss": 3.7617, "step": 2182500 }, { "epoch": 24.24, "learning_rate": 6.439208848810607e-08, "loss": 3.7806, "step": 2183000 }, { "epoch": 24.25, "learning_rate": 6.437820669435621e-08, "loss": 3.7923, "step": 2183500 }, { "epoch": 24.25, "learning_rate": 6.436432490060636e-08, "loss": 3.7772, "step": 2184000 }, { "epoch": 24.26, "learning_rate": 6.435044310685648e-08, "loss": 3.7799, "step": 2184500 }, { "epoch": 24.27, "learning_rate": 6.433656131310663e-08, "loss": 3.787, "step": 2185000 }, { "epoch": 24.27, "learning_rate": 6.432267951935677e-08, "loss": 3.787, "step": 2185500 }, { "epoch": 24.28, "learning_rate": 6.430879772560691e-08, "loss": 3.7804, "step": 2186000 }, { "epoch": 24.28, "learning_rate": 6.429491593185705e-08, "loss": 3.7947, "step": 2186500 }, { "epoch": 24.29, "learning_rate": 6.428103413810718e-08, "loss": 3.7832, "step": 2187000 }, { "epoch": 24.29, "learning_rate": 6.426715234435733e-08, "loss": 3.7852, "step": 2187500 }, { "epoch": 24.3, "learning_rate": 6.425327055060747e-08, "loss": 3.7644, "step": 2188000 }, { "epoch": 24.3, "learning_rate": 6.423938875685761e-08, "loss": 3.8106, "step": 2188500 }, { "epoch": 24.31, "learning_rate": 6.422550696310774e-08, "loss": 3.7837, "step": 2189000 }, { "epoch": 24.32, "learning_rate": 6.421162516935788e-08, "loss": 3.7755, "step": 2189500 }, { "epoch": 24.32, "learning_rate": 6.419774337560801e-08, "loss": 3.7884, "step": 2190000 }, { "epoch": 24.33, "learning_rate": 6.418386158185815e-08, "loss": 3.7811, "step": 2190500 }, { "epoch": 24.33, "learning_rate": 6.41699797881083e-08, "loss": 3.7565, "step": 2191000 }, { "epoch": 24.34, "learning_rate": 6.415609799435844e-08, "loss": 3.7716, "step": 2191500 }, { "epoch": 24.34, "learning_rate": 6.414221620060858e-08, "loss": 3.7875, "step": 2192000 }, { "epoch": 24.35, "learning_rate": 6.412833440685871e-08, "loss": 3.7719, "step": 2192500 }, { "epoch": 24.35, "learning_rate": 6.411445261310885e-08, "loss": 3.7735, "step": 2193000 }, { "epoch": 24.36, "learning_rate": 6.410057081935899e-08, "loss": 3.7724, "step": 2193500 }, { "epoch": 24.37, "learning_rate": 6.408668902560913e-08, "loss": 3.7767, "step": 2194000 }, { "epoch": 24.37, "learning_rate": 6.407280723185928e-08, "loss": 3.787, "step": 2194500 }, { "epoch": 24.38, "learning_rate": 6.40589254381094e-08, "loss": 3.778, "step": 2195000 }, { "epoch": 24.38, "learning_rate": 6.404504364435955e-08, "loss": 3.7974, "step": 2195500 }, { "epoch": 24.39, "learning_rate": 6.403116185060968e-08, "loss": 3.7689, "step": 2196000 }, { "epoch": 24.39, "learning_rate": 6.401728005685982e-08, "loss": 3.7784, "step": 2196500 }, { "epoch": 24.4, "learning_rate": 6.400339826310996e-08, "loss": 3.7827, "step": 2197000 }, { "epoch": 24.4, "learning_rate": 6.39895164693601e-08, "loss": 3.7858, "step": 2197500 }, { "epoch": 24.41, "learning_rate": 6.397563467561025e-08, "loss": 3.7733, "step": 2198000 }, { "epoch": 24.42, "learning_rate": 6.396175288186038e-08, "loss": 3.7667, "step": 2198500 }, { "epoch": 24.42, "learning_rate": 6.394787108811052e-08, "loss": 3.7876, "step": 2199000 }, { "epoch": 24.43, "learning_rate": 6.393398929436066e-08, "loss": 3.7564, "step": 2199500 }, { "epoch": 24.43, "learning_rate": 6.39201075006108e-08, "loss": 3.7918, "step": 2200000 }, { "epoch": 24.44, "learning_rate": 6.390622570686094e-08, "loss": 3.7811, "step": 2200500 }, { "epoch": 24.44, "learning_rate": 6.389234391311107e-08, "loss": 3.7851, "step": 2201000 }, { "epoch": 24.45, "learning_rate": 6.387846211936122e-08, "loss": 3.7748, "step": 2201500 }, { "epoch": 24.45, "learning_rate": 6.386458032561134e-08, "loss": 3.7831, "step": 2202000 }, { "epoch": 24.46, "learning_rate": 6.385069853186149e-08, "loss": 3.7806, "step": 2202500 }, { "epoch": 24.47, "learning_rate": 6.383681673811163e-08, "loss": 3.7971, "step": 2203000 }, { "epoch": 24.47, "learning_rate": 6.382293494436177e-08, "loss": 3.7712, "step": 2203500 }, { "epoch": 24.48, "learning_rate": 6.38090531506119e-08, "loss": 3.7673, "step": 2204000 }, { "epoch": 24.48, "learning_rate": 6.379517135686204e-08, "loss": 3.7984, "step": 2204500 }, { "epoch": 24.49, "learning_rate": 6.378128956311219e-08, "loss": 3.7709, "step": 2205000 }, { "epoch": 24.49, "learning_rate": 6.376740776936233e-08, "loss": 3.7762, "step": 2205500 }, { "epoch": 24.5, "learning_rate": 6.375352597561247e-08, "loss": 3.7866, "step": 2206000 }, { "epoch": 24.5, "learning_rate": 6.37396441818626e-08, "loss": 3.7719, "step": 2206500 }, { "epoch": 24.51, "learning_rate": 6.372576238811274e-08, "loss": 3.7738, "step": 2207000 }, { "epoch": 24.52, "learning_rate": 6.371188059436288e-08, "loss": 3.7794, "step": 2207500 }, { "epoch": 24.52, "learning_rate": 6.369799880061301e-08, "loss": 3.7505, "step": 2208000 }, { "epoch": 24.53, "learning_rate": 6.368411700686315e-08, "loss": 3.7902, "step": 2208500 }, { "epoch": 24.53, "learning_rate": 6.36702352131133e-08, "loss": 3.7845, "step": 2209000 }, { "epoch": 24.54, "learning_rate": 6.365635341936343e-08, "loss": 3.7939, "step": 2209500 }, { "epoch": 24.54, "learning_rate": 6.364247162561357e-08, "loss": 3.7785, "step": 2210000 }, { "epoch": 24.55, "learning_rate": 6.362858983186371e-08, "loss": 3.7825, "step": 2210500 }, { "epoch": 24.55, "learning_rate": 6.361470803811385e-08, "loss": 3.7744, "step": 2211000 }, { "epoch": 24.56, "learning_rate": 6.3600826244364e-08, "loss": 3.7761, "step": 2211500 }, { "epoch": 24.57, "learning_rate": 6.358694445061414e-08, "loss": 3.7914, "step": 2212000 }, { "epoch": 24.57, "learning_rate": 6.357306265686427e-08, "loss": 3.7731, "step": 2212500 }, { "epoch": 24.58, "learning_rate": 6.355918086311441e-08, "loss": 3.7857, "step": 2213000 }, { "epoch": 24.58, "learning_rate": 6.354529906936455e-08, "loss": 3.7786, "step": 2213500 }, { "epoch": 24.59, "learning_rate": 6.353141727561468e-08, "loss": 3.7713, "step": 2214000 }, { "epoch": 24.59, "learning_rate": 6.351753548186482e-08, "loss": 3.7961, "step": 2214500 }, { "epoch": 24.6, "learning_rate": 6.350365368811495e-08, "loss": 3.7708, "step": 2215000 }, { "epoch": 24.6, "learning_rate": 6.348977189436509e-08, "loss": 3.7718, "step": 2215500 }, { "epoch": 24.61, "learning_rate": 6.347589010061524e-08, "loss": 3.7713, "step": 2216000 }, { "epoch": 24.62, "learning_rate": 6.346200830686538e-08, "loss": 3.763, "step": 2216500 }, { "epoch": 24.62, "learning_rate": 6.344812651311552e-08, "loss": 3.7869, "step": 2217000 }, { "epoch": 24.63, "learning_rate": 6.343424471936566e-08, "loss": 3.7868, "step": 2217500 }, { "epoch": 24.63, "learning_rate": 6.342036292561579e-08, "loss": 3.7583, "step": 2218000 }, { "epoch": 24.64, "learning_rate": 6.340648113186593e-08, "loss": 3.7679, "step": 2218500 }, { "epoch": 24.64, "learning_rate": 6.339259933811608e-08, "loss": 3.7673, "step": 2219000 }, { "epoch": 24.65, "learning_rate": 6.33787175443662e-08, "loss": 3.7673, "step": 2219500 }, { "epoch": 24.65, "learning_rate": 6.336483575061635e-08, "loss": 3.7909, "step": 2220000 }, { "epoch": 24.66, "learning_rate": 6.335095395686649e-08, "loss": 3.764, "step": 2220500 }, { "epoch": 24.67, "learning_rate": 6.333707216311662e-08, "loss": 3.7871, "step": 2221000 }, { "epoch": 24.67, "learning_rate": 6.332319036936676e-08, "loss": 3.7685, "step": 2221500 }, { "epoch": 24.68, "learning_rate": 6.33093085756169e-08, "loss": 3.7835, "step": 2222000 }, { "epoch": 24.68, "learning_rate": 6.329542678186705e-08, "loss": 3.7625, "step": 2222500 }, { "epoch": 24.69, "learning_rate": 6.328154498811719e-08, "loss": 3.7959, "step": 2223000 }, { "epoch": 24.69, "learning_rate": 6.326766319436732e-08, "loss": 3.7993, "step": 2223500 }, { "epoch": 24.7, "learning_rate": 6.325378140061746e-08, "loss": 3.7785, "step": 2224000 }, { "epoch": 24.7, "learning_rate": 6.32398996068676e-08, "loss": 3.7664, "step": 2224500 }, { "epoch": 24.71, "learning_rate": 6.322601781311774e-08, "loss": 3.7902, "step": 2225000 }, { "epoch": 24.72, "learning_rate": 6.321213601936787e-08, "loss": 3.7818, "step": 2225500 }, { "epoch": 24.72, "learning_rate": 6.319825422561801e-08, "loss": 3.7948, "step": 2226000 }, { "epoch": 24.73, "learning_rate": 6.318437243186814e-08, "loss": 3.7724, "step": 2226500 }, { "epoch": 24.73, "learning_rate": 6.317049063811829e-08, "loss": 3.7673, "step": 2227000 }, { "epoch": 24.74, "learning_rate": 6.315660884436843e-08, "loss": 3.7821, "step": 2227500 }, { "epoch": 24.74, "learning_rate": 6.314272705061857e-08, "loss": 3.7784, "step": 2228000 }, { "epoch": 24.75, "learning_rate": 6.312884525686871e-08, "loss": 3.781, "step": 2228500 }, { "epoch": 24.75, "learning_rate": 6.311496346311884e-08, "loss": 3.7779, "step": 2229000 }, { "epoch": 24.76, "learning_rate": 6.310108166936898e-08, "loss": 3.799, "step": 2229500 }, { "epoch": 24.77, "learning_rate": 6.308719987561913e-08, "loss": 3.7793, "step": 2230000 }, { "epoch": 24.77, "learning_rate": 6.307331808186927e-08, "loss": 3.7945, "step": 2230500 }, { "epoch": 24.78, "learning_rate": 6.305943628811941e-08, "loss": 3.7786, "step": 2231000 }, { "epoch": 24.78, "learning_rate": 6.304555449436954e-08, "loss": 3.7963, "step": 2231500 }, { "epoch": 24.79, "learning_rate": 6.303167270061968e-08, "loss": 3.7816, "step": 2232000 }, { "epoch": 24.79, "learning_rate": 6.301779090686981e-08, "loss": 3.7791, "step": 2232500 }, { "epoch": 24.8, "learning_rate": 6.300390911311995e-08, "loss": 3.7793, "step": 2233000 }, { "epoch": 24.8, "learning_rate": 6.29900273193701e-08, "loss": 3.7623, "step": 2233500 }, { "epoch": 24.81, "learning_rate": 6.297614552562024e-08, "loss": 3.7672, "step": 2234000 }, { "epoch": 24.82, "learning_rate": 6.296226373187038e-08, "loss": 3.7618, "step": 2234500 }, { "epoch": 24.82, "learning_rate": 6.294838193812051e-08, "loss": 3.7773, "step": 2235000 }, { "epoch": 24.83, "learning_rate": 6.293450014437065e-08, "loss": 3.7935, "step": 2235500 }, { "epoch": 24.83, "learning_rate": 6.29206183506208e-08, "loss": 3.7926, "step": 2236000 }, { "epoch": 24.84, "learning_rate": 6.290673655687094e-08, "loss": 3.7756, "step": 2236500 }, { "epoch": 24.84, "learning_rate": 6.289285476312108e-08, "loss": 3.7582, "step": 2237000 }, { "epoch": 24.85, "learning_rate": 6.287897296937121e-08, "loss": 3.758, "step": 2237500 }, { "epoch": 24.85, "learning_rate": 6.286509117562135e-08, "loss": 3.7717, "step": 2238000 }, { "epoch": 24.86, "learning_rate": 6.285120938187148e-08, "loss": 3.7701, "step": 2238500 }, { "epoch": 24.87, "learning_rate": 6.283732758812162e-08, "loss": 3.7683, "step": 2239000 }, { "epoch": 24.87, "learning_rate": 6.282344579437176e-08, "loss": 3.7978, "step": 2239500 }, { "epoch": 24.88, "learning_rate": 6.28095640006219e-08, "loss": 3.7998, "step": 2240000 }, { "epoch": 24.88, "learning_rate": 6.279568220687203e-08, "loss": 3.7641, "step": 2240500 }, { "epoch": 24.89, "learning_rate": 6.278180041312218e-08, "loss": 3.798, "step": 2241000 }, { "epoch": 24.89, "learning_rate": 6.276791861937232e-08, "loss": 3.7828, "step": 2241500 }, { "epoch": 24.9, "learning_rate": 6.275403682562246e-08, "loss": 3.7854, "step": 2242000 }, { "epoch": 24.9, "learning_rate": 6.27401550318726e-08, "loss": 3.7846, "step": 2242500 }, { "epoch": 24.91, "learning_rate": 6.272627323812275e-08, "loss": 3.7684, "step": 2243000 }, { "epoch": 24.92, "learning_rate": 6.271239144437287e-08, "loss": 3.7712, "step": 2243500 }, { "epoch": 24.92, "learning_rate": 6.269850965062302e-08, "loss": 3.7804, "step": 2244000 }, { "epoch": 24.93, "learning_rate": 6.268462785687315e-08, "loss": 3.7827, "step": 2244500 }, { "epoch": 24.93, "learning_rate": 6.267074606312329e-08, "loss": 3.7814, "step": 2245000 }, { "epoch": 24.94, "learning_rate": 6.265686426937343e-08, "loss": 3.7627, "step": 2245500 }, { "epoch": 24.94, "learning_rate": 6.264298247562356e-08, "loss": 3.7555, "step": 2246000 }, { "epoch": 24.95, "learning_rate": 6.26291006818737e-08, "loss": 3.804, "step": 2246500 }, { "epoch": 24.95, "learning_rate": 6.261521888812384e-08, "loss": 3.7932, "step": 2247000 }, { "epoch": 24.96, "learning_rate": 6.260133709437399e-08, "loss": 3.7669, "step": 2247500 }, { "epoch": 24.97, "learning_rate": 6.258745530062413e-08, "loss": 3.7754, "step": 2248000 }, { "epoch": 24.97, "learning_rate": 6.257357350687427e-08, "loss": 3.7734, "step": 2248500 }, { "epoch": 24.98, "learning_rate": 6.25596917131244e-08, "loss": 3.7856, "step": 2249000 }, { "epoch": 24.98, "learning_rate": 6.254580991937454e-08, "loss": 3.7783, "step": 2249500 }, { "epoch": 24.99, "learning_rate": 6.253192812562467e-08, "loss": 3.7844, "step": 2250000 }, { "epoch": 24.99, "learning_rate": 6.251804633187481e-08, "loss": 3.7854, "step": 2250500 }, { "epoch": 25.0, "learning_rate": 6.250416453812496e-08, "loss": 3.7824, "step": 2251000 }, { "epoch": 25.0, "eval_loss": 3.843087911605835, "eval_runtime": 6.3026, "eval_samples_per_second": 246.566, "step": 2251150 }, { "epoch": 25.0, "learning_rate": 6.249028274437508e-08, "loss": 3.7844, "step": 2251500 }, { "epoch": 25.01, "learning_rate": 6.247640095062523e-08, "loss": 3.7535, "step": 2252000 }, { "epoch": 25.01, "learning_rate": 6.246251915687537e-08, "loss": 3.7942, "step": 2252500 }, { "epoch": 25.02, "learning_rate": 6.244863736312551e-08, "loss": 3.7737, "step": 2253000 }, { "epoch": 25.03, "learning_rate": 6.243475556937565e-08, "loss": 3.7851, "step": 2253500 }, { "epoch": 25.03, "learning_rate": 6.24208737756258e-08, "loss": 3.789, "step": 2254000 }, { "epoch": 25.04, "learning_rate": 6.240699198187593e-08, "loss": 3.7767, "step": 2254500 }, { "epoch": 25.04, "learning_rate": 6.239311018812607e-08, "loss": 3.7702, "step": 2255000 }, { "epoch": 25.05, "learning_rate": 6.237922839437621e-08, "loss": 3.7632, "step": 2255500 }, { "epoch": 25.05, "learning_rate": 6.236534660062634e-08, "loss": 3.7774, "step": 2256000 }, { "epoch": 25.06, "learning_rate": 6.235146480687648e-08, "loss": 3.7659, "step": 2256500 }, { "epoch": 25.06, "learning_rate": 6.233758301312662e-08, "loss": 3.7828, "step": 2257000 }, { "epoch": 25.07, "learning_rate": 6.232370121937675e-08, "loss": 3.7881, "step": 2257500 }, { "epoch": 25.08, "learning_rate": 6.23098194256269e-08, "loss": 3.7728, "step": 2258000 }, { "epoch": 25.08, "learning_rate": 6.229593763187704e-08, "loss": 3.7864, "step": 2258500 }, { "epoch": 25.09, "learning_rate": 6.228205583812718e-08, "loss": 3.7813, "step": 2259000 }, { "epoch": 25.09, "learning_rate": 6.226817404437732e-08, "loss": 3.7711, "step": 2259500 }, { "epoch": 25.1, "learning_rate": 6.225429225062745e-08, "loss": 3.7546, "step": 2260000 }, { "epoch": 25.1, "learning_rate": 6.224041045687759e-08, "loss": 3.7778, "step": 2260500 }, { "epoch": 25.11, "learning_rate": 6.222652866312773e-08, "loss": 3.7861, "step": 2261000 }, { "epoch": 25.11, "learning_rate": 6.221264686937788e-08, "loss": 3.7819, "step": 2261500 }, { "epoch": 25.12, "learning_rate": 6.2198765075628e-08, "loss": 3.768, "step": 2262000 }, { "epoch": 25.13, "learning_rate": 6.218488328187815e-08, "loss": 3.7939, "step": 2262500 }, { "epoch": 25.13, "learning_rate": 6.217100148812828e-08, "loss": 3.7827, "step": 2263000 }, { "epoch": 25.14, "learning_rate": 6.215711969437842e-08, "loss": 3.792, "step": 2263500 }, { "epoch": 25.14, "learning_rate": 6.214323790062856e-08, "loss": 3.7551, "step": 2264000 }, { "epoch": 25.15, "learning_rate": 6.21293561068787e-08, "loss": 3.7861, "step": 2264500 }, { "epoch": 25.15, "learning_rate": 6.211547431312885e-08, "loss": 3.7695, "step": 2265000 }, { "epoch": 25.16, "learning_rate": 6.210159251937898e-08, "loss": 3.7738, "step": 2265500 }, { "epoch": 25.16, "learning_rate": 6.208771072562912e-08, "loss": 3.7672, "step": 2266000 }, { "epoch": 25.17, "learning_rate": 6.207382893187926e-08, "loss": 3.7886, "step": 2266500 }, { "epoch": 25.18, "learning_rate": 6.20599471381294e-08, "loss": 3.7748, "step": 2267000 }, { "epoch": 25.18, "learning_rate": 6.204606534437954e-08, "loss": 3.7885, "step": 2267500 }, { "epoch": 25.19, "learning_rate": 6.203218355062967e-08, "loss": 3.7758, "step": 2268000 }, { "epoch": 25.19, "learning_rate": 6.201830175687982e-08, "loss": 3.775, "step": 2268500 }, { "epoch": 25.2, "learning_rate": 6.200441996312994e-08, "loss": 3.7799, "step": 2269000 }, { "epoch": 25.2, "learning_rate": 6.199053816938009e-08, "loss": 3.7692, "step": 2269500 }, { "epoch": 25.21, "learning_rate": 6.197665637563023e-08, "loss": 3.7766, "step": 2270000 }, { "epoch": 25.21, "learning_rate": 6.196277458188037e-08, "loss": 3.7793, "step": 2270500 }, { "epoch": 25.22, "learning_rate": 6.194889278813051e-08, "loss": 3.7781, "step": 2271000 }, { "epoch": 25.23, "learning_rate": 6.193501099438064e-08, "loss": 3.7762, "step": 2271500 }, { "epoch": 25.23, "learning_rate": 6.192112920063079e-08, "loss": 3.7535, "step": 2272000 }, { "epoch": 25.24, "learning_rate": 6.190724740688093e-08, "loss": 3.7742, "step": 2272500 }, { "epoch": 25.24, "learning_rate": 6.189336561313107e-08, "loss": 3.8031, "step": 2273000 }, { "epoch": 25.25, "learning_rate": 6.187948381938121e-08, "loss": 3.7771, "step": 2273500 }, { "epoch": 25.25, "learning_rate": 6.186560202563134e-08, "loss": 3.7847, "step": 2274000 }, { "epoch": 25.26, "learning_rate": 6.185172023188148e-08, "loss": 3.7708, "step": 2274500 }, { "epoch": 25.26, "learning_rate": 6.183783843813161e-08, "loss": 3.7818, "step": 2275000 }, { "epoch": 25.27, "learning_rate": 6.182395664438175e-08, "loss": 3.7806, "step": 2275500 }, { "epoch": 25.28, "learning_rate": 6.18100748506319e-08, "loss": 3.78, "step": 2276000 }, { "epoch": 25.28, "learning_rate": 6.179619305688204e-08, "loss": 3.773, "step": 2276500 }, { "epoch": 25.29, "learning_rate": 6.178231126313217e-08, "loss": 3.7871, "step": 2277000 }, { "epoch": 25.29, "learning_rate": 6.176842946938231e-08, "loss": 3.7756, "step": 2277500 }, { "epoch": 25.3, "learning_rate": 6.175454767563245e-08, "loss": 3.7719, "step": 2278000 }, { "epoch": 25.3, "learning_rate": 6.17406658818826e-08, "loss": 3.7703, "step": 2278500 }, { "epoch": 25.31, "learning_rate": 6.172678408813274e-08, "loss": 3.7784, "step": 2279000 }, { "epoch": 25.31, "learning_rate": 6.171290229438288e-08, "loss": 3.7751, "step": 2279500 }, { "epoch": 25.32, "learning_rate": 6.169902050063301e-08, "loss": 3.7622, "step": 2280000 }, { "epoch": 25.33, "learning_rate": 6.168513870688314e-08, "loss": 3.7569, "step": 2280500 }, { "epoch": 25.33, "learning_rate": 6.167125691313328e-08, "loss": 3.7805, "step": 2281000 }, { "epoch": 25.34, "learning_rate": 6.165737511938342e-08, "loss": 3.7769, "step": 2281500 }, { "epoch": 25.34, "learning_rate": 6.164349332563356e-08, "loss": 3.7592, "step": 2282000 }, { "epoch": 25.35, "learning_rate": 6.16296115318837e-08, "loss": 3.7694, "step": 2282500 }, { "epoch": 25.35, "learning_rate": 6.161572973813384e-08, "loss": 3.771, "step": 2283000 }, { "epoch": 25.36, "learning_rate": 6.160184794438398e-08, "loss": 3.7988, "step": 2283500 }, { "epoch": 25.36, "learning_rate": 6.158796615063412e-08, "loss": 3.7844, "step": 2284000 }, { "epoch": 25.37, "learning_rate": 6.157408435688426e-08, "loss": 3.7726, "step": 2284500 }, { "epoch": 25.38, "learning_rate": 6.15602025631344e-08, "loss": 3.7683, "step": 2285000 }, { "epoch": 25.38, "learning_rate": 6.154632076938453e-08, "loss": 3.7863, "step": 2285500 }, { "epoch": 25.39, "learning_rate": 6.153243897563468e-08, "loss": 3.7739, "step": 2286000 }, { "epoch": 25.39, "learning_rate": 6.15185571818848e-08, "loss": 3.7684, "step": 2286500 }, { "epoch": 25.4, "learning_rate": 6.150467538813495e-08, "loss": 3.7718, "step": 2287000 }, { "epoch": 25.4, "learning_rate": 6.149079359438509e-08, "loss": 3.775, "step": 2287500 }, { "epoch": 25.41, "learning_rate": 6.147691180063522e-08, "loss": 3.7615, "step": 2288000 }, { "epoch": 25.41, "learning_rate": 6.146303000688536e-08, "loss": 3.7717, "step": 2288500 }, { "epoch": 25.42, "learning_rate": 6.14491482131355e-08, "loss": 3.7709, "step": 2289000 }, { "epoch": 25.43, "learning_rate": 6.143526641938565e-08, "loss": 3.7846, "step": 2289500 }, { "epoch": 25.43, "learning_rate": 6.142138462563579e-08, "loss": 3.7955, "step": 2290000 }, { "epoch": 25.44, "learning_rate": 6.140750283188593e-08, "loss": 3.7863, "step": 2290500 }, { "epoch": 25.44, "learning_rate": 6.139362103813606e-08, "loss": 3.7812, "step": 2291000 }, { "epoch": 25.45, "learning_rate": 6.13797392443862e-08, "loss": 3.7727, "step": 2291500 }, { "epoch": 25.45, "learning_rate": 6.136585745063634e-08, "loss": 3.7847, "step": 2292000 }, { "epoch": 25.46, "learning_rate": 6.135197565688647e-08, "loss": 3.7922, "step": 2292500 }, { "epoch": 25.46, "learning_rate": 6.133809386313661e-08, "loss": 3.7741, "step": 2293000 }, { "epoch": 25.47, "learning_rate": 6.132421206938676e-08, "loss": 3.809, "step": 2293500 }, { "epoch": 25.48, "learning_rate": 6.131033027563689e-08, "loss": 3.7726, "step": 2294000 }, { "epoch": 25.48, "learning_rate": 6.129644848188703e-08, "loss": 3.7901, "step": 2294500 }, { "epoch": 25.49, "learning_rate": 6.128256668813717e-08, "loss": 3.7835, "step": 2295000 }, { "epoch": 25.49, "learning_rate": 6.126868489438731e-08, "loss": 3.7768, "step": 2295500 }, { "epoch": 25.5, "learning_rate": 6.125480310063746e-08, "loss": 3.7663, "step": 2296000 }, { "epoch": 25.5, "learning_rate": 6.124092130688758e-08, "loss": 3.7675, "step": 2296500 }, { "epoch": 25.51, "learning_rate": 6.122703951313773e-08, "loss": 3.7719, "step": 2297000 }, { "epoch": 25.51, "learning_rate": 6.121315771938787e-08, "loss": 3.7881, "step": 2297500 }, { "epoch": 25.52, "learning_rate": 6.119927592563801e-08, "loss": 3.7921, "step": 2298000 }, { "epoch": 25.53, "learning_rate": 6.118539413188814e-08, "loss": 3.7846, "step": 2298500 }, { "epoch": 25.53, "learning_rate": 6.117151233813828e-08, "loss": 3.791, "step": 2299000 }, { "epoch": 25.54, "learning_rate": 6.115763054438841e-08, "loss": 3.7707, "step": 2299500 }, { "epoch": 25.54, "learning_rate": 6.114374875063855e-08, "loss": 3.7853, "step": 2300000 }, { "epoch": 25.55, "learning_rate": 6.11298669568887e-08, "loss": 3.7911, "step": 2300500 }, { "epoch": 25.55, "learning_rate": 6.111598516313884e-08, "loss": 3.7801, "step": 2301000 }, { "epoch": 25.56, "learning_rate": 6.110210336938898e-08, "loss": 3.7801, "step": 2301500 }, { "epoch": 25.56, "learning_rate": 6.108822157563911e-08, "loss": 3.7603, "step": 2302000 }, { "epoch": 25.57, "learning_rate": 6.107433978188925e-08, "loss": 3.7835, "step": 2302500 }, { "epoch": 25.58, "learning_rate": 6.10604579881394e-08, "loss": 3.7807, "step": 2303000 }, { "epoch": 25.58, "learning_rate": 6.104657619438954e-08, "loss": 3.7854, "step": 2303500 }, { "epoch": 25.59, "learning_rate": 6.103269440063968e-08, "loss": 3.8086, "step": 2304000 }, { "epoch": 25.59, "learning_rate": 6.101881260688981e-08, "loss": 3.7595, "step": 2304500 }, { "epoch": 25.6, "learning_rate": 6.100493081313995e-08, "loss": 3.7767, "step": 2305000 }, { "epoch": 25.6, "learning_rate": 6.099104901939008e-08, "loss": 3.7811, "step": 2305500 }, { "epoch": 25.61, "learning_rate": 6.097716722564022e-08, "loss": 3.7619, "step": 2306000 }, { "epoch": 25.61, "learning_rate": 6.096328543189036e-08, "loss": 3.7782, "step": 2306500 }, { "epoch": 25.62, "learning_rate": 6.09494036381405e-08, "loss": 3.7546, "step": 2307000 }, { "epoch": 25.63, "learning_rate": 6.093552184439065e-08, "loss": 3.789, "step": 2307500 }, { "epoch": 25.63, "learning_rate": 6.092164005064078e-08, "loss": 3.7785, "step": 2308000 }, { "epoch": 25.64, "learning_rate": 6.090775825689092e-08, "loss": 3.7831, "step": 2308500 }, { "epoch": 25.64, "learning_rate": 6.089387646314106e-08, "loss": 3.7747, "step": 2309000 }, { "epoch": 25.65, "learning_rate": 6.08799946693912e-08, "loss": 3.7653, "step": 2309500 }, { "epoch": 25.65, "learning_rate": 6.086611287564135e-08, "loss": 3.7682, "step": 2310000 }, { "epoch": 25.66, "learning_rate": 6.085223108189147e-08, "loss": 3.7801, "step": 2310500 }, { "epoch": 25.66, "learning_rate": 6.08383492881416e-08, "loss": 3.7543, "step": 2311000 }, { "epoch": 25.67, "learning_rate": 6.082446749439175e-08, "loss": 3.8091, "step": 2311500 }, { "epoch": 25.68, "learning_rate": 6.081058570064189e-08, "loss": 3.7603, "step": 2312000 }, { "epoch": 25.68, "learning_rate": 6.079670390689203e-08, "loss": 3.7774, "step": 2312500 }, { "epoch": 25.69, "learning_rate": 6.078282211314217e-08, "loss": 3.7646, "step": 2313000 }, { "epoch": 25.69, "learning_rate": 6.07689403193923e-08, "loss": 3.7538, "step": 2313500 }, { "epoch": 25.7, "learning_rate": 6.075505852564244e-08, "loss": 3.7725, "step": 2314000 }, { "epoch": 25.7, "learning_rate": 6.074117673189259e-08, "loss": 3.7809, "step": 2314500 }, { "epoch": 25.71, "learning_rate": 6.072729493814273e-08, "loss": 3.7843, "step": 2315000 }, { "epoch": 25.71, "learning_rate": 6.071341314439287e-08, "loss": 3.7941, "step": 2315500 }, { "epoch": 25.72, "learning_rate": 6.069953135064301e-08, "loss": 3.7652, "step": 2316000 }, { "epoch": 25.73, "learning_rate": 6.068564955689314e-08, "loss": 3.7785, "step": 2316500 }, { "epoch": 25.73, "learning_rate": 6.067176776314327e-08, "loss": 3.7539, "step": 2317000 }, { "epoch": 25.74, "learning_rate": 6.065788596939341e-08, "loss": 3.7901, "step": 2317500 }, { "epoch": 25.74, "learning_rate": 6.064400417564356e-08, "loss": 3.7908, "step": 2318000 }, { "epoch": 25.75, "learning_rate": 6.06301223818937e-08, "loss": 3.7813, "step": 2318500 }, { "epoch": 25.75, "learning_rate": 6.061624058814383e-08, "loss": 3.7818, "step": 2319000 }, { "epoch": 25.76, "learning_rate": 6.060235879439397e-08, "loss": 3.7954, "step": 2319500 }, { "epoch": 25.76, "learning_rate": 6.058847700064411e-08, "loss": 3.7612, "step": 2320000 }, { "epoch": 25.77, "learning_rate": 6.057459520689425e-08, "loss": 3.7849, "step": 2320500 }, { "epoch": 25.78, "learning_rate": 6.05607134131444e-08, "loss": 3.793, "step": 2321000 }, { "epoch": 25.78, "learning_rate": 6.054683161939454e-08, "loss": 3.7967, "step": 2321500 }, { "epoch": 25.79, "learning_rate": 6.053294982564467e-08, "loss": 3.7936, "step": 2322000 }, { "epoch": 25.79, "learning_rate": 6.051906803189481e-08, "loss": 3.7671, "step": 2322500 }, { "epoch": 25.8, "learning_rate": 6.050518623814494e-08, "loss": 3.7697, "step": 2323000 }, { "epoch": 25.8, "learning_rate": 6.049130444439508e-08, "loss": 3.7808, "step": 2323500 }, { "epoch": 25.81, "learning_rate": 6.047742265064522e-08, "loss": 3.7815, "step": 2324000 }, { "epoch": 25.81, "learning_rate": 6.046354085689535e-08, "loss": 3.77, "step": 2324500 }, { "epoch": 25.82, "learning_rate": 6.04496590631455e-08, "loss": 3.7696, "step": 2325000 }, { "epoch": 25.83, "learning_rate": 6.043577726939564e-08, "loss": 3.7941, "step": 2325500 }, { "epoch": 25.83, "learning_rate": 6.042189547564578e-08, "loss": 3.7838, "step": 2326000 }, { "epoch": 25.84, "learning_rate": 6.040801368189592e-08, "loss": 3.7897, "step": 2326500 }, { "epoch": 25.84, "learning_rate": 6.039413188814606e-08, "loss": 3.7522, "step": 2327000 }, { "epoch": 25.85, "learning_rate": 6.038025009439619e-08, "loss": 3.7578, "step": 2327500 }, { "epoch": 25.85, "learning_rate": 6.036636830064634e-08, "loss": 3.7898, "step": 2328000 }, { "epoch": 25.86, "learning_rate": 6.035248650689648e-08, "loss": 3.7778, "step": 2328500 }, { "epoch": 25.86, "learning_rate": 6.03386047131466e-08, "loss": 3.7821, "step": 2329000 }, { "epoch": 25.87, "learning_rate": 6.032472291939675e-08, "loss": 3.7623, "step": 2329500 }, { "epoch": 25.88, "learning_rate": 6.031084112564689e-08, "loss": 3.7655, "step": 2330000 }, { "epoch": 25.88, "learning_rate": 6.029695933189702e-08, "loss": 3.7794, "step": 2330500 }, { "epoch": 25.89, "learning_rate": 6.028307753814716e-08, "loss": 3.7804, "step": 2331000 }, { "epoch": 25.89, "learning_rate": 6.02691957443973e-08, "loss": 3.787, "step": 2331500 }, { "epoch": 25.9, "learning_rate": 6.025531395064745e-08, "loss": 3.7841, "step": 2332000 }, { "epoch": 25.9, "learning_rate": 6.024143215689759e-08, "loss": 3.7678, "step": 2332500 }, { "epoch": 25.91, "learning_rate": 6.022755036314772e-08, "loss": 3.7612, "step": 2333000 }, { "epoch": 25.91, "learning_rate": 6.021366856939786e-08, "loss": 3.7752, "step": 2333500 }, { "epoch": 25.92, "learning_rate": 6.0199786775648e-08, "loss": 3.7913, "step": 2334000 }, { "epoch": 25.93, "learning_rate": 6.018590498189814e-08, "loss": 3.7827, "step": 2334500 }, { "epoch": 25.93, "learning_rate": 6.017202318814827e-08, "loss": 3.7714, "step": 2335000 }, { "epoch": 25.94, "learning_rate": 6.015814139439842e-08, "loss": 3.7884, "step": 2335500 }, { "epoch": 25.94, "learning_rate": 6.014425960064855e-08, "loss": 3.7875, "step": 2336000 }, { "epoch": 25.95, "learning_rate": 6.013037780689869e-08, "loss": 3.7807, "step": 2336500 }, { "epoch": 25.95, "learning_rate": 6.011649601314883e-08, "loss": 3.7625, "step": 2337000 }, { "epoch": 25.96, "learning_rate": 6.010261421939897e-08, "loss": 3.7606, "step": 2337500 }, { "epoch": 25.96, "learning_rate": 6.008873242564911e-08, "loss": 3.7747, "step": 2338000 }, { "epoch": 25.97, "learning_rate": 6.007485063189926e-08, "loss": 3.7736, "step": 2338500 }, { "epoch": 25.98, "learning_rate": 6.006096883814939e-08, "loss": 3.7746, "step": 2339000 }, { "epoch": 25.98, "learning_rate": 6.004708704439953e-08, "loss": 3.7684, "step": 2339500 }, { "epoch": 25.99, "learning_rate": 6.003320525064967e-08, "loss": 3.7687, "step": 2340000 }, { "epoch": 25.99, "learning_rate": 6.001932345689981e-08, "loss": 3.7736, "step": 2340500 }, { "epoch": 26.0, "learning_rate": 6.000544166314994e-08, "loss": 3.7579, "step": 2341000 }, { "epoch": 26.0, "eval_loss": 3.8411214351654053, "eval_runtime": 6.2991, "eval_samples_per_second": 246.7, "step": 2341196 }, { "epoch": 26.0, "learning_rate": 5.999155986940007e-08, "loss": 3.7662, "step": 2341500 }, { "epoch": 26.01, "learning_rate": 5.997767807565021e-08, "loss": 3.785, "step": 2342000 }, { "epoch": 26.01, "learning_rate": 5.996379628190035e-08, "loss": 3.7827, "step": 2342500 }, { "epoch": 26.02, "learning_rate": 5.99499144881505e-08, "loss": 3.7691, "step": 2343000 }, { "epoch": 26.03, "learning_rate": 5.993603269440064e-08, "loss": 3.7776, "step": 2343500 }, { "epoch": 26.03, "learning_rate": 5.992215090065078e-08, "loss": 3.7821, "step": 2344000 }, { "epoch": 26.04, "learning_rate": 5.990826910690091e-08, "loss": 3.7668, "step": 2344500 }, { "epoch": 26.04, "learning_rate": 5.989438731315105e-08, "loss": 3.7789, "step": 2345000 }, { "epoch": 26.05, "learning_rate": 5.98805055194012e-08, "loss": 3.7851, "step": 2345500 }, { "epoch": 26.05, "learning_rate": 5.986662372565134e-08, "loss": 3.7689, "step": 2346000 }, { "epoch": 26.06, "learning_rate": 5.985274193190148e-08, "loss": 3.7674, "step": 2346500 }, { "epoch": 26.06, "learning_rate": 5.983886013815161e-08, "loss": 3.773, "step": 2347000 }, { "epoch": 26.07, "learning_rate": 5.982497834440174e-08, "loss": 3.7795, "step": 2347500 }, { "epoch": 26.08, "learning_rate": 5.981109655065188e-08, "loss": 3.7839, "step": 2348000 }, { "epoch": 26.08, "learning_rate": 5.979721475690202e-08, "loss": 3.7577, "step": 2348500 }, { "epoch": 26.09, "learning_rate": 5.978333296315216e-08, "loss": 3.7677, "step": 2349000 }, { "epoch": 26.09, "learning_rate": 5.976945116940231e-08, "loss": 3.7729, "step": 2349500 }, { "epoch": 26.1, "learning_rate": 5.975556937565244e-08, "loss": 3.7759, "step": 2350000 }, { "epoch": 26.1, "learning_rate": 5.974168758190258e-08, "loss": 3.7739, "step": 2350500 }, { "epoch": 26.11, "learning_rate": 5.972780578815272e-08, "loss": 3.7726, "step": 2351000 }, { "epoch": 26.11, "learning_rate": 5.971392399440286e-08, "loss": 3.7724, "step": 2351500 }, { "epoch": 26.12, "learning_rate": 5.9700042200653e-08, "loss": 3.7776, "step": 2352000 }, { "epoch": 26.13, "learning_rate": 5.968616040690315e-08, "loss": 3.7806, "step": 2352500 }, { "epoch": 26.13, "learning_rate": 5.967227861315328e-08, "loss": 3.7977, "step": 2353000 }, { "epoch": 26.14, "learning_rate": 5.96583968194034e-08, "loss": 3.7766, "step": 2353500 }, { "epoch": 26.14, "learning_rate": 5.964451502565355e-08, "loss": 3.7802, "step": 2354000 }, { "epoch": 26.15, "learning_rate": 5.963063323190369e-08, "loss": 3.7702, "step": 2354500 }, { "epoch": 26.15, "learning_rate": 5.961675143815383e-08, "loss": 3.7937, "step": 2355000 }, { "epoch": 26.16, "learning_rate": 5.960286964440396e-08, "loss": 3.7515, "step": 2355500 }, { "epoch": 26.16, "learning_rate": 5.9588987850654103e-08, "loss": 3.7776, "step": 2356000 }, { "epoch": 26.17, "learning_rate": 5.9575106056904246e-08, "loss": 3.7774, "step": 2356500 }, { "epoch": 26.18, "learning_rate": 5.956122426315439e-08, "loss": 3.7552, "step": 2357000 }, { "epoch": 26.18, "learning_rate": 5.9547342469404524e-08, "loss": 3.7695, "step": 2357500 }, { "epoch": 26.19, "learning_rate": 5.9533460675654666e-08, "loss": 3.7692, "step": 2358000 }, { "epoch": 26.19, "learning_rate": 5.9519578881904795e-08, "loss": 3.7857, "step": 2358500 }, { "epoch": 26.2, "learning_rate": 5.950569708815494e-08, "loss": 3.7701, "step": 2359000 }, { "epoch": 26.2, "learning_rate": 5.949181529440508e-08, "loss": 3.7657, "step": 2359500 }, { "epoch": 26.21, "learning_rate": 5.947793350065522e-08, "loss": 3.7755, "step": 2360000 }, { "epoch": 26.21, "learning_rate": 5.946405170690536e-08, "loss": 3.7925, "step": 2360500 }, { "epoch": 26.22, "learning_rate": 5.945016991315549e-08, "loss": 3.7727, "step": 2361000 }, { "epoch": 26.23, "learning_rate": 5.943628811940563e-08, "loss": 3.7822, "step": 2361500 }, { "epoch": 26.23, "learning_rate": 5.942240632565577e-08, "loss": 3.7901, "step": 2362000 }, { "epoch": 26.24, "learning_rate": 5.940852453190591e-08, "loss": 3.7575, "step": 2362500 }, { "epoch": 26.24, "learning_rate": 5.9394642738156055e-08, "loss": 3.7709, "step": 2363000 }, { "epoch": 26.25, "learning_rate": 5.938076094440619e-08, "loss": 3.7666, "step": 2363500 }, { "epoch": 26.25, "learning_rate": 5.9366879150656327e-08, "loss": 3.7754, "step": 2364000 }, { "epoch": 26.26, "learning_rate": 5.935299735690646e-08, "loss": 3.7759, "step": 2364500 }, { "epoch": 26.26, "learning_rate": 5.9339115563156605e-08, "loss": 3.7902, "step": 2365000 }, { "epoch": 26.27, "learning_rate": 5.932523376940675e-08, "loss": 3.7736, "step": 2365500 }, { "epoch": 26.28, "learning_rate": 5.931135197565689e-08, "loss": 3.7741, "step": 2366000 }, { "epoch": 26.28, "learning_rate": 5.9297470181907025e-08, "loss": 3.7795, "step": 2366500 }, { "epoch": 26.29, "learning_rate": 5.928358838815716e-08, "loss": 3.7559, "step": 2367000 }, { "epoch": 26.29, "learning_rate": 5.9269706594407296e-08, "loss": 3.7731, "step": 2367500 }, { "epoch": 26.3, "learning_rate": 5.925582480065744e-08, "loss": 3.7804, "step": 2368000 }, { "epoch": 26.3, "learning_rate": 5.924194300690758e-08, "loss": 3.7877, "step": 2368500 }, { "epoch": 26.31, "learning_rate": 5.922806121315772e-08, "loss": 3.771, "step": 2369000 }, { "epoch": 26.31, "learning_rate": 5.921417941940785e-08, "loss": 3.76, "step": 2369500 }, { "epoch": 26.32, "learning_rate": 5.9200297625657994e-08, "loss": 3.7905, "step": 2370000 }, { "epoch": 26.33, "learning_rate": 5.918641583190813e-08, "loss": 3.7537, "step": 2370500 }, { "epoch": 26.33, "learning_rate": 5.917253403815827e-08, "loss": 3.7657, "step": 2371000 }, { "epoch": 26.34, "learning_rate": 5.9158652244408414e-08, "loss": 3.7689, "step": 2371500 }, { "epoch": 26.34, "learning_rate": 5.9144770450658557e-08, "loss": 3.7825, "step": 2372000 }, { "epoch": 26.35, "learning_rate": 5.9130888656908686e-08, "loss": 3.778, "step": 2372500 }, { "epoch": 26.35, "learning_rate": 5.911700686315883e-08, "loss": 3.7542, "step": 2373000 }, { "epoch": 26.36, "learning_rate": 5.9103125069408963e-08, "loss": 3.7827, "step": 2373500 }, { "epoch": 26.36, "learning_rate": 5.9089243275659106e-08, "loss": 3.7815, "step": 2374000 }, { "epoch": 26.37, "learning_rate": 5.907536148190925e-08, "loss": 3.7804, "step": 2374500 }, { "epoch": 26.38, "learning_rate": 5.906147968815939e-08, "loss": 3.7731, "step": 2375000 }, { "epoch": 26.38, "learning_rate": 5.904759789440952e-08, "loss": 3.765, "step": 2375500 }, { "epoch": 26.39, "learning_rate": 5.903371610065966e-08, "loss": 3.7802, "step": 2376000 }, { "epoch": 26.39, "learning_rate": 5.90198343069098e-08, "loss": 3.7828, "step": 2376500 }, { "epoch": 26.4, "learning_rate": 5.900595251315994e-08, "loss": 3.7764, "step": 2377000 }, { "epoch": 26.4, "learning_rate": 5.899207071941008e-08, "loss": 3.7724, "step": 2377500 }, { "epoch": 26.41, "learning_rate": 5.897818892566021e-08, "loss": 3.7816, "step": 2378000 }, { "epoch": 26.41, "learning_rate": 5.896430713191035e-08, "loss": 3.7665, "step": 2378500 }, { "epoch": 26.42, "learning_rate": 5.8950425338160495e-08, "loss": 3.7869, "step": 2379000 }, { "epoch": 26.43, "learning_rate": 5.893654354441063e-08, "loss": 3.7587, "step": 2379500 }, { "epoch": 26.43, "learning_rate": 5.892266175066077e-08, "loss": 3.7765, "step": 2380000 }, { "epoch": 26.44, "learning_rate": 5.8908779956910915e-08, "loss": 3.7869, "step": 2380500 }, { "epoch": 26.44, "learning_rate": 5.8894898163161044e-08, "loss": 3.7881, "step": 2381000 }, { "epoch": 26.45, "learning_rate": 5.888101636941119e-08, "loss": 3.7907, "step": 2381500 }, { "epoch": 26.45, "learning_rate": 5.886713457566133e-08, "loss": 3.775, "step": 2382000 }, { "epoch": 26.46, "learning_rate": 5.8853252781911465e-08, "loss": 3.7772, "step": 2382500 }, { "epoch": 26.46, "learning_rate": 5.883937098816161e-08, "loss": 3.776, "step": 2383000 }, { "epoch": 26.47, "learning_rate": 5.8825489194411736e-08, "loss": 3.7497, "step": 2383500 }, { "epoch": 26.48, "learning_rate": 5.881160740066188e-08, "loss": 3.7557, "step": 2384000 }, { "epoch": 26.48, "learning_rate": 5.879772560691202e-08, "loss": 3.7666, "step": 2384500 }, { "epoch": 26.49, "learning_rate": 5.878384381316216e-08, "loss": 3.7546, "step": 2385000 }, { "epoch": 26.49, "learning_rate": 5.87699620194123e-08, "loss": 3.7955, "step": 2385500 }, { "epoch": 26.5, "learning_rate": 5.875608022566244e-08, "loss": 3.7892, "step": 2386000 }, { "epoch": 26.5, "learning_rate": 5.874219843191257e-08, "loss": 3.8018, "step": 2386500 }, { "epoch": 26.51, "learning_rate": 5.872831663816271e-08, "loss": 3.7711, "step": 2387000 }, { "epoch": 26.51, "learning_rate": 5.8714434844412854e-08, "loss": 3.7864, "step": 2387500 }, { "epoch": 26.52, "learning_rate": 5.870055305066299e-08, "loss": 3.7797, "step": 2388000 }, { "epoch": 26.53, "learning_rate": 5.868667125691313e-08, "loss": 3.7586, "step": 2388500 }, { "epoch": 26.53, "learning_rate": 5.8672789463163274e-08, "loss": 3.7873, "step": 2389000 }, { "epoch": 26.54, "learning_rate": 5.8658907669413403e-08, "loss": 3.7757, "step": 2389500 }, { "epoch": 26.54, "learning_rate": 5.8645025875663546e-08, "loss": 3.7946, "step": 2390000 }, { "epoch": 26.55, "learning_rate": 5.863114408191369e-08, "loss": 3.7511, "step": 2390500 }, { "epoch": 26.55, "learning_rate": 5.8617262288163824e-08, "loss": 3.7815, "step": 2391000 }, { "epoch": 26.56, "learning_rate": 5.8603380494413966e-08, "loss": 3.7808, "step": 2391500 }, { "epoch": 26.56, "learning_rate": 5.8589498700664095e-08, "loss": 3.7613, "step": 2392000 }, { "epoch": 26.57, "learning_rate": 5.857561690691424e-08, "loss": 3.7756, "step": 2392500 }, { "epoch": 26.58, "learning_rate": 5.856173511316438e-08, "loss": 3.7898, "step": 2393000 }, { "epoch": 26.58, "learning_rate": 5.854785331941452e-08, "loss": 3.7744, "step": 2393500 }, { "epoch": 26.59, "learning_rate": 5.853397152566466e-08, "loss": 3.7509, "step": 2394000 }, { "epoch": 26.59, "learning_rate": 5.85200897319148e-08, "loss": 3.7573, "step": 2394500 }, { "epoch": 26.6, "learning_rate": 5.850620793816493e-08, "loss": 3.7545, "step": 2395000 }, { "epoch": 26.6, "learning_rate": 5.849232614441507e-08, "loss": 3.7856, "step": 2395500 }, { "epoch": 26.61, "learning_rate": 5.847844435066521e-08, "loss": 3.7902, "step": 2396000 }, { "epoch": 26.61, "learning_rate": 5.8464562556915355e-08, "loss": 3.7875, "step": 2396500 }, { "epoch": 26.62, "learning_rate": 5.845068076316549e-08, "loss": 3.7675, "step": 2397000 }, { "epoch": 26.63, "learning_rate": 5.8436798969415633e-08, "loss": 3.7775, "step": 2397500 }, { "epoch": 26.63, "learning_rate": 5.842291717566576e-08, "loss": 3.7969, "step": 2398000 }, { "epoch": 26.64, "learning_rate": 5.8409035381915905e-08, "loss": 3.7738, "step": 2398500 }, { "epoch": 26.64, "learning_rate": 5.839515358816605e-08, "loss": 3.775, "step": 2399000 }, { "epoch": 26.65, "learning_rate": 5.838127179441619e-08, "loss": 3.7892, "step": 2399500 }, { "epoch": 26.65, "learning_rate": 5.8367390000666325e-08, "loss": 3.7576, "step": 2400000 }, { "epoch": 26.66, "learning_rate": 5.835350820691646e-08, "loss": 3.7613, "step": 2400500 }, { "epoch": 26.66, "learning_rate": 5.8339626413166596e-08, "loss": 3.7818, "step": 2401000 }, { "epoch": 26.67, "learning_rate": 5.832574461941674e-08, "loss": 3.7678, "step": 2401500 }, { "epoch": 26.68, "learning_rate": 5.831186282566688e-08, "loss": 3.7704, "step": 2402000 }, { "epoch": 26.68, "learning_rate": 5.829798103191702e-08, "loss": 3.7732, "step": 2402500 }, { "epoch": 26.69, "learning_rate": 5.828409923816716e-08, "loss": 3.7856, "step": 2403000 }, { "epoch": 26.69, "learning_rate": 5.8270217444417294e-08, "loss": 3.753, "step": 2403500 }, { "epoch": 26.7, "learning_rate": 5.825633565066743e-08, "loss": 3.7683, "step": 2404000 }, { "epoch": 26.7, "learning_rate": 5.824245385691757e-08, "loss": 3.7785, "step": 2404500 }, { "epoch": 26.71, "learning_rate": 5.8228572063167714e-08, "loss": 3.7612, "step": 2405000 }, { "epoch": 26.71, "learning_rate": 5.8214690269417857e-08, "loss": 3.8029, "step": 2405500 }, { "epoch": 26.72, "learning_rate": 5.8200808475667986e-08, "loss": 3.7592, "step": 2406000 }, { "epoch": 26.73, "learning_rate": 5.818692668191813e-08, "loss": 3.7703, "step": 2406500 }, { "epoch": 26.73, "learning_rate": 5.8173044888168264e-08, "loss": 3.8063, "step": 2407000 }, { "epoch": 26.74, "learning_rate": 5.8159163094418406e-08, "loss": 3.7556, "step": 2407500 }, { "epoch": 26.74, "learning_rate": 5.814528130066855e-08, "loss": 3.7629, "step": 2408000 }, { "epoch": 26.75, "learning_rate": 5.813139950691869e-08, "loss": 3.7713, "step": 2408500 }, { "epoch": 26.75, "learning_rate": 5.811751771316882e-08, "loss": 3.7854, "step": 2409000 }, { "epoch": 26.76, "learning_rate": 5.810363591941896e-08, "loss": 3.7742, "step": 2409500 }, { "epoch": 26.76, "learning_rate": 5.80897541256691e-08, "loss": 3.7741, "step": 2410000 }, { "epoch": 26.77, "learning_rate": 5.807587233191924e-08, "loss": 3.7968, "step": 2410500 }, { "epoch": 26.78, "learning_rate": 5.806199053816938e-08, "loss": 3.786, "step": 2411000 }, { "epoch": 26.78, "learning_rate": 5.8048108744419524e-08, "loss": 3.7779, "step": 2411500 }, { "epoch": 26.79, "learning_rate": 5.803422695066965e-08, "loss": 3.7668, "step": 2412000 }, { "epoch": 26.79, "learning_rate": 5.8020345156919795e-08, "loss": 3.7873, "step": 2412500 }, { "epoch": 26.8, "learning_rate": 5.800646336316993e-08, "loss": 3.7635, "step": 2413000 }, { "epoch": 26.8, "learning_rate": 5.799258156942007e-08, "loss": 3.7599, "step": 2413500 }, { "epoch": 26.81, "learning_rate": 5.7978699775670216e-08, "loss": 3.7678, "step": 2414000 }, { "epoch": 26.81, "learning_rate": 5.7964817981920345e-08, "loss": 3.794, "step": 2414500 }, { "epoch": 26.82, "learning_rate": 5.795093618817049e-08, "loss": 3.7688, "step": 2415000 }, { "epoch": 26.83, "learning_rate": 5.793705439442063e-08, "loss": 3.7707, "step": 2415500 }, { "epoch": 26.83, "learning_rate": 5.7923172600670765e-08, "loss": 3.7452, "step": 2416000 }, { "epoch": 26.84, "learning_rate": 5.790929080692091e-08, "loss": 3.7764, "step": 2416500 }, { "epoch": 26.84, "learning_rate": 5.789540901317105e-08, "loss": 3.7758, "step": 2417000 }, { "epoch": 26.85, "learning_rate": 5.788152721942118e-08, "loss": 3.7747, "step": 2417500 }, { "epoch": 26.85, "learning_rate": 5.786764542567132e-08, "loss": 3.7881, "step": 2418000 }, { "epoch": 26.86, "learning_rate": 5.7853763631921456e-08, "loss": 3.7616, "step": 2418500 }, { "epoch": 26.86, "learning_rate": 5.78398818381716e-08, "loss": 3.7473, "step": 2419000 }, { "epoch": 26.87, "learning_rate": 5.782600004442174e-08, "loss": 3.7767, "step": 2419500 }, { "epoch": 26.88, "learning_rate": 5.781211825067187e-08, "loss": 3.7812, "step": 2420000 }, { "epoch": 26.88, "learning_rate": 5.779823645692201e-08, "loss": 3.7537, "step": 2420500 }, { "epoch": 26.89, "learning_rate": 5.7784354663172154e-08, "loss": 3.7691, "step": 2421000 }, { "epoch": 26.89, "learning_rate": 5.777047286942229e-08, "loss": 3.7589, "step": 2421500 }, { "epoch": 26.9, "learning_rate": 5.775659107567243e-08, "loss": 3.7915, "step": 2422000 }, { "epoch": 26.9, "learning_rate": 5.7742709281922574e-08, "loss": 3.7548, "step": 2422500 }, { "epoch": 26.91, "learning_rate": 5.7728827488172704e-08, "loss": 3.7735, "step": 2423000 }, { "epoch": 26.91, "learning_rate": 5.7714945694422846e-08, "loss": 3.7637, "step": 2423500 }, { "epoch": 26.92, "learning_rate": 5.770106390067299e-08, "loss": 3.7853, "step": 2424000 }, { "epoch": 26.93, "learning_rate": 5.7687182106923124e-08, "loss": 3.7713, "step": 2424500 }, { "epoch": 26.93, "learning_rate": 5.7673300313173266e-08, "loss": 3.769, "step": 2425000 }, { "epoch": 26.94, "learning_rate": 5.765941851942341e-08, "loss": 3.7731, "step": 2425500 }, { "epoch": 26.94, "learning_rate": 5.764553672567354e-08, "loss": 3.7869, "step": 2426000 }, { "epoch": 26.95, "learning_rate": 5.763165493192368e-08, "loss": 3.7782, "step": 2426500 }, { "epoch": 26.95, "learning_rate": 5.761777313817382e-08, "loss": 3.7669, "step": 2427000 }, { "epoch": 26.96, "learning_rate": 5.760389134442396e-08, "loss": 3.7904, "step": 2427500 }, { "epoch": 26.96, "learning_rate": 5.75900095506741e-08, "loss": 3.7676, "step": 2428000 }, { "epoch": 26.97, "learning_rate": 5.757612775692423e-08, "loss": 3.7673, "step": 2428500 }, { "epoch": 26.98, "learning_rate": 5.756224596317437e-08, "loss": 3.7503, "step": 2429000 }, { "epoch": 26.98, "learning_rate": 5.754836416942451e-08, "loss": 3.7921, "step": 2429500 }, { "epoch": 26.99, "learning_rate": 5.7534482375674656e-08, "loss": 3.7747, "step": 2430000 }, { "epoch": 26.99, "learning_rate": 5.752060058192479e-08, "loss": 3.7642, "step": 2430500 }, { "epoch": 27.0, "learning_rate": 5.7506718788174933e-08, "loss": 3.7778, "step": 2431000 }, { "epoch": 27.0, "eval_loss": 3.839319944381714, "eval_runtime": 6.3037, "eval_samples_per_second": 246.521, "step": 2431242 }, { "epoch": 27.0, "learning_rate": 5.749283699442506e-08, "loss": 3.7644, "step": 2431500 }, { "epoch": 27.01, "learning_rate": 5.7478955200675205e-08, "loss": 3.7694, "step": 2432000 }, { "epoch": 27.01, "learning_rate": 5.746507340692535e-08, "loss": 3.7794, "step": 2432500 }, { "epoch": 27.02, "learning_rate": 5.745119161317549e-08, "loss": 3.7575, "step": 2433000 }, { "epoch": 27.03, "learning_rate": 5.7437309819425625e-08, "loss": 3.786, "step": 2433500 }, { "epoch": 27.03, "learning_rate": 5.742342802567577e-08, "loss": 3.7761, "step": 2434000 }, { "epoch": 27.04, "learning_rate": 5.7409546231925896e-08, "loss": 3.7711, "step": 2434500 }, { "epoch": 27.04, "learning_rate": 5.739566443817604e-08, "loss": 3.7599, "step": 2435000 }, { "epoch": 27.05, "learning_rate": 5.738178264442618e-08, "loss": 3.7726, "step": 2435500 }, { "epoch": 27.05, "learning_rate": 5.736790085067632e-08, "loss": 3.7816, "step": 2436000 }, { "epoch": 27.06, "learning_rate": 5.735401905692646e-08, "loss": 3.7926, "step": 2436500 }, { "epoch": 27.06, "learning_rate": 5.7340137263176594e-08, "loss": 3.77, "step": 2437000 }, { "epoch": 27.07, "learning_rate": 5.732625546942673e-08, "loss": 3.7489, "step": 2437500 }, { "epoch": 27.08, "learning_rate": 5.731237367567687e-08, "loss": 3.7666, "step": 2438000 }, { "epoch": 27.08, "learning_rate": 5.7298491881927014e-08, "loss": 3.7658, "step": 2438500 }, { "epoch": 27.09, "learning_rate": 5.728461008817716e-08, "loss": 3.785, "step": 2439000 }, { "epoch": 27.09, "learning_rate": 5.727072829442729e-08, "loss": 3.7628, "step": 2439500 }, { "epoch": 27.1, "learning_rate": 5.725684650067743e-08, "loss": 3.7657, "step": 2440000 }, { "epoch": 27.1, "learning_rate": 5.7242964706927564e-08, "loss": 3.7782, "step": 2440500 }, { "epoch": 27.11, "learning_rate": 5.7229082913177706e-08, "loss": 3.7781, "step": 2441000 }, { "epoch": 27.11, "learning_rate": 5.721520111942785e-08, "loss": 3.7833, "step": 2441500 }, { "epoch": 27.12, "learning_rate": 5.720131932567799e-08, "loss": 3.7717, "step": 2442000 }, { "epoch": 27.13, "learning_rate": 5.718743753192812e-08, "loss": 3.7749, "step": 2442500 }, { "epoch": 27.13, "learning_rate": 5.717355573817826e-08, "loss": 3.7478, "step": 2443000 }, { "epoch": 27.14, "learning_rate": 5.71596739444284e-08, "loss": 3.7702, "step": 2443500 }, { "epoch": 27.14, "learning_rate": 5.714579215067854e-08, "loss": 3.7641, "step": 2444000 }, { "epoch": 27.15, "learning_rate": 5.713191035692868e-08, "loss": 3.7758, "step": 2444500 }, { "epoch": 27.15, "learning_rate": 5.7118028563178824e-08, "loss": 3.7961, "step": 2445000 }, { "epoch": 27.16, "learning_rate": 5.710414676942895e-08, "loss": 3.7872, "step": 2445500 }, { "epoch": 27.16, "learning_rate": 5.7090264975679095e-08, "loss": 3.7932, "step": 2446000 }, { "epoch": 27.17, "learning_rate": 5.707638318192923e-08, "loss": 3.7592, "step": 2446500 }, { "epoch": 27.17, "learning_rate": 5.7062501388179373e-08, "loss": 3.761, "step": 2447000 }, { "epoch": 27.18, "learning_rate": 5.7048619594429516e-08, "loss": 3.7795, "step": 2447500 }, { "epoch": 27.19, "learning_rate": 5.703473780067966e-08, "loss": 3.7765, "step": 2448000 }, { "epoch": 27.19, "learning_rate": 5.702085600692979e-08, "loss": 3.7743, "step": 2448500 }, { "epoch": 27.2, "learning_rate": 5.700697421317992e-08, "loss": 3.7592, "step": 2449000 }, { "epoch": 27.2, "learning_rate": 5.6993092419430065e-08, "loss": 3.7796, "step": 2449500 }, { "epoch": 27.21, "learning_rate": 5.697921062568021e-08, "loss": 3.773, "step": 2450000 }, { "epoch": 27.21, "learning_rate": 5.696532883193035e-08, "loss": 3.768, "step": 2450500 }, { "epoch": 27.22, "learning_rate": 5.695144703818048e-08, "loss": 3.7485, "step": 2451000 }, { "epoch": 27.22, "learning_rate": 5.693756524443062e-08, "loss": 3.7807, "step": 2451500 }, { "epoch": 27.23, "learning_rate": 5.6923683450680756e-08, "loss": 3.7811, "step": 2452000 }, { "epoch": 27.24, "learning_rate": 5.69098016569309e-08, "loss": 3.7592, "step": 2452500 }, { "epoch": 27.24, "learning_rate": 5.689591986318104e-08, "loss": 3.7698, "step": 2453000 }, { "epoch": 27.25, "learning_rate": 5.688203806943118e-08, "loss": 3.7714, "step": 2453500 }, { "epoch": 27.25, "learning_rate": 5.686815627568131e-08, "loss": 3.77, "step": 2454000 }, { "epoch": 27.26, "learning_rate": 5.6854274481931454e-08, "loss": 3.7745, "step": 2454500 }, { "epoch": 27.26, "learning_rate": 5.684039268818159e-08, "loss": 3.7805, "step": 2455000 }, { "epoch": 27.27, "learning_rate": 5.682651089443173e-08, "loss": 3.7796, "step": 2455500 }, { "epoch": 27.27, "learning_rate": 5.6812629100681875e-08, "loss": 3.7723, "step": 2456000 }, { "epoch": 27.28, "learning_rate": 5.6798747306932004e-08, "loss": 3.7536, "step": 2456500 }, { "epoch": 27.29, "learning_rate": 5.6784865513182146e-08, "loss": 3.7881, "step": 2457000 }, { "epoch": 27.29, "learning_rate": 5.677098371943229e-08, "loss": 3.7827, "step": 2457500 }, { "epoch": 27.3, "learning_rate": 5.6757101925682424e-08, "loss": 3.7577, "step": 2458000 }, { "epoch": 27.3, "learning_rate": 5.6743220131932566e-08, "loss": 3.7379, "step": 2458500 }, { "epoch": 27.31, "learning_rate": 5.672933833818271e-08, "loss": 3.7722, "step": 2459000 }, { "epoch": 27.31, "learning_rate": 5.671545654443284e-08, "loss": 3.7742, "step": 2459500 }, { "epoch": 27.32, "learning_rate": 5.670157475068298e-08, "loss": 3.7498, "step": 2460000 }, { "epoch": 27.32, "learning_rate": 5.668769295693312e-08, "loss": 3.7747, "step": 2460500 }, { "epoch": 27.33, "learning_rate": 5.667381116318326e-08, "loss": 3.7791, "step": 2461000 }, { "epoch": 27.34, "learning_rate": 5.66599293694334e-08, "loss": 3.7614, "step": 2461500 }, { "epoch": 27.34, "learning_rate": 5.664604757568354e-08, "loss": 3.7726, "step": 2462000 }, { "epoch": 27.35, "learning_rate": 5.663216578193367e-08, "loss": 3.7659, "step": 2462500 }, { "epoch": 27.35, "learning_rate": 5.6618283988183813e-08, "loss": 3.7647, "step": 2463000 }, { "epoch": 27.36, "learning_rate": 5.6604402194433956e-08, "loss": 3.7823, "step": 2463500 }, { "epoch": 27.36, "learning_rate": 5.659052040068409e-08, "loss": 3.747, "step": 2464000 }, { "epoch": 27.37, "learning_rate": 5.6576638606934233e-08, "loss": 3.7872, "step": 2464500 }, { "epoch": 27.37, "learning_rate": 5.656275681318436e-08, "loss": 3.7819, "step": 2465000 }, { "epoch": 27.38, "learning_rate": 5.6548875019434505e-08, "loss": 3.779, "step": 2465500 }, { "epoch": 27.39, "learning_rate": 5.653499322568465e-08, "loss": 3.7772, "step": 2466000 }, { "epoch": 27.39, "learning_rate": 5.652111143193479e-08, "loss": 3.7762, "step": 2466500 }, { "epoch": 27.4, "learning_rate": 5.6507229638184925e-08, "loss": 3.7859, "step": 2467000 }, { "epoch": 27.4, "learning_rate": 5.649334784443507e-08, "loss": 3.7633, "step": 2467500 }, { "epoch": 27.41, "learning_rate": 5.6479466050685196e-08, "loss": 3.7559, "step": 2468000 }, { "epoch": 27.41, "learning_rate": 5.646558425693534e-08, "loss": 3.7681, "step": 2468500 }, { "epoch": 27.42, "learning_rate": 5.645170246318548e-08, "loss": 3.7661, "step": 2469000 }, { "epoch": 27.42, "learning_rate": 5.643782066943562e-08, "loss": 3.7617, "step": 2469500 }, { "epoch": 27.43, "learning_rate": 5.642393887568576e-08, "loss": 3.7621, "step": 2470000 }, { "epoch": 27.44, "learning_rate": 5.64100570819359e-08, "loss": 3.7585, "step": 2470500 }, { "epoch": 27.44, "learning_rate": 5.639617528818603e-08, "loss": 3.7719, "step": 2471000 }, { "epoch": 27.45, "learning_rate": 5.638229349443617e-08, "loss": 3.7611, "step": 2471500 }, { "epoch": 27.45, "learning_rate": 5.6368411700686315e-08, "loss": 3.7682, "step": 2472000 }, { "epoch": 27.46, "learning_rate": 5.635452990693646e-08, "loss": 3.7916, "step": 2472500 }, { "epoch": 27.46, "learning_rate": 5.634064811318659e-08, "loss": 3.7689, "step": 2473000 }, { "epoch": 27.47, "learning_rate": 5.632676631943673e-08, "loss": 3.7816, "step": 2473500 }, { "epoch": 27.47, "learning_rate": 5.6312884525686864e-08, "loss": 3.772, "step": 2474000 }, { "epoch": 27.48, "learning_rate": 5.6299002731937006e-08, "loss": 3.7815, "step": 2474500 }, { "epoch": 27.49, "learning_rate": 5.628512093818715e-08, "loss": 3.7819, "step": 2475000 }, { "epoch": 27.49, "learning_rate": 5.627123914443729e-08, "loss": 3.7742, "step": 2475500 }, { "epoch": 27.5, "learning_rate": 5.6257357350687426e-08, "loss": 3.7708, "step": 2476000 }, { "epoch": 27.5, "learning_rate": 5.624347555693756e-08, "loss": 3.7639, "step": 2476500 }, { "epoch": 27.51, "learning_rate": 5.62295937631877e-08, "loss": 3.7575, "step": 2477000 }, { "epoch": 27.51, "learning_rate": 5.621571196943784e-08, "loss": 3.7897, "step": 2477500 }, { "epoch": 27.52, "learning_rate": 5.620183017568798e-08, "loss": 3.7764, "step": 2478000 }, { "epoch": 27.52, "learning_rate": 5.6187948381938124e-08, "loss": 3.7682, "step": 2478500 }, { "epoch": 27.53, "learning_rate": 5.617406658818825e-08, "loss": 3.7814, "step": 2479000 }, { "epoch": 27.54, "learning_rate": 5.616018479443839e-08, "loss": 3.7725, "step": 2479500 }, { "epoch": 27.54, "learning_rate": 5.614630300068853e-08, "loss": 3.7657, "step": 2480000 }, { "epoch": 27.55, "learning_rate": 5.6132421206938673e-08, "loss": 3.7681, "step": 2480500 }, { "epoch": 27.55, "learning_rate": 5.6118539413188816e-08, "loss": 3.7775, "step": 2481000 }, { "epoch": 27.56, "learning_rate": 5.610465761943896e-08, "loss": 3.7931, "step": 2481500 }, { "epoch": 27.56, "learning_rate": 5.609077582568909e-08, "loss": 3.8074, "step": 2482000 }, { "epoch": 27.57, "learning_rate": 5.607689403193922e-08, "loss": 3.783, "step": 2482500 }, { "epoch": 27.57, "learning_rate": 5.6063012238189365e-08, "loss": 3.7563, "step": 2483000 }, { "epoch": 27.58, "learning_rate": 5.604913044443951e-08, "loss": 3.766, "step": 2483500 }, { "epoch": 27.59, "learning_rate": 5.603524865068965e-08, "loss": 3.7731, "step": 2484000 }, { "epoch": 27.59, "learning_rate": 5.602136685693979e-08, "loss": 3.7682, "step": 2484500 }, { "epoch": 27.6, "learning_rate": 5.600748506318992e-08, "loss": 3.7857, "step": 2485000 }, { "epoch": 27.6, "learning_rate": 5.5993603269440056e-08, "loss": 3.7857, "step": 2485500 }, { "epoch": 27.61, "learning_rate": 5.59797214756902e-08, "loss": 3.765, "step": 2486000 }, { "epoch": 27.61, "learning_rate": 5.596583968194034e-08, "loss": 3.7875, "step": 2486500 }, { "epoch": 27.62, "learning_rate": 5.595195788819048e-08, "loss": 3.7687, "step": 2487000 }, { "epoch": 27.62, "learning_rate": 5.593807609444061e-08, "loss": 3.7513, "step": 2487500 }, { "epoch": 27.63, "learning_rate": 5.5924194300690754e-08, "loss": 3.7733, "step": 2488000 }, { "epoch": 27.64, "learning_rate": 5.591031250694089e-08, "loss": 3.7782, "step": 2488500 }, { "epoch": 27.64, "learning_rate": 5.589643071319103e-08, "loss": 3.7516, "step": 2489000 }, { "epoch": 27.65, "learning_rate": 5.5882548919441175e-08, "loss": 3.7798, "step": 2489500 }, { "epoch": 27.65, "learning_rate": 5.586866712569132e-08, "loss": 3.7559, "step": 2490000 }, { "epoch": 27.66, "learning_rate": 5.5854785331941446e-08, "loss": 3.7728, "step": 2490500 }, { "epoch": 27.66, "learning_rate": 5.584090353819159e-08, "loss": 3.7841, "step": 2491000 }, { "epoch": 27.67, "learning_rate": 5.5827021744441724e-08, "loss": 3.7556, "step": 2491500 }, { "epoch": 27.67, "learning_rate": 5.5813139950691866e-08, "loss": 3.7736, "step": 2492000 }, { "epoch": 27.68, "learning_rate": 5.579925815694201e-08, "loss": 3.7714, "step": 2492500 }, { "epoch": 27.69, "learning_rate": 5.578537636319215e-08, "loss": 3.7686, "step": 2493000 }, { "epoch": 27.69, "learning_rate": 5.577149456944228e-08, "loss": 3.7593, "step": 2493500 }, { "epoch": 27.7, "learning_rate": 5.575761277569242e-08, "loss": 3.7764, "step": 2494000 }, { "epoch": 27.7, "learning_rate": 5.574373098194256e-08, "loss": 3.7686, "step": 2494500 }, { "epoch": 27.71, "learning_rate": 5.57298491881927e-08, "loss": 3.7731, "step": 2495000 }, { "epoch": 27.71, "learning_rate": 5.571596739444284e-08, "loss": 3.7633, "step": 2495500 }, { "epoch": 27.72, "learning_rate": 5.570208560069297e-08, "loss": 3.7755, "step": 2496000 }, { "epoch": 27.72, "learning_rate": 5.5688203806943113e-08, "loss": 3.775, "step": 2496500 }, { "epoch": 27.73, "learning_rate": 5.5674322013193256e-08, "loss": 3.7678, "step": 2497000 }, { "epoch": 27.74, "learning_rate": 5.566044021944339e-08, "loss": 3.7694, "step": 2497500 }, { "epoch": 27.74, "learning_rate": 5.5646558425693534e-08, "loss": 3.7903, "step": 2498000 }, { "epoch": 27.75, "learning_rate": 5.5632676631943676e-08, "loss": 3.7774, "step": 2498500 }, { "epoch": 27.75, "learning_rate": 5.5618794838193805e-08, "loss": 3.7595, "step": 2499000 }, { "epoch": 27.76, "learning_rate": 5.560491304444395e-08, "loss": 3.7634, "step": 2499500 }, { "epoch": 27.76, "learning_rate": 5.559103125069409e-08, "loss": 3.7895, "step": 2500000 }, { "epoch": 27.77, "learning_rate": 5.5577149456944225e-08, "loss": 3.7898, "step": 2500500 }, { "epoch": 27.77, "learning_rate": 5.556326766319437e-08, "loss": 3.7875, "step": 2501000 }, { "epoch": 27.78, "learning_rate": 5.5549385869444496e-08, "loss": 3.7768, "step": 2501500 }, { "epoch": 27.79, "learning_rate": 5.553550407569464e-08, "loss": 3.7725, "step": 2502000 }, { "epoch": 27.79, "learning_rate": 5.552162228194478e-08, "loss": 3.7867, "step": 2502500 }, { "epoch": 27.8, "learning_rate": 5.550774048819492e-08, "loss": 3.7657, "step": 2503000 }, { "epoch": 27.8, "learning_rate": 5.549385869444506e-08, "loss": 3.7743, "step": 2503500 }, { "epoch": 27.81, "learning_rate": 5.54799769006952e-08, "loss": 3.7864, "step": 2504000 }, { "epoch": 27.81, "learning_rate": 5.546609510694533e-08, "loss": 3.7541, "step": 2504500 }, { "epoch": 27.82, "learning_rate": 5.545221331319547e-08, "loss": 3.7544, "step": 2505000 }, { "epoch": 27.82, "learning_rate": 5.5438331519445615e-08, "loss": 3.7779, "step": 2505500 }, { "epoch": 27.83, "learning_rate": 5.542444972569576e-08, "loss": 3.7703, "step": 2506000 }, { "epoch": 27.84, "learning_rate": 5.541056793194589e-08, "loss": 3.7863, "step": 2506500 }, { "epoch": 27.84, "learning_rate": 5.5396686138196035e-08, "loss": 3.7769, "step": 2507000 }, { "epoch": 27.85, "learning_rate": 5.5382804344446164e-08, "loss": 3.7691, "step": 2507500 }, { "epoch": 27.85, "learning_rate": 5.5368922550696306e-08, "loss": 3.7773, "step": 2508000 }, { "epoch": 27.86, "learning_rate": 5.535504075694645e-08, "loss": 3.7756, "step": 2508500 }, { "epoch": 27.86, "learning_rate": 5.534115896319659e-08, "loss": 3.7769, "step": 2509000 }, { "epoch": 27.87, "learning_rate": 5.5327277169446726e-08, "loss": 3.7518, "step": 2509500 }, { "epoch": 27.87, "learning_rate": 5.5313395375696855e-08, "loss": 3.7714, "step": 2510000 }, { "epoch": 27.88, "learning_rate": 5.5299513581947e-08, "loss": 3.7649, "step": 2510500 }, { "epoch": 27.89, "learning_rate": 5.528563178819714e-08, "loss": 3.7651, "step": 2511000 }, { "epoch": 27.89, "learning_rate": 5.527174999444728e-08, "loss": 3.7759, "step": 2511500 }, { "epoch": 27.9, "learning_rate": 5.5257868200697424e-08, "loss": 3.7776, "step": 2512000 }, { "epoch": 27.9, "learning_rate": 5.524398640694756e-08, "loss": 3.7678, "step": 2512500 }, { "epoch": 27.91, "learning_rate": 5.523010461319769e-08, "loss": 3.7774, "step": 2513000 }, { "epoch": 27.91, "learning_rate": 5.521622281944783e-08, "loss": 3.7678, "step": 2513500 }, { "epoch": 27.92, "learning_rate": 5.5202341025697974e-08, "loss": 3.7747, "step": 2514000 }, { "epoch": 27.92, "learning_rate": 5.5188459231948116e-08, "loss": 3.7619, "step": 2514500 }, { "epoch": 27.93, "learning_rate": 5.517457743819826e-08, "loss": 3.7457, "step": 2515000 }, { "epoch": 27.94, "learning_rate": 5.516069564444839e-08, "loss": 3.7715, "step": 2515500 }, { "epoch": 27.94, "learning_rate": 5.514681385069852e-08, "loss": 3.7597, "step": 2516000 }, { "epoch": 27.95, "learning_rate": 5.5132932056948665e-08, "loss": 3.7613, "step": 2516500 }, { "epoch": 27.95, "learning_rate": 5.511905026319881e-08, "loss": 3.7777, "step": 2517000 }, { "epoch": 27.96, "learning_rate": 5.510516846944895e-08, "loss": 3.7765, "step": 2517500 }, { "epoch": 27.96, "learning_rate": 5.509128667569909e-08, "loss": 3.77, "step": 2518000 }, { "epoch": 27.97, "learning_rate": 5.507740488194922e-08, "loss": 3.7971, "step": 2518500 }, { "epoch": 27.97, "learning_rate": 5.5063523088199356e-08, "loss": 3.7718, "step": 2519000 }, { "epoch": 27.98, "learning_rate": 5.50496412944495e-08, "loss": 3.7833, "step": 2519500 }, { "epoch": 27.99, "learning_rate": 5.503575950069964e-08, "loss": 3.7656, "step": 2520000 }, { "epoch": 27.99, "learning_rate": 5.502187770694978e-08, "loss": 3.7643, "step": 2520500 }, { "epoch": 28.0, "learning_rate": 5.5007995913199926e-08, "loss": 3.7503, "step": 2521000 }, { "epoch": 28.0, "eval_loss": 3.837568759918213, "eval_runtime": 6.3037, "eval_samples_per_second": 246.521, "step": 2521288 }, { "epoch": 28.0, "learning_rate": 5.4994114119450055e-08, "loss": 3.7668, "step": 2521500 }, { "epoch": 28.01, "learning_rate": 5.498023232570019e-08, "loss": 3.7821, "step": 2522000 }, { "epoch": 28.01, "learning_rate": 5.496635053195033e-08, "loss": 3.7631, "step": 2522500 }, { "epoch": 28.02, "learning_rate": 5.4952468738200475e-08, "loss": 3.8038, "step": 2523000 }, { "epoch": 28.02, "learning_rate": 5.493858694445062e-08, "loss": 3.7596, "step": 2523500 }, { "epoch": 28.03, "learning_rate": 5.4924705150700746e-08, "loss": 3.7611, "step": 2524000 }, { "epoch": 28.04, "learning_rate": 5.491082335695089e-08, "loss": 3.7857, "step": 2524500 }, { "epoch": 28.04, "learning_rate": 5.4896941563201024e-08, "loss": 3.7609, "step": 2525000 }, { "epoch": 28.05, "learning_rate": 5.4883059769451166e-08, "loss": 3.7681, "step": 2525500 }, { "epoch": 28.05, "learning_rate": 5.486917797570131e-08, "loss": 3.7794, "step": 2526000 }, { "epoch": 28.06, "learning_rate": 5.485529618195145e-08, "loss": 3.7833, "step": 2526500 }, { "epoch": 28.06, "learning_rate": 5.484141438820158e-08, "loss": 3.7641, "step": 2527000 }, { "epoch": 28.07, "learning_rate": 5.482753259445172e-08, "loss": 3.7664, "step": 2527500 }, { "epoch": 28.07, "learning_rate": 5.481365080070186e-08, "loss": 3.7649, "step": 2528000 }, { "epoch": 28.08, "learning_rate": 5.4799769006952e-08, "loss": 3.7749, "step": 2528500 }, { "epoch": 28.09, "learning_rate": 5.478588721320214e-08, "loss": 3.7693, "step": 2529000 }, { "epoch": 28.09, "learning_rate": 5.4772005419452284e-08, "loss": 3.7652, "step": 2529500 }, { "epoch": 28.1, "learning_rate": 5.4758123625702413e-08, "loss": 3.7573, "step": 2530000 }, { "epoch": 28.1, "learning_rate": 5.4744241831952556e-08, "loss": 3.7508, "step": 2530500 }, { "epoch": 28.11, "learning_rate": 5.473036003820269e-08, "loss": 3.7849, "step": 2531000 }, { "epoch": 28.11, "learning_rate": 5.4716478244452834e-08, "loss": 3.7644, "step": 2531500 }, { "epoch": 28.12, "learning_rate": 5.4702596450702976e-08, "loss": 3.7669, "step": 2532000 }, { "epoch": 28.12, "learning_rate": 5.4688714656953105e-08, "loss": 3.7671, "step": 2532500 }, { "epoch": 28.13, "learning_rate": 5.467483286320325e-08, "loss": 3.7686, "step": 2533000 }, { "epoch": 28.14, "learning_rate": 5.466095106945339e-08, "loss": 3.7706, "step": 2533500 }, { "epoch": 28.14, "learning_rate": 5.4647069275703525e-08, "loss": 3.7591, "step": 2534000 }, { "epoch": 28.15, "learning_rate": 5.463318748195367e-08, "loss": 3.7771, "step": 2534500 }, { "epoch": 28.15, "learning_rate": 5.461930568820381e-08, "loss": 3.7745, "step": 2535000 }, { "epoch": 28.16, "learning_rate": 5.460542389445394e-08, "loss": 3.7868, "step": 2535500 }, { "epoch": 28.16, "learning_rate": 5.459154210070408e-08, "loss": 3.7652, "step": 2536000 }, { "epoch": 28.17, "learning_rate": 5.457766030695422e-08, "loss": 3.7696, "step": 2536500 }, { "epoch": 28.17, "learning_rate": 5.456377851320436e-08, "loss": 3.7645, "step": 2537000 }, { "epoch": 28.18, "learning_rate": 5.45498967194545e-08, "loss": 3.7591, "step": 2537500 }, { "epoch": 28.19, "learning_rate": 5.453601492570463e-08, "loss": 3.7625, "step": 2538000 }, { "epoch": 28.19, "learning_rate": 5.452213313195477e-08, "loss": 3.7703, "step": 2538500 }, { "epoch": 28.2, "learning_rate": 5.4508251338204915e-08, "loss": 3.7596, "step": 2539000 }, { "epoch": 28.2, "learning_rate": 5.449436954445506e-08, "loss": 3.7648, "step": 2539500 }, { "epoch": 28.21, "learning_rate": 5.448048775070519e-08, "loss": 3.7689, "step": 2540000 }, { "epoch": 28.21, "learning_rate": 5.4466605956955335e-08, "loss": 3.7598, "step": 2540500 }, { "epoch": 28.22, "learning_rate": 5.4452724163205464e-08, "loss": 3.7659, "step": 2541000 }, { "epoch": 28.22, "learning_rate": 5.4438842369455606e-08, "loss": 3.7738, "step": 2541500 }, { "epoch": 28.23, "learning_rate": 5.442496057570575e-08, "loss": 3.7589, "step": 2542000 }, { "epoch": 28.24, "learning_rate": 5.441107878195589e-08, "loss": 3.7769, "step": 2542500 }, { "epoch": 28.24, "learning_rate": 5.4397196988206026e-08, "loss": 3.7698, "step": 2543000 }, { "epoch": 28.25, "learning_rate": 5.438331519445617e-08, "loss": 3.773, "step": 2543500 }, { "epoch": 28.25, "learning_rate": 5.43694334007063e-08, "loss": 3.7597, "step": 2544000 }, { "epoch": 28.26, "learning_rate": 5.435555160695644e-08, "loss": 3.7762, "step": 2544500 }, { "epoch": 28.26, "learning_rate": 5.434166981320658e-08, "loss": 3.7725, "step": 2545000 }, { "epoch": 28.27, "learning_rate": 5.4327788019456724e-08, "loss": 3.773, "step": 2545500 }, { "epoch": 28.27, "learning_rate": 5.431390622570686e-08, "loss": 3.7687, "step": 2546000 }, { "epoch": 28.28, "learning_rate": 5.430002443195699e-08, "loss": 3.7913, "step": 2546500 }, { "epoch": 28.29, "learning_rate": 5.428614263820713e-08, "loss": 3.7714, "step": 2547000 }, { "epoch": 28.29, "learning_rate": 5.4272260844457274e-08, "loss": 3.7808, "step": 2547500 }, { "epoch": 28.3, "learning_rate": 5.4258379050707416e-08, "loss": 3.784, "step": 2548000 }, { "epoch": 28.3, "learning_rate": 5.424449725695756e-08, "loss": 3.7787, "step": 2548500 }, { "epoch": 28.31, "learning_rate": 5.4230615463207694e-08, "loss": 3.7822, "step": 2549000 }, { "epoch": 28.31, "learning_rate": 5.421673366945782e-08, "loss": 3.7806, "step": 2549500 }, { "epoch": 28.32, "learning_rate": 5.4202851875707965e-08, "loss": 3.7723, "step": 2550000 }, { "epoch": 28.32, "learning_rate": 5.418897008195811e-08, "loss": 3.7749, "step": 2550500 }, { "epoch": 28.33, "learning_rate": 5.417508828820825e-08, "loss": 3.757, "step": 2551000 }, { "epoch": 28.34, "learning_rate": 5.416120649445839e-08, "loss": 3.7731, "step": 2551500 }, { "epoch": 28.34, "learning_rate": 5.414732470070852e-08, "loss": 3.7605, "step": 2552000 }, { "epoch": 28.35, "learning_rate": 5.4133442906958657e-08, "loss": 3.7846, "step": 2552500 }, { "epoch": 28.35, "learning_rate": 5.41195611132088e-08, "loss": 3.7525, "step": 2553000 }, { "epoch": 28.36, "learning_rate": 5.410567931945894e-08, "loss": 3.7738, "step": 2553500 }, { "epoch": 28.36, "learning_rate": 5.4091797525709083e-08, "loss": 3.7572, "step": 2554000 }, { "epoch": 28.37, "learning_rate": 5.4077915731959226e-08, "loss": 3.77, "step": 2554500 }, { "epoch": 28.37, "learning_rate": 5.4064033938209355e-08, "loss": 3.7914, "step": 2555000 }, { "epoch": 28.38, "learning_rate": 5.405015214445949e-08, "loss": 3.7527, "step": 2555500 }, { "epoch": 28.39, "learning_rate": 5.403627035070963e-08, "loss": 3.7717, "step": 2556000 }, { "epoch": 28.39, "learning_rate": 5.4022388556959775e-08, "loss": 3.7775, "step": 2556500 }, { "epoch": 28.4, "learning_rate": 5.400850676320992e-08, "loss": 3.7579, "step": 2557000 }, { "epoch": 28.4, "learning_rate": 5.399462496946006e-08, "loss": 3.7778, "step": 2557500 }, { "epoch": 28.41, "learning_rate": 5.398074317571019e-08, "loss": 3.7643, "step": 2558000 }, { "epoch": 28.41, "learning_rate": 5.3966861381960324e-08, "loss": 3.785, "step": 2558500 }, { "epoch": 28.42, "learning_rate": 5.3952979588210466e-08, "loss": 3.7529, "step": 2559000 }, { "epoch": 28.42, "learning_rate": 5.393909779446061e-08, "loss": 3.7797, "step": 2559500 }, { "epoch": 28.43, "learning_rate": 5.392521600071075e-08, "loss": 3.7663, "step": 2560000 }, { "epoch": 28.44, "learning_rate": 5.391133420696088e-08, "loss": 3.7744, "step": 2560500 }, { "epoch": 28.44, "learning_rate": 5.389745241321102e-08, "loss": 3.7721, "step": 2561000 }, { "epoch": 28.45, "learning_rate": 5.388357061946116e-08, "loss": 3.7684, "step": 2561500 }, { "epoch": 28.45, "learning_rate": 5.38696888257113e-08, "loss": 3.7405, "step": 2562000 }, { "epoch": 28.46, "learning_rate": 5.385580703196144e-08, "loss": 3.7801, "step": 2562500 }, { "epoch": 28.46, "learning_rate": 5.3841925238211585e-08, "loss": 3.7697, "step": 2563000 }, { "epoch": 28.47, "learning_rate": 5.3828043444461714e-08, "loss": 3.7516, "step": 2563500 }, { "epoch": 28.47, "learning_rate": 5.3814161650711856e-08, "loss": 3.7671, "step": 2564000 }, { "epoch": 28.48, "learning_rate": 5.380027985696199e-08, "loss": 3.7682, "step": 2564500 }, { "epoch": 28.49, "learning_rate": 5.3786398063212134e-08, "loss": 3.7763, "step": 2565000 }, { "epoch": 28.49, "learning_rate": 5.3772516269462276e-08, "loss": 3.7905, "step": 2565500 }, { "epoch": 28.5, "learning_rate": 5.375863447571242e-08, "loss": 3.7611, "step": 2566000 }, { "epoch": 28.5, "learning_rate": 5.374475268196255e-08, "loss": 3.7803, "step": 2566500 }, { "epoch": 28.51, "learning_rate": 5.373087088821269e-08, "loss": 3.7443, "step": 2567000 }, { "epoch": 28.51, "learning_rate": 5.3716989094462825e-08, "loss": 3.7675, "step": 2567500 }, { "epoch": 28.52, "learning_rate": 5.370310730071297e-08, "loss": 3.759, "step": 2568000 }, { "epoch": 28.52, "learning_rate": 5.368922550696311e-08, "loss": 3.7613, "step": 2568500 }, { "epoch": 28.53, "learning_rate": 5.367534371321324e-08, "loss": 3.7768, "step": 2569000 }, { "epoch": 28.54, "learning_rate": 5.366146191946338e-08, "loss": 3.7792, "step": 2569500 }, { "epoch": 28.54, "learning_rate": 5.364758012571352e-08, "loss": 3.7777, "step": 2570000 }, { "epoch": 28.55, "learning_rate": 5.363369833196366e-08, "loss": 3.7599, "step": 2570500 }, { "epoch": 28.55, "learning_rate": 5.36198165382138e-08, "loss": 3.76, "step": 2571000 }, { "epoch": 28.56, "learning_rate": 5.3605934744463943e-08, "loss": 3.7725, "step": 2571500 }, { "epoch": 28.56, "learning_rate": 5.359205295071407e-08, "loss": 3.7684, "step": 2572000 }, { "epoch": 28.57, "learning_rate": 5.3578171156964215e-08, "loss": 3.7799, "step": 2572500 }, { "epoch": 28.57, "learning_rate": 5.356428936321436e-08, "loss": 3.7608, "step": 2573000 }, { "epoch": 28.58, "learning_rate": 5.355040756946449e-08, "loss": 3.7667, "step": 2573500 }, { "epoch": 28.59, "learning_rate": 5.3536525775714635e-08, "loss": 3.7679, "step": 2574000 }, { "epoch": 28.59, "learning_rate": 5.3522643981964764e-08, "loss": 3.7654, "step": 2574500 }, { "epoch": 28.6, "learning_rate": 5.3508762188214906e-08, "loss": 3.7837, "step": 2575000 }, { "epoch": 28.6, "learning_rate": 5.349488039446505e-08, "loss": 3.7658, "step": 2575500 }, { "epoch": 28.61, "learning_rate": 5.348099860071519e-08, "loss": 3.7712, "step": 2576000 }, { "epoch": 28.61, "learning_rate": 5.3467116806965326e-08, "loss": 3.7651, "step": 2576500 }, { "epoch": 28.62, "learning_rate": 5.345323501321547e-08, "loss": 3.7747, "step": 2577000 }, { "epoch": 28.62, "learning_rate": 5.34393532194656e-08, "loss": 3.7698, "step": 2577500 }, { "epoch": 28.63, "learning_rate": 5.342547142571574e-08, "loss": 3.775, "step": 2578000 }, { "epoch": 28.64, "learning_rate": 5.341158963196588e-08, "loss": 3.766, "step": 2578500 }, { "epoch": 28.64, "learning_rate": 5.3397707838216024e-08, "loss": 3.769, "step": 2579000 }, { "epoch": 28.65, "learning_rate": 5.338382604446616e-08, "loss": 3.7623, "step": 2579500 }, { "epoch": 28.65, "learning_rate": 5.33699442507163e-08, "loss": 3.7738, "step": 2580000 }, { "epoch": 28.66, "learning_rate": 5.335606245696643e-08, "loss": 3.7875, "step": 2580500 }, { "epoch": 28.66, "learning_rate": 5.3342180663216574e-08, "loss": 3.7705, "step": 2581000 }, { "epoch": 28.67, "learning_rate": 5.3328298869466716e-08, "loss": 3.7491, "step": 2581500 }, { "epoch": 28.67, "learning_rate": 5.331441707571686e-08, "loss": 3.8024, "step": 2582000 }, { "epoch": 28.68, "learning_rate": 5.3300535281966994e-08, "loss": 3.7934, "step": 2582500 }, { "epoch": 28.69, "learning_rate": 5.328665348821712e-08, "loss": 3.7658, "step": 2583000 }, { "epoch": 28.69, "learning_rate": 5.3272771694467265e-08, "loss": 3.7845, "step": 2583500 }, { "epoch": 28.7, "learning_rate": 5.325888990071741e-08, "loss": 3.7534, "step": 2584000 }, { "epoch": 28.7, "learning_rate": 5.324500810696755e-08, "loss": 3.7584, "step": 2584500 }, { "epoch": 28.71, "learning_rate": 5.323112631321769e-08, "loss": 3.7484, "step": 2585000 }, { "epoch": 28.71, "learning_rate": 5.321724451946783e-08, "loss": 3.7678, "step": 2585500 }, { "epoch": 28.72, "learning_rate": 5.3203362725717957e-08, "loss": 3.7768, "step": 2586000 }, { "epoch": 28.72, "learning_rate": 5.31894809319681e-08, "loss": 3.7751, "step": 2586500 }, { "epoch": 28.73, "learning_rate": 5.317559913821824e-08, "loss": 3.7677, "step": 2587000 }, { "epoch": 28.74, "learning_rate": 5.3161717344468383e-08, "loss": 3.7652, "step": 2587500 }, { "epoch": 28.74, "learning_rate": 5.3147835550718526e-08, "loss": 3.7557, "step": 2588000 }, { "epoch": 28.75, "learning_rate": 5.313395375696866e-08, "loss": 3.7673, "step": 2588500 }, { "epoch": 28.75, "learning_rate": 5.312007196321879e-08, "loss": 3.7609, "step": 2589000 }, { "epoch": 28.76, "learning_rate": 5.310619016946893e-08, "loss": 3.7527, "step": 2589500 }, { "epoch": 28.76, "learning_rate": 5.3092308375719075e-08, "loss": 3.7541, "step": 2590000 }, { "epoch": 28.77, "learning_rate": 5.307842658196922e-08, "loss": 3.7651, "step": 2590500 }, { "epoch": 28.77, "learning_rate": 5.306454478821936e-08, "loss": 3.7737, "step": 2591000 }, { "epoch": 28.78, "learning_rate": 5.305066299446949e-08, "loss": 3.7792, "step": 2591500 }, { "epoch": 28.79, "learning_rate": 5.3036781200719624e-08, "loss": 3.7542, "step": 2592000 }, { "epoch": 28.79, "learning_rate": 5.3022899406969766e-08, "loss": 3.7635, "step": 2592500 }, { "epoch": 28.8, "learning_rate": 5.300901761321991e-08, "loss": 3.7749, "step": 2593000 }, { "epoch": 28.8, "learning_rate": 5.299513581947005e-08, "loss": 3.7702, "step": 2593500 }, { "epoch": 28.81, "learning_rate": 5.298125402572019e-08, "loss": 3.775, "step": 2594000 }, { "epoch": 28.81, "learning_rate": 5.296737223197032e-08, "loss": 3.7669, "step": 2594500 }, { "epoch": 28.82, "learning_rate": 5.295349043822046e-08, "loss": 3.7697, "step": 2595000 }, { "epoch": 28.82, "learning_rate": 5.29396086444706e-08, "loss": 3.7737, "step": 2595500 }, { "epoch": 28.83, "learning_rate": 5.292572685072074e-08, "loss": 3.7578, "step": 2596000 }, { "epoch": 28.84, "learning_rate": 5.2911845056970885e-08, "loss": 3.7476, "step": 2596500 }, { "epoch": 28.84, "learning_rate": 5.2897963263221014e-08, "loss": 3.7765, "step": 2597000 }, { "epoch": 28.85, "learning_rate": 5.2884081469471156e-08, "loss": 3.7741, "step": 2597500 }, { "epoch": 28.85, "learning_rate": 5.287019967572129e-08, "loss": 3.7794, "step": 2598000 }, { "epoch": 28.86, "learning_rate": 5.2856317881971434e-08, "loss": 3.7458, "step": 2598500 }, { "epoch": 28.86, "learning_rate": 5.2842436088221576e-08, "loss": 3.7572, "step": 2599000 }, { "epoch": 28.87, "learning_rate": 5.282855429447172e-08, "loss": 3.7682, "step": 2599500 }, { "epoch": 28.87, "learning_rate": 5.281467250072185e-08, "loss": 3.7551, "step": 2600000 }, { "epoch": 28.88, "learning_rate": 5.280079070697199e-08, "loss": 3.7694, "step": 2600500 }, { "epoch": 28.89, "learning_rate": 5.2786908913222125e-08, "loss": 3.7704, "step": 2601000 }, { "epoch": 28.89, "learning_rate": 5.277302711947227e-08, "loss": 3.776, "step": 2601500 }, { "epoch": 28.9, "learning_rate": 5.275914532572241e-08, "loss": 3.7691, "step": 2602000 }, { "epoch": 28.9, "learning_rate": 5.274526353197255e-08, "loss": 3.7667, "step": 2602500 }, { "epoch": 28.91, "learning_rate": 5.273138173822268e-08, "loss": 3.7694, "step": 2603000 }, { "epoch": 28.91, "learning_rate": 5.2717499944472823e-08, "loss": 3.7928, "step": 2603500 }, { "epoch": 28.92, "learning_rate": 5.270361815072296e-08, "loss": 3.7794, "step": 2604000 }, { "epoch": 28.92, "learning_rate": 5.26897363569731e-08, "loss": 3.766, "step": 2604500 }, { "epoch": 28.93, "learning_rate": 5.2675854563223244e-08, "loss": 3.7884, "step": 2605000 }, { "epoch": 28.94, "learning_rate": 5.266197276947337e-08, "loss": 3.7749, "step": 2605500 }, { "epoch": 28.94, "learning_rate": 5.2648090975723515e-08, "loss": 3.7596, "step": 2606000 }, { "epoch": 28.95, "learning_rate": 5.263420918197366e-08, "loss": 3.7671, "step": 2606500 }, { "epoch": 28.95, "learning_rate": 5.262032738822379e-08, "loss": 3.7743, "step": 2607000 }, { "epoch": 28.96, "learning_rate": 5.2606445594473935e-08, "loss": 3.775, "step": 2607500 }, { "epoch": 28.96, "learning_rate": 5.259256380072408e-08, "loss": 3.7653, "step": 2608000 }, { "epoch": 28.97, "learning_rate": 5.2578682006974206e-08, "loss": 3.7734, "step": 2608500 }, { "epoch": 28.97, "learning_rate": 5.256480021322435e-08, "loss": 3.7429, "step": 2609000 }, { "epoch": 28.98, "learning_rate": 5.255091841947449e-08, "loss": 3.7762, "step": 2609500 }, { "epoch": 28.99, "learning_rate": 5.2537036625724626e-08, "loss": 3.7587, "step": 2610000 }, { "epoch": 28.99, "learning_rate": 5.252315483197477e-08, "loss": 3.7841, "step": 2610500 }, { "epoch": 29.0, "learning_rate": 5.25092730382249e-08, "loss": 3.7746, "step": 2611000 }, { "epoch": 29.0, "eval_loss": 3.836146116256714, "eval_runtime": 6.2989, "eval_samples_per_second": 246.709, "step": 2611334 }, { "epoch": 29.0, "learning_rate": 5.249539124447504e-08, "loss": 3.7836, "step": 2611500 }, { "epoch": 29.01, "learning_rate": 5.248150945072518e-08, "loss": 3.7712, "step": 2612000 }, { "epoch": 29.01, "learning_rate": 5.2467627656975325e-08, "loss": 3.77, "step": 2612500 }, { "epoch": 29.02, "learning_rate": 5.245374586322546e-08, "loss": 3.7509, "step": 2613000 }, { "epoch": 29.02, "learning_rate": 5.24398640694756e-08, "loss": 3.7809, "step": 2613500 }, { "epoch": 29.03, "learning_rate": 5.242598227572573e-08, "loss": 3.7669, "step": 2614000 }, { "epoch": 29.04, "learning_rate": 5.2412100481975874e-08, "loss": 3.7776, "step": 2614500 }, { "epoch": 29.04, "learning_rate": 5.2398218688226016e-08, "loss": 3.7686, "step": 2615000 }, { "epoch": 29.05, "learning_rate": 5.238433689447616e-08, "loss": 3.767, "step": 2615500 }, { "epoch": 29.05, "learning_rate": 5.2370455100726294e-08, "loss": 3.7638, "step": 2616000 }, { "epoch": 29.06, "learning_rate": 5.2356573306976436e-08, "loss": 3.7723, "step": 2616500 }, { "epoch": 29.06, "learning_rate": 5.2342691513226565e-08, "loss": 3.7813, "step": 2617000 }, { "epoch": 29.07, "learning_rate": 5.232880971947671e-08, "loss": 3.7649, "step": 2617500 }, { "epoch": 29.07, "learning_rate": 5.231492792572685e-08, "loss": 3.7858, "step": 2618000 }, { "epoch": 29.08, "learning_rate": 5.230104613197699e-08, "loss": 3.7564, "step": 2618500 }, { "epoch": 29.09, "learning_rate": 5.228716433822713e-08, "loss": 3.7837, "step": 2619000 }, { "epoch": 29.09, "learning_rate": 5.227328254447726e-08, "loss": 3.7526, "step": 2619500 }, { "epoch": 29.1, "learning_rate": 5.22594007507274e-08, "loss": 3.7426, "step": 2620000 }, { "epoch": 29.1, "learning_rate": 5.224551895697754e-08, "loss": 3.7511, "step": 2620500 }, { "epoch": 29.11, "learning_rate": 5.2231637163227683e-08, "loss": 3.7685, "step": 2621000 }, { "epoch": 29.11, "learning_rate": 5.2217755369477826e-08, "loss": 3.7696, "step": 2621500 }, { "epoch": 29.12, "learning_rate": 5.220387357572796e-08, "loss": 3.7784, "step": 2622000 }, { "epoch": 29.12, "learning_rate": 5.218999178197809e-08, "loss": 3.7704, "step": 2622500 }, { "epoch": 29.13, "learning_rate": 5.217610998822823e-08, "loss": 3.7666, "step": 2623000 }, { "epoch": 29.14, "learning_rate": 5.2162228194478375e-08, "loss": 3.7656, "step": 2623500 }, { "epoch": 29.14, "learning_rate": 5.214834640072852e-08, "loss": 3.776, "step": 2624000 }, { "epoch": 29.15, "learning_rate": 5.213446460697866e-08, "loss": 3.7656, "step": 2624500 }, { "epoch": 29.15, "learning_rate": 5.2120582813228795e-08, "loss": 3.7611, "step": 2625000 }, { "epoch": 29.16, "learning_rate": 5.2106701019478924e-08, "loss": 3.7619, "step": 2625500 }, { "epoch": 29.16, "learning_rate": 5.2092819225729066e-08, "loss": 3.7549, "step": 2626000 }, { "epoch": 29.17, "learning_rate": 5.207893743197921e-08, "loss": 3.756, "step": 2626500 }, { "epoch": 29.17, "learning_rate": 5.206505563822935e-08, "loss": 3.775, "step": 2627000 }, { "epoch": 29.18, "learning_rate": 5.205117384447949e-08, "loss": 3.7486, "step": 2627500 }, { "epoch": 29.19, "learning_rate": 5.203729205072962e-08, "loss": 3.7472, "step": 2628000 }, { "epoch": 29.19, "learning_rate": 5.202341025697976e-08, "loss": 3.7592, "step": 2628500 }, { "epoch": 29.2, "learning_rate": 5.20095284632299e-08, "loss": 3.7796, "step": 2629000 }, { "epoch": 29.2, "learning_rate": 5.199564666948004e-08, "loss": 3.7748, "step": 2629500 }, { "epoch": 29.21, "learning_rate": 5.1981764875730185e-08, "loss": 3.7732, "step": 2630000 }, { "epoch": 29.21, "learning_rate": 5.196788308198033e-08, "loss": 3.7515, "step": 2630500 }, { "epoch": 29.22, "learning_rate": 5.1954001288230456e-08, "loss": 3.7697, "step": 2631000 }, { "epoch": 29.22, "learning_rate": 5.194011949448059e-08, "loss": 3.7469, "step": 2631500 }, { "epoch": 29.23, "learning_rate": 5.1926237700730734e-08, "loss": 3.7817, "step": 2632000 }, { "epoch": 29.24, "learning_rate": 5.1912355906980876e-08, "loss": 3.7532, "step": 2632500 }, { "epoch": 29.24, "learning_rate": 5.189847411323102e-08, "loss": 3.7805, "step": 2633000 }, { "epoch": 29.25, "learning_rate": 5.188459231948115e-08, "loss": 3.7704, "step": 2633500 }, { "epoch": 29.25, "learning_rate": 5.187071052573129e-08, "loss": 3.7754, "step": 2634000 }, { "epoch": 29.26, "learning_rate": 5.1856828731981425e-08, "loss": 3.7734, "step": 2634500 }, { "epoch": 29.26, "learning_rate": 5.184294693823157e-08, "loss": 3.7508, "step": 2635000 }, { "epoch": 29.27, "learning_rate": 5.182906514448171e-08, "loss": 3.771, "step": 2635500 }, { "epoch": 29.27, "learning_rate": 5.181518335073185e-08, "loss": 3.77, "step": 2636000 }, { "epoch": 29.28, "learning_rate": 5.180130155698198e-08, "loss": 3.7555, "step": 2636500 }, { "epoch": 29.29, "learning_rate": 5.1787419763232123e-08, "loss": 3.7738, "step": 2637000 }, { "epoch": 29.29, "learning_rate": 5.177353796948226e-08, "loss": 3.7749, "step": 2637500 }, { "epoch": 29.3, "learning_rate": 5.17596561757324e-08, "loss": 3.7692, "step": 2638000 }, { "epoch": 29.3, "learning_rate": 5.1745774381982544e-08, "loss": 3.7758, "step": 2638500 }, { "epoch": 29.31, "learning_rate": 5.1731892588232686e-08, "loss": 3.7551, "step": 2639000 }, { "epoch": 29.31, "learning_rate": 5.1718010794482815e-08, "loss": 3.7752, "step": 2639500 }, { "epoch": 29.32, "learning_rate": 5.170412900073296e-08, "loss": 3.7765, "step": 2640000 }, { "epoch": 29.32, "learning_rate": 5.169024720698309e-08, "loss": 3.7751, "step": 2640500 }, { "epoch": 29.33, "learning_rate": 5.1676365413233235e-08, "loss": 3.7584, "step": 2641000 }, { "epoch": 29.34, "learning_rate": 5.166248361948338e-08, "loss": 3.7634, "step": 2641500 }, { "epoch": 29.34, "learning_rate": 5.1648601825733506e-08, "loss": 3.781, "step": 2642000 }, { "epoch": 29.35, "learning_rate": 5.163472003198365e-08, "loss": 3.7671, "step": 2642500 }, { "epoch": 29.35, "learning_rate": 5.162083823823379e-08, "loss": 3.7683, "step": 2643000 }, { "epoch": 29.36, "learning_rate": 5.1606956444483927e-08, "loss": 3.7547, "step": 2643500 }, { "epoch": 29.36, "learning_rate": 5.159307465073407e-08, "loss": 3.7499, "step": 2644000 }, { "epoch": 29.37, "learning_rate": 5.157919285698421e-08, "loss": 3.781, "step": 2644500 }, { "epoch": 29.37, "learning_rate": 5.156531106323434e-08, "loss": 3.7786, "step": 2645000 }, { "epoch": 29.38, "learning_rate": 5.155142926948448e-08, "loss": 3.7796, "step": 2645500 }, { "epoch": 29.38, "learning_rate": 5.1537547475734625e-08, "loss": 3.7593, "step": 2646000 }, { "epoch": 29.39, "learning_rate": 5.152366568198476e-08, "loss": 3.7875, "step": 2646500 }, { "epoch": 29.4, "learning_rate": 5.15097838882349e-08, "loss": 3.7678, "step": 2647000 }, { "epoch": 29.4, "learning_rate": 5.1495902094485045e-08, "loss": 3.7732, "step": 2647500 }, { "epoch": 29.41, "learning_rate": 5.1482020300735174e-08, "loss": 3.7924, "step": 2648000 }, { "epoch": 29.41, "learning_rate": 5.1468138506985316e-08, "loss": 3.7593, "step": 2648500 }, { "epoch": 29.42, "learning_rate": 5.145425671323546e-08, "loss": 3.7542, "step": 2649000 }, { "epoch": 29.42, "learning_rate": 5.1440374919485594e-08, "loss": 3.7581, "step": 2649500 }, { "epoch": 29.43, "learning_rate": 5.1426493125735736e-08, "loss": 3.7624, "step": 2650000 }, { "epoch": 29.43, "learning_rate": 5.1412611331985865e-08, "loss": 3.7558, "step": 2650500 }, { "epoch": 29.44, "learning_rate": 5.139872953823601e-08, "loss": 3.7723, "step": 2651000 }, { "epoch": 29.45, "learning_rate": 5.138484774448615e-08, "loss": 3.7777, "step": 2651500 }, { "epoch": 29.45, "learning_rate": 5.137096595073629e-08, "loss": 3.7681, "step": 2652000 }, { "epoch": 29.46, "learning_rate": 5.135708415698643e-08, "loss": 3.7632, "step": 2652500 }, { "epoch": 29.46, "learning_rate": 5.134320236323657e-08, "loss": 3.7764, "step": 2653000 }, { "epoch": 29.47, "learning_rate": 5.13293205694867e-08, "loss": 3.7549, "step": 2653500 }, { "epoch": 29.47, "learning_rate": 5.131543877573684e-08, "loss": 3.78, "step": 2654000 }, { "epoch": 29.48, "learning_rate": 5.1301556981986984e-08, "loss": 3.7548, "step": 2654500 }, { "epoch": 29.48, "learning_rate": 5.1287675188237126e-08, "loss": 3.7605, "step": 2655000 }, { "epoch": 29.49, "learning_rate": 5.127379339448726e-08, "loss": 3.7659, "step": 2655500 }, { "epoch": 29.5, "learning_rate": 5.125991160073739e-08, "loss": 3.7814, "step": 2656000 }, { "epoch": 29.5, "learning_rate": 5.124602980698753e-08, "loss": 3.7644, "step": 2656500 }, { "epoch": 29.51, "learning_rate": 5.1232148013237675e-08, "loss": 3.7524, "step": 2657000 }, { "epoch": 29.51, "learning_rate": 5.121826621948782e-08, "loss": 3.7788, "step": 2657500 }, { "epoch": 29.52, "learning_rate": 5.120438442573796e-08, "loss": 3.7704, "step": 2658000 }, { "epoch": 29.52, "learning_rate": 5.1190502631988095e-08, "loss": 3.7571, "step": 2658500 }, { "epoch": 29.53, "learning_rate": 5.1176620838238224e-08, "loss": 3.7555, "step": 2659000 }, { "epoch": 29.53, "learning_rate": 5.1162739044488367e-08, "loss": 3.7691, "step": 2659500 }, { "epoch": 29.54, "learning_rate": 5.114885725073851e-08, "loss": 3.7753, "step": 2660000 }, { "epoch": 29.55, "learning_rate": 5.113497545698865e-08, "loss": 3.7795, "step": 2660500 }, { "epoch": 29.55, "learning_rate": 5.112109366323879e-08, "loss": 3.7595, "step": 2661000 }, { "epoch": 29.56, "learning_rate": 5.110721186948893e-08, "loss": 3.775, "step": 2661500 }, { "epoch": 29.56, "learning_rate": 5.109333007573906e-08, "loss": 3.7522, "step": 2662000 }, { "epoch": 29.57, "learning_rate": 5.10794482819892e-08, "loss": 3.7524, "step": 2662500 }, { "epoch": 29.57, "learning_rate": 5.106556648823934e-08, "loss": 3.7649, "step": 2663000 }, { "epoch": 29.58, "learning_rate": 5.1051684694489485e-08, "loss": 3.7763, "step": 2663500 }, { "epoch": 29.58, "learning_rate": 5.103780290073963e-08, "loss": 3.75, "step": 2664000 }, { "epoch": 29.59, "learning_rate": 5.1023921106989756e-08, "loss": 3.7703, "step": 2664500 }, { "epoch": 29.6, "learning_rate": 5.101003931323989e-08, "loss": 3.7751, "step": 2665000 }, { "epoch": 29.6, "learning_rate": 5.0996157519490034e-08, "loss": 3.7531, "step": 2665500 }, { "epoch": 29.61, "learning_rate": 5.0982275725740176e-08, "loss": 3.7502, "step": 2666000 }, { "epoch": 29.61, "learning_rate": 5.096839393199032e-08, "loss": 3.7666, "step": 2666500 }, { "epoch": 29.62, "learning_rate": 5.095451213824046e-08, "loss": 3.7664, "step": 2667000 }, { "epoch": 29.62, "learning_rate": 5.094063034449059e-08, "loss": 3.7618, "step": 2667500 }, { "epoch": 29.63, "learning_rate": 5.0926748550740725e-08, "loss": 3.7658, "step": 2668000 }, { "epoch": 29.63, "learning_rate": 5.091286675699087e-08, "loss": 3.7573, "step": 2668500 }, { "epoch": 29.64, "learning_rate": 5.089898496324101e-08, "loss": 3.7585, "step": 2669000 }, { "epoch": 29.65, "learning_rate": 5.088510316949115e-08, "loss": 3.7745, "step": 2669500 }, { "epoch": 29.65, "learning_rate": 5.087122137574128e-08, "loss": 3.7827, "step": 2670000 }, { "epoch": 29.66, "learning_rate": 5.0857339581991424e-08, "loss": 3.7803, "step": 2670500 }, { "epoch": 29.66, "learning_rate": 5.084345778824156e-08, "loss": 3.7764, "step": 2671000 }, { "epoch": 29.67, "learning_rate": 5.08295759944917e-08, "loss": 3.7813, "step": 2671500 }, { "epoch": 29.67, "learning_rate": 5.0815694200741844e-08, "loss": 3.7388, "step": 2672000 }, { "epoch": 29.68, "learning_rate": 5.0801812406991986e-08, "loss": 3.7636, "step": 2672500 }, { "epoch": 29.68, "learning_rate": 5.0787930613242115e-08, "loss": 3.7608, "step": 2673000 }, { "epoch": 29.69, "learning_rate": 5.077404881949226e-08, "loss": 3.7752, "step": 2673500 }, { "epoch": 29.7, "learning_rate": 5.076016702574239e-08, "loss": 3.7728, "step": 2674000 }, { "epoch": 29.7, "learning_rate": 5.0746285231992535e-08, "loss": 3.7556, "step": 2674500 }, { "epoch": 29.71, "learning_rate": 5.073240343824268e-08, "loss": 3.7533, "step": 2675000 }, { "epoch": 29.71, "learning_rate": 5.071852164449282e-08, "loss": 3.79, "step": 2675500 }, { "epoch": 29.72, "learning_rate": 5.070463985074295e-08, "loss": 3.7713, "step": 2676000 }, { "epoch": 29.72, "learning_rate": 5.069075805699309e-08, "loss": 3.7606, "step": 2676500 }, { "epoch": 29.73, "learning_rate": 5.0676876263243227e-08, "loss": 3.7742, "step": 2677000 }, { "epoch": 29.73, "learning_rate": 5.066299446949337e-08, "loss": 3.762, "step": 2677500 }, { "epoch": 29.74, "learning_rate": 5.064911267574351e-08, "loss": 3.7516, "step": 2678000 }, { "epoch": 29.75, "learning_rate": 5.063523088199364e-08, "loss": 3.774, "step": 2678500 }, { "epoch": 29.75, "learning_rate": 5.062134908824378e-08, "loss": 3.7729, "step": 2679000 }, { "epoch": 29.76, "learning_rate": 5.0607467294493925e-08, "loss": 3.7587, "step": 2679500 }, { "epoch": 29.76, "learning_rate": 5.059358550074406e-08, "loss": 3.7804, "step": 2680000 }, { "epoch": 29.77, "learning_rate": 5.05797037069942e-08, "loss": 3.7754, "step": 2680500 }, { "epoch": 29.77, "learning_rate": 5.0565821913244345e-08, "loss": 3.7708, "step": 2681000 }, { "epoch": 29.78, "learning_rate": 5.0551940119494474e-08, "loss": 3.7724, "step": 2681500 }, { "epoch": 29.78, "learning_rate": 5.0538058325744616e-08, "loss": 3.7744, "step": 2682000 }, { "epoch": 29.79, "learning_rate": 5.052417653199476e-08, "loss": 3.7566, "step": 2682500 }, { "epoch": 29.8, "learning_rate": 5.0510294738244894e-08, "loss": 3.7669, "step": 2683000 }, { "epoch": 29.8, "learning_rate": 5.0496412944495036e-08, "loss": 3.7627, "step": 2683500 }, { "epoch": 29.81, "learning_rate": 5.048253115074518e-08, "loss": 3.7537, "step": 2684000 }, { "epoch": 29.81, "learning_rate": 5.046864935699531e-08, "loss": 3.7803, "step": 2684500 }, { "epoch": 29.82, "learning_rate": 5.045476756324545e-08, "loss": 3.7685, "step": 2685000 }, { "epoch": 29.82, "learning_rate": 5.044088576949559e-08, "loss": 3.7589, "step": 2685500 }, { "epoch": 29.83, "learning_rate": 5.042700397574573e-08, "loss": 3.758, "step": 2686000 }, { "epoch": 29.83, "learning_rate": 5.041312218199587e-08, "loss": 3.772, "step": 2686500 }, { "epoch": 29.84, "learning_rate": 5.0399240388246e-08, "loss": 3.7625, "step": 2687000 }, { "epoch": 29.85, "learning_rate": 5.038535859449614e-08, "loss": 3.7611, "step": 2687500 }, { "epoch": 29.85, "learning_rate": 5.0371476800746284e-08, "loss": 3.7872, "step": 2688000 }, { "epoch": 29.86, "learning_rate": 5.0357595006996426e-08, "loss": 3.7626, "step": 2688500 }, { "epoch": 29.86, "learning_rate": 5.034371321324656e-08, "loss": 3.7752, "step": 2689000 }, { "epoch": 29.87, "learning_rate": 5.0329831419496704e-08, "loss": 3.7508, "step": 2689500 }, { "epoch": 29.87, "learning_rate": 5.031594962574683e-08, "loss": 3.7767, "step": 2690000 }, { "epoch": 29.88, "learning_rate": 5.0302067831996975e-08, "loss": 3.7667, "step": 2690500 }, { "epoch": 29.88, "learning_rate": 5.028818603824712e-08, "loss": 3.759, "step": 2691000 }, { "epoch": 29.89, "learning_rate": 5.027430424449726e-08, "loss": 3.7686, "step": 2691500 }, { "epoch": 29.9, "learning_rate": 5.0260422450747395e-08, "loss": 3.7747, "step": 2692000 }, { "epoch": 29.9, "learning_rate": 5.0246540656997524e-08, "loss": 3.7592, "step": 2692500 }, { "epoch": 29.91, "learning_rate": 5.0232658863247667e-08, "loss": 3.7826, "step": 2693000 }, { "epoch": 29.91, "learning_rate": 5.021877706949781e-08, "loss": 3.7786, "step": 2693500 }, { "epoch": 29.92, "learning_rate": 5.020489527574795e-08, "loss": 3.7856, "step": 2694000 }, { "epoch": 29.92, "learning_rate": 5.0191013481998093e-08, "loss": 3.7665, "step": 2694500 }, { "epoch": 29.93, "learning_rate": 5.017713168824823e-08, "loss": 3.7503, "step": 2695000 }, { "epoch": 29.93, "learning_rate": 5.016324989449836e-08, "loss": 3.77, "step": 2695500 }, { "epoch": 29.94, "learning_rate": 5.01493681007485e-08, "loss": 3.7702, "step": 2696000 }, { "epoch": 29.95, "learning_rate": 5.013548630699864e-08, "loss": 3.7729, "step": 2696500 }, { "epoch": 29.95, "learning_rate": 5.0121604513248785e-08, "loss": 3.7511, "step": 2697000 }, { "epoch": 29.96, "learning_rate": 5.010772271949893e-08, "loss": 3.7647, "step": 2697500 }, { "epoch": 29.96, "learning_rate": 5.009384092574906e-08, "loss": 3.7785, "step": 2698000 }, { "epoch": 29.97, "learning_rate": 5.007995913199919e-08, "loss": 3.7635, "step": 2698500 }, { "epoch": 29.97, "learning_rate": 5.0066077338249334e-08, "loss": 3.7817, "step": 2699000 }, { "epoch": 29.98, "learning_rate": 5.0052195544499476e-08, "loss": 3.7431, "step": 2699500 }, { "epoch": 29.98, "learning_rate": 5.003831375074962e-08, "loss": 3.751, "step": 2700000 }, { "epoch": 29.99, "learning_rate": 5.0024431956999754e-08, "loss": 3.7688, "step": 2700500 }, { "epoch": 30.0, "learning_rate": 5.001055016324989e-08, "loss": 3.7721, "step": 2701000 }, { "epoch": 30.0, "eval_loss": 3.8352878093719482, "eval_runtime": 6.305, "eval_samples_per_second": 246.469, "step": 2701380 }, { "epoch": 30.0, "learning_rate": 4.999666836950003e-08, "loss": 3.7702, "step": 2701500 }, { "epoch": 30.01, "learning_rate": 4.998278657575017e-08, "loss": 3.7899, "step": 2702000 }, { "epoch": 30.01, "learning_rate": 4.996890478200031e-08, "loss": 3.7609, "step": 2702500 }, { "epoch": 30.02, "learning_rate": 4.9955022988250446e-08, "loss": 3.7615, "step": 2703000 }, { "epoch": 30.02, "learning_rate": 4.994114119450059e-08, "loss": 3.7746, "step": 2703500 }, { "epoch": 30.03, "learning_rate": 4.9927259400750724e-08, "loss": 3.7662, "step": 2704000 }, { "epoch": 30.03, "learning_rate": 4.991337760700086e-08, "loss": 3.7601, "step": 2704500 }, { "epoch": 30.04, "learning_rate": 4.9899495813251e-08, "loss": 3.7351, "step": 2705000 }, { "epoch": 30.05, "learning_rate": 4.9885614019501144e-08, "loss": 3.77, "step": 2705500 }, { "epoch": 30.05, "learning_rate": 4.987173222575128e-08, "loss": 3.7664, "step": 2706000 }, { "epoch": 30.06, "learning_rate": 4.985785043200142e-08, "loss": 3.7691, "step": 2706500 }, { "epoch": 30.06, "learning_rate": 4.984396863825156e-08, "loss": 3.7685, "step": 2707000 }, { "epoch": 30.07, "learning_rate": 4.983008684450169e-08, "loss": 3.7541, "step": 2707500 }, { "epoch": 30.07, "learning_rate": 4.9816205050751835e-08, "loss": 3.7622, "step": 2708000 }, { "epoch": 30.08, "learning_rate": 4.980232325700198e-08, "loss": 3.7779, "step": 2708500 }, { "epoch": 30.08, "learning_rate": 4.978844146325211e-08, "loss": 3.7647, "step": 2709000 }, { "epoch": 30.09, "learning_rate": 4.9774559669502255e-08, "loss": 3.7736, "step": 2709500 }, { "epoch": 30.1, "learning_rate": 4.976067787575239e-08, "loss": 3.7728, "step": 2710000 }, { "epoch": 30.1, "learning_rate": 4.974679608200253e-08, "loss": 3.7479, "step": 2710500 }, { "epoch": 30.11, "learning_rate": 4.973291428825267e-08, "loss": 3.7517, "step": 2711000 }, { "epoch": 30.11, "learning_rate": 4.9719032494502805e-08, "loss": 3.7607, "step": 2711500 }, { "epoch": 30.12, "learning_rate": 4.970515070075295e-08, "loss": 3.7558, "step": 2712000 }, { "epoch": 30.12, "learning_rate": 4.969126890700309e-08, "loss": 3.7703, "step": 2712500 }, { "epoch": 30.13, "learning_rate": 4.9677387113253225e-08, "loss": 3.7723, "step": 2713000 }, { "epoch": 30.13, "learning_rate": 4.966350531950336e-08, "loss": 3.7644, "step": 2713500 }, { "epoch": 30.14, "learning_rate": 4.96496235257535e-08, "loss": 3.7544, "step": 2714000 }, { "epoch": 30.15, "learning_rate": 4.963574173200364e-08, "loss": 3.7594, "step": 2714500 }, { "epoch": 30.15, "learning_rate": 4.962185993825378e-08, "loss": 3.769, "step": 2715000 }, { "epoch": 30.16, "learning_rate": 4.960797814450392e-08, "loss": 3.7682, "step": 2715500 }, { "epoch": 30.16, "learning_rate": 4.959409635075406e-08, "loss": 3.7493, "step": 2716000 }, { "epoch": 30.17, "learning_rate": 4.9580214557004194e-08, "loss": 3.761, "step": 2716500 }, { "epoch": 30.17, "learning_rate": 4.9566332763254336e-08, "loss": 3.7622, "step": 2717000 }, { "epoch": 30.18, "learning_rate": 4.955245096950447e-08, "loss": 3.7678, "step": 2717500 }, { "epoch": 30.18, "learning_rate": 4.9538569175754614e-08, "loss": 3.7566, "step": 2718000 }, { "epoch": 30.19, "learning_rate": 4.952468738200475e-08, "loss": 3.773, "step": 2718500 }, { "epoch": 30.2, "learning_rate": 4.951080558825489e-08, "loss": 3.7724, "step": 2719000 }, { "epoch": 30.2, "learning_rate": 4.949692379450503e-08, "loss": 3.7571, "step": 2719500 }, { "epoch": 30.21, "learning_rate": 4.9483042000755164e-08, "loss": 3.787, "step": 2720000 }, { "epoch": 30.21, "learning_rate": 4.9469160207005306e-08, "loss": 3.7637, "step": 2720500 }, { "epoch": 30.22, "learning_rate": 4.945527841325545e-08, "loss": 3.7551, "step": 2721000 }, { "epoch": 30.22, "learning_rate": 4.9441396619505584e-08, "loss": 3.7619, "step": 2721500 }, { "epoch": 30.23, "learning_rate": 4.9427514825755726e-08, "loss": 3.7712, "step": 2722000 }, { "epoch": 30.23, "learning_rate": 4.941363303200586e-08, "loss": 3.751, "step": 2722500 }, { "epoch": 30.24, "learning_rate": 4.9399751238256e-08, "loss": 3.7662, "step": 2723000 }, { "epoch": 30.25, "learning_rate": 4.938586944450614e-08, "loss": 3.7664, "step": 2723500 }, { "epoch": 30.25, "learning_rate": 4.937198765075628e-08, "loss": 3.7866, "step": 2724000 }, { "epoch": 30.26, "learning_rate": 4.935810585700642e-08, "loss": 3.771, "step": 2724500 }, { "epoch": 30.26, "learning_rate": 4.934422406325656e-08, "loss": 3.7731, "step": 2725000 }, { "epoch": 30.27, "learning_rate": 4.933034226950669e-08, "loss": 3.7862, "step": 2725500 }, { "epoch": 30.27, "learning_rate": 4.931646047575683e-08, "loss": 3.7815, "step": 2726000 }, { "epoch": 30.28, "learning_rate": 4.930257868200697e-08, "loss": 3.7546, "step": 2726500 }, { "epoch": 30.28, "learning_rate": 4.928869688825711e-08, "loss": 3.7842, "step": 2727000 }, { "epoch": 30.29, "learning_rate": 4.927481509450725e-08, "loss": 3.7531, "step": 2727500 }, { "epoch": 30.3, "learning_rate": 4.9260933300757393e-08, "loss": 3.7476, "step": 2728000 }, { "epoch": 30.3, "learning_rate": 4.924705150700752e-08, "loss": 3.7653, "step": 2728500 }, { "epoch": 30.31, "learning_rate": 4.9233169713257665e-08, "loss": 3.7563, "step": 2729000 }, { "epoch": 30.31, "learning_rate": 4.921928791950781e-08, "loss": 3.7709, "step": 2729500 }, { "epoch": 30.32, "learning_rate": 4.920540612575794e-08, "loss": 3.7798, "step": 2730000 }, { "epoch": 30.32, "learning_rate": 4.9191524332008085e-08, "loss": 3.7673, "step": 2730500 }, { "epoch": 30.33, "learning_rate": 4.917764253825822e-08, "loss": 3.7492, "step": 2731000 }, { "epoch": 30.33, "learning_rate": 4.9163760744508356e-08, "loss": 3.7526, "step": 2731500 }, { "epoch": 30.34, "learning_rate": 4.91498789507585e-08, "loss": 3.763, "step": 2732000 }, { "epoch": 30.35, "learning_rate": 4.9135997157008634e-08, "loss": 3.7607, "step": 2732500 }, { "epoch": 30.35, "learning_rate": 4.9122115363258776e-08, "loss": 3.7733, "step": 2733000 }, { "epoch": 30.36, "learning_rate": 4.910823356950892e-08, "loss": 3.7695, "step": 2733500 }, { "epoch": 30.36, "learning_rate": 4.9094351775759054e-08, "loss": 3.7745, "step": 2734000 }, { "epoch": 30.37, "learning_rate": 4.908046998200919e-08, "loss": 3.7843, "step": 2734500 }, { "epoch": 30.37, "learning_rate": 4.906658818825933e-08, "loss": 3.7719, "step": 2735000 }, { "epoch": 30.38, "learning_rate": 4.905270639450947e-08, "loss": 3.7753, "step": 2735500 }, { "epoch": 30.38, "learning_rate": 4.903882460075961e-08, "loss": 3.7879, "step": 2736000 }, { "epoch": 30.39, "learning_rate": 4.902494280700975e-08, "loss": 3.7513, "step": 2736500 }, { "epoch": 30.4, "learning_rate": 4.901106101325989e-08, "loss": 3.7607, "step": 2737000 }, { "epoch": 30.4, "learning_rate": 4.8997179219510024e-08, "loss": 3.7629, "step": 2737500 }, { "epoch": 30.41, "learning_rate": 4.8983297425760166e-08, "loss": 3.7629, "step": 2738000 }, { "epoch": 30.41, "learning_rate": 4.89694156320103e-08, "loss": 3.7572, "step": 2738500 }, { "epoch": 30.42, "learning_rate": 4.8955533838260444e-08, "loss": 3.7711, "step": 2739000 }, { "epoch": 30.42, "learning_rate": 4.894165204451058e-08, "loss": 3.7762, "step": 2739500 }, { "epoch": 30.43, "learning_rate": 4.892777025076072e-08, "loss": 3.7607, "step": 2740000 }, { "epoch": 30.43, "learning_rate": 4.891388845701086e-08, "loss": 3.7783, "step": 2740500 }, { "epoch": 30.44, "learning_rate": 4.890000666326099e-08, "loss": 3.7566, "step": 2741000 }, { "epoch": 30.45, "learning_rate": 4.8886124869511135e-08, "loss": 3.7483, "step": 2741500 }, { "epoch": 30.45, "learning_rate": 4.887224307576128e-08, "loss": 3.765, "step": 2742000 }, { "epoch": 30.46, "learning_rate": 4.885836128201141e-08, "loss": 3.7673, "step": 2742500 }, { "epoch": 30.46, "learning_rate": 4.8844479488261556e-08, "loss": 3.7414, "step": 2743000 }, { "epoch": 30.47, "learning_rate": 4.883059769451169e-08, "loss": 3.7608, "step": 2743500 }, { "epoch": 30.47, "learning_rate": 4.881671590076183e-08, "loss": 3.7728, "step": 2744000 }, { "epoch": 30.48, "learning_rate": 4.880283410701197e-08, "loss": 3.7762, "step": 2744500 }, { "epoch": 30.48, "learning_rate": 4.878895231326211e-08, "loss": 3.7671, "step": 2745000 }, { "epoch": 30.49, "learning_rate": 4.877507051951225e-08, "loss": 3.7777, "step": 2745500 }, { "epoch": 30.5, "learning_rate": 4.876118872576239e-08, "loss": 3.7601, "step": 2746000 }, { "epoch": 30.5, "learning_rate": 4.8747306932012525e-08, "loss": 3.7615, "step": 2746500 }, { "epoch": 30.51, "learning_rate": 4.873342513826266e-08, "loss": 3.7783, "step": 2747000 }, { "epoch": 30.51, "learning_rate": 4.87195433445128e-08, "loss": 3.7406, "step": 2747500 }, { "epoch": 30.52, "learning_rate": 4.870566155076294e-08, "loss": 3.7574, "step": 2748000 }, { "epoch": 30.52, "learning_rate": 4.869177975701308e-08, "loss": 3.7689, "step": 2748500 }, { "epoch": 30.53, "learning_rate": 4.867789796326322e-08, "loss": 3.7399, "step": 2749000 }, { "epoch": 30.53, "learning_rate": 4.866401616951336e-08, "loss": 3.7592, "step": 2749500 }, { "epoch": 30.54, "learning_rate": 4.8650134375763494e-08, "loss": 3.7522, "step": 2750000 }, { "epoch": 30.55, "learning_rate": 4.8636252582013637e-08, "loss": 3.7614, "step": 2750500 }, { "epoch": 30.55, "learning_rate": 4.862237078826377e-08, "loss": 3.7633, "step": 2751000 }, { "epoch": 30.56, "learning_rate": 4.8608488994513914e-08, "loss": 3.7892, "step": 2751500 }, { "epoch": 30.56, "learning_rate": 4.859460720076406e-08, "loss": 3.753, "step": 2752000 }, { "epoch": 30.57, "learning_rate": 4.858072540701419e-08, "loss": 3.7537, "step": 2752500 }, { "epoch": 30.57, "learning_rate": 4.856684361326433e-08, "loss": 3.7605, "step": 2753000 }, { "epoch": 30.58, "learning_rate": 4.855296181951447e-08, "loss": 3.7797, "step": 2753500 }, { "epoch": 30.58, "learning_rate": 4.8539080025764606e-08, "loss": 3.7838, "step": 2754000 }, { "epoch": 30.59, "learning_rate": 4.852519823201475e-08, "loss": 3.7578, "step": 2754500 }, { "epoch": 30.6, "learning_rate": 4.8511316438264884e-08, "loss": 3.7689, "step": 2755000 }, { "epoch": 30.6, "learning_rate": 4.8497434644515026e-08, "loss": 3.7545, "step": 2755500 }, { "epoch": 30.61, "learning_rate": 4.848355285076516e-08, "loss": 3.7466, "step": 2756000 }, { "epoch": 30.61, "learning_rate": 4.84696710570153e-08, "loss": 3.7481, "step": 2756500 }, { "epoch": 30.62, "learning_rate": 4.845578926326544e-08, "loss": 3.7532, "step": 2757000 }, { "epoch": 30.62, "learning_rate": 4.844190746951558e-08, "loss": 3.764, "step": 2757500 }, { "epoch": 30.63, "learning_rate": 4.842802567576572e-08, "loss": 3.7788, "step": 2758000 }, { "epoch": 30.63, "learning_rate": 4.841414388201586e-08, "loss": 3.7984, "step": 2758500 }, { "epoch": 30.64, "learning_rate": 4.8400262088265995e-08, "loss": 3.7503, "step": 2759000 }, { "epoch": 30.65, "learning_rate": 4.838638029451613e-08, "loss": 3.7653, "step": 2759500 }, { "epoch": 30.65, "learning_rate": 4.8372498500766273e-08, "loss": 3.745, "step": 2760000 }, { "epoch": 30.66, "learning_rate": 4.8358616707016416e-08, "loss": 3.7655, "step": 2760500 }, { "epoch": 30.66, "learning_rate": 4.834473491326655e-08, "loss": 3.7547, "step": 2761000 }, { "epoch": 30.67, "learning_rate": 4.833085311951669e-08, "loss": 3.7802, "step": 2761500 }, { "epoch": 30.67, "learning_rate": 4.831697132576682e-08, "loss": 3.7676, "step": 2762000 }, { "epoch": 30.68, "learning_rate": 4.8303089532016965e-08, "loss": 3.7852, "step": 2762500 }, { "epoch": 30.68, "learning_rate": 4.828920773826711e-08, "loss": 3.7764, "step": 2763000 }, { "epoch": 30.69, "learning_rate": 4.827532594451724e-08, "loss": 3.7755, "step": 2763500 }, { "epoch": 30.7, "learning_rate": 4.8261444150767385e-08, "loss": 3.785, "step": 2764000 }, { "epoch": 30.7, "learning_rate": 4.824756235701752e-08, "loss": 3.7597, "step": 2764500 }, { "epoch": 30.71, "learning_rate": 4.8233680563267656e-08, "loss": 3.7547, "step": 2765000 }, { "epoch": 30.71, "learning_rate": 4.82197987695178e-08, "loss": 3.7777, "step": 2765500 }, { "epoch": 30.72, "learning_rate": 4.820591697576794e-08, "loss": 3.7606, "step": 2766000 }, { "epoch": 30.72, "learning_rate": 4.8192035182018076e-08, "loss": 3.7601, "step": 2766500 }, { "epoch": 30.73, "learning_rate": 4.817815338826822e-08, "loss": 3.7643, "step": 2767000 }, { "epoch": 30.73, "learning_rate": 4.8164271594518354e-08, "loss": 3.7669, "step": 2767500 }, { "epoch": 30.74, "learning_rate": 4.815038980076849e-08, "loss": 3.7455, "step": 2768000 }, { "epoch": 30.75, "learning_rate": 4.813650800701863e-08, "loss": 3.7513, "step": 2768500 }, { "epoch": 30.75, "learning_rate": 4.812262621326877e-08, "loss": 3.7447, "step": 2769000 }, { "epoch": 30.76, "learning_rate": 4.810874441951891e-08, "loss": 3.7422, "step": 2769500 }, { "epoch": 30.76, "learning_rate": 4.809486262576905e-08, "loss": 3.7514, "step": 2770000 }, { "epoch": 30.77, "learning_rate": 4.808098083201919e-08, "loss": 3.7671, "step": 2770500 }, { "epoch": 30.77, "learning_rate": 4.8067099038269324e-08, "loss": 3.7531, "step": 2771000 }, { "epoch": 30.78, "learning_rate": 4.8053217244519466e-08, "loss": 3.7625, "step": 2771500 }, { "epoch": 30.78, "learning_rate": 4.80393354507696e-08, "loss": 3.7783, "step": 2772000 }, { "epoch": 30.79, "learning_rate": 4.8025453657019744e-08, "loss": 3.7741, "step": 2772500 }, { "epoch": 30.8, "learning_rate": 4.8011571863269886e-08, "loss": 3.7691, "step": 2773000 }, { "epoch": 30.8, "learning_rate": 4.799769006952002e-08, "loss": 3.7629, "step": 2773500 }, { "epoch": 30.81, "learning_rate": 4.798380827577016e-08, "loss": 3.7543, "step": 2774000 }, { "epoch": 30.81, "learning_rate": 4.79699264820203e-08, "loss": 3.738, "step": 2774500 }, { "epoch": 30.82, "learning_rate": 4.7956044688270435e-08, "loss": 3.752, "step": 2775000 }, { "epoch": 30.82, "learning_rate": 4.794216289452058e-08, "loss": 3.7767, "step": 2775500 }, { "epoch": 30.83, "learning_rate": 4.792828110077072e-08, "loss": 3.7817, "step": 2776000 }, { "epoch": 30.83, "learning_rate": 4.7914399307020856e-08, "loss": 3.7611, "step": 2776500 }, { "epoch": 30.84, "learning_rate": 4.790051751327099e-08, "loss": 3.7582, "step": 2777000 }, { "epoch": 30.85, "learning_rate": 4.788663571952113e-08, "loss": 3.7605, "step": 2777500 }, { "epoch": 30.85, "learning_rate": 4.787275392577127e-08, "loss": 3.7772, "step": 2778000 }, { "epoch": 30.86, "learning_rate": 4.785887213202141e-08, "loss": 3.7713, "step": 2778500 }, { "epoch": 30.86, "learning_rate": 4.784499033827155e-08, "loss": 3.7601, "step": 2779000 }, { "epoch": 30.87, "learning_rate": 4.783110854452169e-08, "loss": 3.7735, "step": 2779500 }, { "epoch": 30.87, "learning_rate": 4.7817226750771825e-08, "loss": 3.7685, "step": 2780000 }, { "epoch": 30.88, "learning_rate": 4.780334495702196e-08, "loss": 3.7622, "step": 2780500 }, { "epoch": 30.88, "learning_rate": 4.77894631632721e-08, "loss": 3.7698, "step": 2781000 }, { "epoch": 30.89, "learning_rate": 4.7775581369522245e-08, "loss": 3.774, "step": 2781500 }, { "epoch": 30.9, "learning_rate": 4.776169957577238e-08, "loss": 3.7798, "step": 2782000 }, { "epoch": 30.9, "learning_rate": 4.774781778202252e-08, "loss": 3.7505, "step": 2782500 }, { "epoch": 30.91, "learning_rate": 4.773393598827266e-08, "loss": 3.7601, "step": 2783000 }, { "epoch": 30.91, "learning_rate": 4.7720054194522794e-08, "loss": 3.7691, "step": 2783500 }, { "epoch": 30.92, "learning_rate": 4.7706172400772937e-08, "loss": 3.7786, "step": 2784000 }, { "epoch": 30.92, "learning_rate": 4.769229060702307e-08, "loss": 3.7866, "step": 2784500 }, { "epoch": 30.93, "learning_rate": 4.7678408813273215e-08, "loss": 3.7668, "step": 2785000 }, { "epoch": 30.93, "learning_rate": 4.766452701952336e-08, "loss": 3.7619, "step": 2785500 }, { "epoch": 30.94, "learning_rate": 4.765064522577349e-08, "loss": 3.7586, "step": 2786000 }, { "epoch": 30.95, "learning_rate": 4.763676343202363e-08, "loss": 3.7724, "step": 2786500 }, { "epoch": 30.95, "learning_rate": 4.762288163827377e-08, "loss": 3.7729, "step": 2787000 }, { "epoch": 30.96, "learning_rate": 4.7608999844523906e-08, "loss": 3.762, "step": 2787500 }, { "epoch": 30.96, "learning_rate": 4.759511805077405e-08, "loss": 3.7745, "step": 2788000 }, { "epoch": 30.97, "learning_rate": 4.758123625702419e-08, "loss": 3.7544, "step": 2788500 }, { "epoch": 30.97, "learning_rate": 4.7567354463274326e-08, "loss": 3.7569, "step": 2789000 }, { "epoch": 30.98, "learning_rate": 4.755347266952446e-08, "loss": 3.7644, "step": 2789500 }, { "epoch": 30.98, "learning_rate": 4.7539590875774604e-08, "loss": 3.77, "step": 2790000 }, { "epoch": 30.99, "learning_rate": 4.752570908202474e-08, "loss": 3.7653, "step": 2790500 }, { "epoch": 31.0, "learning_rate": 4.751182728827488e-08, "loss": 3.7616, "step": 2791000 }, { "epoch": 31.0, "eval_loss": 3.8338966369628906, "eval_runtime": 6.3047, "eval_samples_per_second": 246.482, "step": 2791426 }, { "epoch": 31.0, "learning_rate": 4.749794549452502e-08, "loss": 3.7581, "step": 2791500 }, { "epoch": 31.01, "learning_rate": 4.748406370077515e-08, "loss": 3.7536, "step": 2792000 }, { "epoch": 31.01, "learning_rate": 4.7470181907025296e-08, "loss": 3.7805, "step": 2792500 }, { "epoch": 31.02, "learning_rate": 4.745630011327543e-08, "loss": 3.7684, "step": 2793000 }, { "epoch": 31.02, "learning_rate": 4.7442418319525573e-08, "loss": 3.7698, "step": 2793500 }, { "epoch": 31.03, "learning_rate": 4.7428536525775716e-08, "loss": 3.7578, "step": 2794000 }, { "epoch": 31.03, "learning_rate": 4.741465473202585e-08, "loss": 3.7723, "step": 2794500 }, { "epoch": 31.04, "learning_rate": 4.740077293827599e-08, "loss": 3.7905, "step": 2795000 }, { "epoch": 31.05, "learning_rate": 4.738689114452613e-08, "loss": 3.7624, "step": 2795500 }, { "epoch": 31.05, "learning_rate": 4.7373009350776265e-08, "loss": 3.7682, "step": 2796000 }, { "epoch": 31.06, "learning_rate": 4.735912755702641e-08, "loss": 3.7636, "step": 2796500 }, { "epoch": 31.06, "learning_rate": 4.734524576327655e-08, "loss": 3.7786, "step": 2797000 }, { "epoch": 31.07, "learning_rate": 4.7331363969526685e-08, "loss": 3.7601, "step": 2797500 }, { "epoch": 31.07, "learning_rate": 4.731748217577682e-08, "loss": 3.7745, "step": 2798000 }, { "epoch": 31.08, "learning_rate": 4.7303600382026956e-08, "loss": 3.7621, "step": 2798500 }, { "epoch": 31.08, "learning_rate": 4.72897185882771e-08, "loss": 3.783, "step": 2799000 }, { "epoch": 31.09, "learning_rate": 4.727583679452724e-08, "loss": 3.7673, "step": 2799500 }, { "epoch": 31.1, "learning_rate": 4.7261955000777377e-08, "loss": 3.7703, "step": 2800000 }, { "epoch": 31.1, "learning_rate": 4.724807320702752e-08, "loss": 3.7615, "step": 2800500 }, { "epoch": 31.11, "learning_rate": 4.7234191413277654e-08, "loss": 3.751, "step": 2801000 }, { "epoch": 31.11, "learning_rate": 4.722030961952779e-08, "loss": 3.7532, "step": 2801500 }, { "epoch": 31.12, "learning_rate": 4.720642782577793e-08, "loss": 3.7535, "step": 2802000 }, { "epoch": 31.12, "learning_rate": 4.7192546032028075e-08, "loss": 3.7627, "step": 2802500 }, { "epoch": 31.13, "learning_rate": 4.717866423827821e-08, "loss": 3.755, "step": 2803000 }, { "epoch": 31.13, "learning_rate": 4.716478244452835e-08, "loss": 3.7459, "step": 2803500 }, { "epoch": 31.14, "learning_rate": 4.715090065077849e-08, "loss": 3.755, "step": 2804000 }, { "epoch": 31.15, "learning_rate": 4.7137018857028624e-08, "loss": 3.7662, "step": 2804500 }, { "epoch": 31.15, "learning_rate": 4.7123137063278766e-08, "loss": 3.7722, "step": 2805000 }, { "epoch": 31.16, "learning_rate": 4.710925526952891e-08, "loss": 3.767, "step": 2805500 }, { "epoch": 31.16, "learning_rate": 4.7095373475779044e-08, "loss": 3.7613, "step": 2806000 }, { "epoch": 31.17, "learning_rate": 4.7081491682029186e-08, "loss": 3.7614, "step": 2806500 }, { "epoch": 31.17, "learning_rate": 4.706760988827932e-08, "loss": 3.7504, "step": 2807000 }, { "epoch": 31.18, "learning_rate": 4.705372809452946e-08, "loss": 3.7697, "step": 2807500 }, { "epoch": 31.18, "learning_rate": 4.70398463007796e-08, "loss": 3.7538, "step": 2808000 }, { "epoch": 31.19, "learning_rate": 4.7025964507029735e-08, "loss": 3.7673, "step": 2808500 }, { "epoch": 31.2, "learning_rate": 4.701208271327988e-08, "loss": 3.7548, "step": 2809000 }, { "epoch": 31.2, "learning_rate": 4.699820091953002e-08, "loss": 3.7716, "step": 2809500 }, { "epoch": 31.21, "learning_rate": 4.6984319125780156e-08, "loss": 3.7516, "step": 2810000 }, { "epoch": 31.21, "learning_rate": 4.697043733203029e-08, "loss": 3.7762, "step": 2810500 }, { "epoch": 31.22, "learning_rate": 4.6956555538280434e-08, "loss": 3.7706, "step": 2811000 }, { "epoch": 31.22, "learning_rate": 4.694267374453057e-08, "loss": 3.778, "step": 2811500 }, { "epoch": 31.23, "learning_rate": 4.692879195078071e-08, "loss": 3.7708, "step": 2812000 }, { "epoch": 31.23, "learning_rate": 4.6914910157030854e-08, "loss": 3.7671, "step": 2812500 }, { "epoch": 31.24, "learning_rate": 4.690102836328099e-08, "loss": 3.7641, "step": 2813000 }, { "epoch": 31.25, "learning_rate": 4.6887146569531125e-08, "loss": 3.7745, "step": 2813500 }, { "epoch": 31.25, "learning_rate": 4.687326477578126e-08, "loss": 3.7439, "step": 2814000 }, { "epoch": 31.26, "learning_rate": 4.68593829820314e-08, "loss": 3.7455, "step": 2814500 }, { "epoch": 31.26, "learning_rate": 4.6845501188281545e-08, "loss": 3.7705, "step": 2815000 }, { "epoch": 31.27, "learning_rate": 4.683161939453168e-08, "loss": 3.7672, "step": 2815500 }, { "epoch": 31.27, "learning_rate": 4.681773760078182e-08, "loss": 3.7728, "step": 2816000 }, { "epoch": 31.28, "learning_rate": 4.680385580703196e-08, "loss": 3.7733, "step": 2816500 }, { "epoch": 31.28, "learning_rate": 4.6789974013282094e-08, "loss": 3.7375, "step": 2817000 }, { "epoch": 31.29, "learning_rate": 4.677609221953224e-08, "loss": 3.7623, "step": 2817500 }, { "epoch": 31.3, "learning_rate": 4.676221042578238e-08, "loss": 3.7534, "step": 2818000 }, { "epoch": 31.3, "learning_rate": 4.6748328632032515e-08, "loss": 3.7799, "step": 2818500 }, { "epoch": 31.31, "learning_rate": 4.673444683828266e-08, "loss": 3.748, "step": 2819000 }, { "epoch": 31.31, "learning_rate": 4.672056504453279e-08, "loss": 3.7637, "step": 2819500 }, { "epoch": 31.32, "learning_rate": 4.670668325078293e-08, "loss": 3.7581, "step": 2820000 }, { "epoch": 31.32, "learning_rate": 4.669280145703307e-08, "loss": 3.7697, "step": 2820500 }, { "epoch": 31.33, "learning_rate": 4.6678919663283206e-08, "loss": 3.7616, "step": 2821000 }, { "epoch": 31.33, "learning_rate": 4.666503786953335e-08, "loss": 3.7594, "step": 2821500 }, { "epoch": 31.34, "learning_rate": 4.665115607578349e-08, "loss": 3.7485, "step": 2822000 }, { "epoch": 31.35, "learning_rate": 4.663727428203362e-08, "loss": 3.7614, "step": 2822500 }, { "epoch": 31.35, "learning_rate": 4.662339248828376e-08, "loss": 3.7604, "step": 2823000 }, { "epoch": 31.36, "learning_rate": 4.6609510694533904e-08, "loss": 3.7758, "step": 2823500 }, { "epoch": 31.36, "learning_rate": 4.659562890078404e-08, "loss": 3.7505, "step": 2824000 }, { "epoch": 31.37, "learning_rate": 4.658174710703418e-08, "loss": 3.7504, "step": 2824500 }, { "epoch": 31.37, "learning_rate": 4.6567865313284324e-08, "loss": 3.7532, "step": 2825000 }, { "epoch": 31.38, "learning_rate": 4.6553983519534453e-08, "loss": 3.7561, "step": 2825500 }, { "epoch": 31.38, "learning_rate": 4.6540101725784596e-08, "loss": 3.7645, "step": 2826000 }, { "epoch": 31.39, "learning_rate": 4.652621993203474e-08, "loss": 3.7773, "step": 2826500 }, { "epoch": 31.4, "learning_rate": 4.6512338138284874e-08, "loss": 3.769, "step": 2827000 }, { "epoch": 31.4, "learning_rate": 4.6498456344535016e-08, "loss": 3.7582, "step": 2827500 }, { "epoch": 31.41, "learning_rate": 4.648457455078515e-08, "loss": 3.7702, "step": 2828000 }, { "epoch": 31.41, "learning_rate": 4.647069275703529e-08, "loss": 3.7762, "step": 2828500 }, { "epoch": 31.42, "learning_rate": 4.645681096328543e-08, "loss": 3.7649, "step": 2829000 }, { "epoch": 31.42, "learning_rate": 4.6442929169535565e-08, "loss": 3.7741, "step": 2829500 }, { "epoch": 31.43, "learning_rate": 4.642904737578571e-08, "loss": 3.781, "step": 2830000 }, { "epoch": 31.43, "learning_rate": 4.641516558203585e-08, "loss": 3.7713, "step": 2830500 }, { "epoch": 31.44, "learning_rate": 4.6401283788285985e-08, "loss": 3.763, "step": 2831000 }, { "epoch": 31.45, "learning_rate": 4.638740199453612e-08, "loss": 3.7363, "step": 2831500 }, { "epoch": 31.45, "learning_rate": 4.637352020078626e-08, "loss": 3.773, "step": 2832000 }, { "epoch": 31.46, "learning_rate": 4.63596384070364e-08, "loss": 3.761, "step": 2832500 }, { "epoch": 31.46, "learning_rate": 4.634575661328654e-08, "loss": 3.7713, "step": 2833000 }, { "epoch": 31.47, "learning_rate": 4.633187481953668e-08, "loss": 3.7553, "step": 2833500 }, { "epoch": 31.47, "learning_rate": 4.631799302578682e-08, "loss": 3.7568, "step": 2834000 }, { "epoch": 31.48, "learning_rate": 4.6304111232036955e-08, "loss": 3.7464, "step": 2834500 }, { "epoch": 31.48, "learning_rate": 4.62902294382871e-08, "loss": 3.7585, "step": 2835000 }, { "epoch": 31.49, "learning_rate": 4.627634764453723e-08, "loss": 3.751, "step": 2835500 }, { "epoch": 31.5, "learning_rate": 4.6262465850787375e-08, "loss": 3.7632, "step": 2836000 }, { "epoch": 31.5, "learning_rate": 4.624858405703751e-08, "loss": 3.7599, "step": 2836500 }, { "epoch": 31.51, "learning_rate": 4.623470226328765e-08, "loss": 3.7642, "step": 2837000 }, { "epoch": 31.51, "learning_rate": 4.622082046953779e-08, "loss": 3.7762, "step": 2837500 }, { "epoch": 31.52, "learning_rate": 4.6206938675787924e-08, "loss": 3.7565, "step": 2838000 }, { "epoch": 31.52, "learning_rate": 4.6193056882038066e-08, "loss": 3.7649, "step": 2838500 }, { "epoch": 31.53, "learning_rate": 4.617917508828821e-08, "loss": 3.7484, "step": 2839000 }, { "epoch": 31.53, "learning_rate": 4.6165293294538344e-08, "loss": 3.7614, "step": 2839500 }, { "epoch": 31.54, "learning_rate": 4.6151411500788486e-08, "loss": 3.7485, "step": 2840000 }, { "epoch": 31.54, "learning_rate": 4.613752970703862e-08, "loss": 3.7689, "step": 2840500 }, { "epoch": 31.55, "learning_rate": 4.612364791328876e-08, "loss": 3.7763, "step": 2841000 }, { "epoch": 31.56, "learning_rate": 4.61097661195389e-08, "loss": 3.7628, "step": 2841500 }, { "epoch": 31.56, "learning_rate": 4.609588432578904e-08, "loss": 3.7629, "step": 2842000 }, { "epoch": 31.57, "learning_rate": 4.608200253203918e-08, "loss": 3.7501, "step": 2842500 }, { "epoch": 31.57, "learning_rate": 4.606812073828932e-08, "loss": 3.7719, "step": 2843000 }, { "epoch": 31.58, "learning_rate": 4.6054238944539456e-08, "loss": 3.7434, "step": 2843500 }, { "epoch": 31.58, "learning_rate": 4.604035715078959e-08, "loss": 3.7637, "step": 2844000 }, { "epoch": 31.59, "learning_rate": 4.6026475357039734e-08, "loss": 3.761, "step": 2844500 }, { "epoch": 31.59, "learning_rate": 4.601259356328987e-08, "loss": 3.7501, "step": 2845000 }, { "epoch": 31.6, "learning_rate": 4.599871176954001e-08, "loss": 3.7439, "step": 2845500 }, { "epoch": 31.61, "learning_rate": 4.5984829975790154e-08, "loss": 3.7883, "step": 2846000 }, { "epoch": 31.61, "learning_rate": 4.597094818204029e-08, "loss": 3.7409, "step": 2846500 }, { "epoch": 31.62, "learning_rate": 4.5957066388290425e-08, "loss": 3.76, "step": 2847000 }, { "epoch": 31.62, "learning_rate": 4.594318459454057e-08, "loss": 3.7451, "step": 2847500 }, { "epoch": 31.63, "learning_rate": 4.59293028007907e-08, "loss": 3.7725, "step": 2848000 }, { "epoch": 31.63, "learning_rate": 4.5915421007040845e-08, "loss": 3.7908, "step": 2848500 }, { "epoch": 31.64, "learning_rate": 4.590153921329099e-08, "loss": 3.7723, "step": 2849000 }, { "epoch": 31.64, "learning_rate": 4.588765741954112e-08, "loss": 3.7503, "step": 2849500 }, { "epoch": 31.65, "learning_rate": 4.587377562579126e-08, "loss": 3.7759, "step": 2850000 }, { "epoch": 31.66, "learning_rate": 4.5859893832041395e-08, "loss": 3.7487, "step": 2850500 }, { "epoch": 31.66, "learning_rate": 4.584601203829154e-08, "loss": 3.7704, "step": 2851000 }, { "epoch": 31.67, "learning_rate": 4.583213024454168e-08, "loss": 3.7585, "step": 2851500 }, { "epoch": 31.67, "learning_rate": 4.5818248450791815e-08, "loss": 3.7587, "step": 2852000 }, { "epoch": 31.68, "learning_rate": 4.580436665704196e-08, "loss": 3.7733, "step": 2852500 }, { "epoch": 31.68, "learning_rate": 4.579048486329209e-08, "loss": 3.77, "step": 2853000 }, { "epoch": 31.69, "learning_rate": 4.577660306954223e-08, "loss": 3.7599, "step": 2853500 }, { "epoch": 31.69, "learning_rate": 4.576272127579237e-08, "loss": 3.7651, "step": 2854000 }, { "epoch": 31.7, "learning_rate": 4.574883948204251e-08, "loss": 3.7514, "step": 2854500 }, { "epoch": 31.71, "learning_rate": 4.573495768829265e-08, "loss": 3.7541, "step": 2855000 }, { "epoch": 31.71, "learning_rate": 4.572107589454279e-08, "loss": 3.7595, "step": 2855500 }, { "epoch": 31.72, "learning_rate": 4.5707194100792926e-08, "loss": 3.7714, "step": 2856000 }, { "epoch": 31.72, "learning_rate": 4.569331230704306e-08, "loss": 3.7711, "step": 2856500 }, { "epoch": 31.73, "learning_rate": 4.5679430513293204e-08, "loss": 3.7599, "step": 2857000 }, { "epoch": 31.73, "learning_rate": 4.566554871954334e-08, "loss": 3.7643, "step": 2857500 }, { "epoch": 31.74, "learning_rate": 4.565166692579348e-08, "loss": 3.7623, "step": 2858000 }, { "epoch": 31.74, "learning_rate": 4.5637785132043624e-08, "loss": 3.7573, "step": 2858500 }, { "epoch": 31.75, "learning_rate": 4.5623903338293753e-08, "loss": 3.7769, "step": 2859000 }, { "epoch": 31.76, "learning_rate": 4.5610021544543896e-08, "loss": 3.7722, "step": 2859500 }, { "epoch": 31.76, "learning_rate": 4.559613975079404e-08, "loss": 3.7551, "step": 2860000 }, { "epoch": 31.77, "learning_rate": 4.5582257957044174e-08, "loss": 3.7694, "step": 2860500 }, { "epoch": 31.77, "learning_rate": 4.5568376163294316e-08, "loss": 3.7382, "step": 2861000 }, { "epoch": 31.78, "learning_rate": 4.555449436954446e-08, "loss": 3.7636, "step": 2861500 }, { "epoch": 31.78, "learning_rate": 4.554061257579459e-08, "loss": 3.7406, "step": 2862000 }, { "epoch": 31.79, "learning_rate": 4.552673078204473e-08, "loss": 3.7686, "step": 2862500 }, { "epoch": 31.79, "learning_rate": 4.551284898829487e-08, "loss": 3.7664, "step": 2863000 }, { "epoch": 31.8, "learning_rate": 4.549896719454501e-08, "loss": 3.7655, "step": 2863500 }, { "epoch": 31.81, "learning_rate": 4.548508540079515e-08, "loss": 3.7701, "step": 2864000 }, { "epoch": 31.81, "learning_rate": 4.5471203607045285e-08, "loss": 3.7878, "step": 2864500 }, { "epoch": 31.82, "learning_rate": 4.545732181329542e-08, "loss": 3.7815, "step": 2865000 }, { "epoch": 31.82, "learning_rate": 4.544344001954556e-08, "loss": 3.7646, "step": 2865500 }, { "epoch": 31.83, "learning_rate": 4.54295582257957e-08, "loss": 3.7722, "step": 2866000 }, { "epoch": 31.83, "learning_rate": 4.541567643204584e-08, "loss": 3.7622, "step": 2866500 }, { "epoch": 31.84, "learning_rate": 4.5401794638295983e-08, "loss": 3.7374, "step": 2867000 }, { "epoch": 31.84, "learning_rate": 4.538791284454612e-08, "loss": 3.778, "step": 2867500 }, { "epoch": 31.85, "learning_rate": 4.5374031050796255e-08, "loss": 3.7671, "step": 2868000 }, { "epoch": 31.86, "learning_rate": 4.53601492570464e-08, "loss": 3.7698, "step": 2868500 }, { "epoch": 31.86, "learning_rate": 4.534626746329653e-08, "loss": 3.7464, "step": 2869000 }, { "epoch": 31.87, "learning_rate": 4.5332385669546675e-08, "loss": 3.7634, "step": 2869500 }, { "epoch": 31.87, "learning_rate": 4.531850387579682e-08, "loss": 3.7616, "step": 2870000 }, { "epoch": 31.88, "learning_rate": 4.530462208204695e-08, "loss": 3.7455, "step": 2870500 }, { "epoch": 31.88, "learning_rate": 4.529074028829709e-08, "loss": 3.7615, "step": 2871000 }, { "epoch": 31.89, "learning_rate": 4.527685849454723e-08, "loss": 3.7356, "step": 2871500 }, { "epoch": 31.89, "learning_rate": 4.5262976700797366e-08, "loss": 3.7739, "step": 2872000 }, { "epoch": 31.9, "learning_rate": 4.524909490704751e-08, "loss": 3.7543, "step": 2872500 }, { "epoch": 31.91, "learning_rate": 4.5235213113297644e-08, "loss": 3.7721, "step": 2873000 }, { "epoch": 31.91, "learning_rate": 4.5221331319547786e-08, "loss": 3.7474, "step": 2873500 }, { "epoch": 31.92, "learning_rate": 4.520744952579792e-08, "loss": 3.7654, "step": 2874000 }, { "epoch": 31.92, "learning_rate": 4.519356773204806e-08, "loss": 3.7854, "step": 2874500 }, { "epoch": 31.93, "learning_rate": 4.51796859382982e-08, "loss": 3.7678, "step": 2875000 }, { "epoch": 31.93, "learning_rate": 4.516580414454834e-08, "loss": 3.7674, "step": 2875500 }, { "epoch": 31.94, "learning_rate": 4.515192235079848e-08, "loss": 3.7666, "step": 2876000 }, { "epoch": 31.94, "learning_rate": 4.513804055704862e-08, "loss": 3.7623, "step": 2876500 }, { "epoch": 31.95, "learning_rate": 4.5124158763298756e-08, "loss": 3.7535, "step": 2877000 }, { "epoch": 31.96, "learning_rate": 4.511027696954889e-08, "loss": 3.7513, "step": 2877500 }, { "epoch": 31.96, "learning_rate": 4.5096395175799034e-08, "loss": 3.7576, "step": 2878000 }, { "epoch": 31.97, "learning_rate": 4.5082513382049176e-08, "loss": 3.7583, "step": 2878500 }, { "epoch": 31.97, "learning_rate": 4.506863158829931e-08, "loss": 3.7719, "step": 2879000 }, { "epoch": 31.98, "learning_rate": 4.5054749794549454e-08, "loss": 3.7655, "step": 2879500 }, { "epoch": 31.98, "learning_rate": 4.504086800079959e-08, "loss": 3.7603, "step": 2880000 }, { "epoch": 31.99, "learning_rate": 4.5026986207049725e-08, "loss": 3.761, "step": 2880500 }, { "epoch": 31.99, "learning_rate": 4.501310441329987e-08, "loss": 3.7671, "step": 2881000 }, { "epoch": 32.0, "eval_loss": 3.8329031467437744, "eval_runtime": 6.2965, "eval_samples_per_second": 246.805, "step": 2881472 }, { "epoch": 32.0, "learning_rate": 4.499922261955e-08, "loss": 3.7718, "step": 2881500 }, { "epoch": 32.01, "learning_rate": 4.4985340825800145e-08, "loss": 3.7558, "step": 2882000 }, { "epoch": 32.01, "learning_rate": 4.497145903205029e-08, "loss": 3.7661, "step": 2882500 }, { "epoch": 32.02, "learning_rate": 4.495757723830042e-08, "loss": 3.7756, "step": 2883000 }, { "epoch": 32.02, "learning_rate": 4.494369544455056e-08, "loss": 3.7572, "step": 2883500 }, { "epoch": 32.03, "learning_rate": 4.49298136508007e-08, "loss": 3.7516, "step": 2884000 }, { "epoch": 32.03, "learning_rate": 4.491593185705084e-08, "loss": 3.7526, "step": 2884500 }, { "epoch": 32.04, "learning_rate": 4.490205006330098e-08, "loss": 3.7636, "step": 2885000 }, { "epoch": 32.04, "learning_rate": 4.488816826955112e-08, "loss": 3.7707, "step": 2885500 }, { "epoch": 32.05, "learning_rate": 4.487428647580126e-08, "loss": 3.7604, "step": 2886000 }, { "epoch": 32.06, "learning_rate": 4.486040468205139e-08, "loss": 3.7571, "step": 2886500 }, { "epoch": 32.06, "learning_rate": 4.484652288830153e-08, "loss": 3.7739, "step": 2887000 }, { "epoch": 32.07, "learning_rate": 4.483264109455167e-08, "loss": 3.7697, "step": 2887500 }, { "epoch": 32.07, "learning_rate": 4.481875930080181e-08, "loss": 3.7586, "step": 2888000 }, { "epoch": 32.08, "learning_rate": 4.480487750705195e-08, "loss": 3.7649, "step": 2888500 }, { "epoch": 32.08, "learning_rate": 4.479099571330209e-08, "loss": 3.7708, "step": 2889000 }, { "epoch": 32.09, "learning_rate": 4.4777113919552226e-08, "loss": 3.7477, "step": 2889500 }, { "epoch": 32.09, "learning_rate": 4.476323212580236e-08, "loss": 3.7805, "step": 2890000 }, { "epoch": 32.1, "learning_rate": 4.4749350332052504e-08, "loss": 3.7628, "step": 2890500 }, { "epoch": 32.11, "learning_rate": 4.4735468538302647e-08, "loss": 3.7618, "step": 2891000 }, { "epoch": 32.11, "learning_rate": 4.472158674455278e-08, "loss": 3.7862, "step": 2891500 }, { "epoch": 32.12, "learning_rate": 4.4707704950802924e-08, "loss": 3.7491, "step": 2892000 }, { "epoch": 32.12, "learning_rate": 4.469382315705306e-08, "loss": 3.755, "step": 2892500 }, { "epoch": 32.13, "learning_rate": 4.4679941363303196e-08, "loss": 3.757, "step": 2893000 }, { "epoch": 32.13, "learning_rate": 4.466605956955334e-08, "loss": 3.771, "step": 2893500 }, { "epoch": 32.14, "learning_rate": 4.4652177775803474e-08, "loss": 3.7449, "step": 2894000 }, { "epoch": 32.14, "learning_rate": 4.4638295982053616e-08, "loss": 3.7597, "step": 2894500 }, { "epoch": 32.15, "learning_rate": 4.462441418830376e-08, "loss": 3.7508, "step": 2895000 }, { "epoch": 32.16, "learning_rate": 4.461053239455389e-08, "loss": 3.7661, "step": 2895500 }, { "epoch": 32.16, "learning_rate": 4.459665060080403e-08, "loss": 3.758, "step": 2896000 }, { "epoch": 32.17, "learning_rate": 4.458276880705417e-08, "loss": 3.7647, "step": 2896500 }, { "epoch": 32.17, "learning_rate": 4.456888701330431e-08, "loss": 3.7464, "step": 2897000 }, { "epoch": 32.18, "learning_rate": 4.455500521955445e-08, "loss": 3.7596, "step": 2897500 }, { "epoch": 32.18, "learning_rate": 4.454112342580459e-08, "loss": 3.756, "step": 2898000 }, { "epoch": 32.19, "learning_rate": 4.452724163205472e-08, "loss": 3.764, "step": 2898500 }, { "epoch": 32.19, "learning_rate": 4.451335983830486e-08, "loss": 3.7585, "step": 2899000 }, { "epoch": 32.2, "learning_rate": 4.4499478044555006e-08, "loss": 3.7627, "step": 2899500 }, { "epoch": 32.21, "learning_rate": 4.448559625080514e-08, "loss": 3.7478, "step": 2900000 }, { "epoch": 32.21, "learning_rate": 4.4471714457055283e-08, "loss": 3.7645, "step": 2900500 }, { "epoch": 32.22, "learning_rate": 4.445783266330542e-08, "loss": 3.7541, "step": 2901000 }, { "epoch": 32.22, "learning_rate": 4.4443950869555555e-08, "loss": 3.7643, "step": 2901500 }, { "epoch": 32.23, "learning_rate": 4.44300690758057e-08, "loss": 3.7763, "step": 2902000 }, { "epoch": 32.23, "learning_rate": 4.441618728205583e-08, "loss": 3.7658, "step": 2902500 }, { "epoch": 32.24, "learning_rate": 4.4402305488305975e-08, "loss": 3.7637, "step": 2903000 }, { "epoch": 32.24, "learning_rate": 4.438842369455612e-08, "loss": 3.7652, "step": 2903500 }, { "epoch": 32.25, "learning_rate": 4.437454190080625e-08, "loss": 3.7585, "step": 2904000 }, { "epoch": 32.26, "learning_rate": 4.436066010705639e-08, "loss": 3.7384, "step": 2904500 }, { "epoch": 32.26, "learning_rate": 4.434677831330653e-08, "loss": 3.7683, "step": 2905000 }, { "epoch": 32.27, "learning_rate": 4.4332896519556666e-08, "loss": 3.7554, "step": 2905500 }, { "epoch": 32.27, "learning_rate": 4.431901472580681e-08, "loss": 3.7596, "step": 2906000 }, { "epoch": 32.28, "learning_rate": 4.430513293205695e-08, "loss": 3.7407, "step": 2906500 }, { "epoch": 32.28, "learning_rate": 4.4291251138307087e-08, "loss": 3.7728, "step": 2907000 }, { "epoch": 32.29, "learning_rate": 4.427736934455722e-08, "loss": 3.7572, "step": 2907500 }, { "epoch": 32.29, "learning_rate": 4.4263487550807364e-08, "loss": 3.737, "step": 2908000 }, { "epoch": 32.3, "learning_rate": 4.42496057570575e-08, "loss": 3.7615, "step": 2908500 }, { "epoch": 32.31, "learning_rate": 4.423572396330764e-08, "loss": 3.7604, "step": 2909000 }, { "epoch": 32.31, "learning_rate": 4.422184216955778e-08, "loss": 3.7491, "step": 2909500 }, { "epoch": 32.32, "learning_rate": 4.420796037580792e-08, "loss": 3.7687, "step": 2910000 }, { "epoch": 32.32, "learning_rate": 4.4194078582058056e-08, "loss": 3.7661, "step": 2910500 }, { "epoch": 32.33, "learning_rate": 4.418019678830819e-08, "loss": 3.7766, "step": 2911000 }, { "epoch": 32.33, "learning_rate": 4.4166314994558334e-08, "loss": 3.765, "step": 2911500 }, { "epoch": 32.34, "learning_rate": 4.4152433200808476e-08, "loss": 3.7821, "step": 2912000 }, { "epoch": 32.34, "learning_rate": 4.413855140705861e-08, "loss": 3.7942, "step": 2912500 }, { "epoch": 32.35, "learning_rate": 4.4124669613308754e-08, "loss": 3.767, "step": 2913000 }, { "epoch": 32.36, "learning_rate": 4.411078781955889e-08, "loss": 3.7776, "step": 2913500 }, { "epoch": 32.36, "learning_rate": 4.4096906025809025e-08, "loss": 3.7549, "step": 2914000 }, { "epoch": 32.37, "learning_rate": 4.408302423205917e-08, "loss": 3.7848, "step": 2914500 }, { "epoch": 32.37, "learning_rate": 4.406914243830931e-08, "loss": 3.7703, "step": 2915000 }, { "epoch": 32.38, "learning_rate": 4.4055260644559445e-08, "loss": 3.7769, "step": 2915500 }, { "epoch": 32.38, "learning_rate": 4.404137885080959e-08, "loss": 3.7403, "step": 2916000 }, { "epoch": 32.39, "learning_rate": 4.4027497057059723e-08, "loss": 3.7579, "step": 2916500 }, { "epoch": 32.39, "learning_rate": 4.401361526330986e-08, "loss": 3.7853, "step": 2917000 }, { "epoch": 32.4, "learning_rate": 4.399973346956e-08, "loss": 3.7661, "step": 2917500 }, { "epoch": 32.41, "learning_rate": 4.398585167581014e-08, "loss": 3.7605, "step": 2918000 }, { "epoch": 32.41, "learning_rate": 4.397196988206028e-08, "loss": 3.7604, "step": 2918500 }, { "epoch": 32.42, "learning_rate": 4.395808808831042e-08, "loss": 3.7866, "step": 2919000 }, { "epoch": 32.42, "learning_rate": 4.394420629456056e-08, "loss": 3.7694, "step": 2919500 }, { "epoch": 32.43, "learning_rate": 4.393032450081069e-08, "loss": 3.7735, "step": 2920000 }, { "epoch": 32.43, "learning_rate": 4.3916442707060835e-08, "loss": 3.7436, "step": 2920500 }, { "epoch": 32.44, "learning_rate": 4.390256091331097e-08, "loss": 3.7456, "step": 2921000 }, { "epoch": 32.44, "learning_rate": 4.388867911956111e-08, "loss": 3.7682, "step": 2921500 }, { "epoch": 32.45, "learning_rate": 4.3874797325811255e-08, "loss": 3.7458, "step": 2922000 }, { "epoch": 32.46, "learning_rate": 4.386091553206139e-08, "loss": 3.7691, "step": 2922500 }, { "epoch": 32.46, "learning_rate": 4.3847033738311526e-08, "loss": 3.7418, "step": 2923000 }, { "epoch": 32.47, "learning_rate": 4.383315194456166e-08, "loss": 3.7662, "step": 2923500 }, { "epoch": 32.47, "learning_rate": 4.3819270150811804e-08, "loss": 3.7551, "step": 2924000 }, { "epoch": 32.48, "learning_rate": 4.3805388357061947e-08, "loss": 3.7528, "step": 2924500 }, { "epoch": 32.48, "learning_rate": 4.379150656331208e-08, "loss": 3.7577, "step": 2925000 }, { "epoch": 32.49, "learning_rate": 4.3777624769562225e-08, "loss": 3.7692, "step": 2925500 }, { "epoch": 32.49, "learning_rate": 4.376374297581236e-08, "loss": 3.7541, "step": 2926000 }, { "epoch": 32.5, "learning_rate": 4.3749861182062496e-08, "loss": 3.7732, "step": 2926500 }, { "epoch": 32.51, "learning_rate": 4.373597938831264e-08, "loss": 3.7502, "step": 2927000 }, { "epoch": 32.51, "learning_rate": 4.372209759456278e-08, "loss": 3.7676, "step": 2927500 }, { "epoch": 32.52, "learning_rate": 4.3708215800812916e-08, "loss": 3.7692, "step": 2928000 }, { "epoch": 32.52, "learning_rate": 4.369433400706306e-08, "loss": 3.7638, "step": 2928500 }, { "epoch": 32.53, "learning_rate": 4.3680452213313194e-08, "loss": 3.7326, "step": 2929000 }, { "epoch": 32.53, "learning_rate": 4.366657041956333e-08, "loss": 3.7494, "step": 2929500 }, { "epoch": 32.54, "learning_rate": 4.365268862581347e-08, "loss": 3.7645, "step": 2930000 }, { "epoch": 32.54, "learning_rate": 4.3638806832063614e-08, "loss": 3.7549, "step": 2930500 }, { "epoch": 32.55, "learning_rate": 4.362492503831375e-08, "loss": 3.7675, "step": 2931000 }, { "epoch": 32.56, "learning_rate": 4.3611043244563885e-08, "loss": 3.7538, "step": 2931500 }, { "epoch": 32.56, "learning_rate": 4.359716145081402e-08, "loss": 3.78, "step": 2932000 }, { "epoch": 32.57, "learning_rate": 4.3583279657064163e-08, "loss": 3.7514, "step": 2932500 }, { "epoch": 32.57, "learning_rate": 4.3569397863314306e-08, "loss": 3.7748, "step": 2933000 }, { "epoch": 32.58, "learning_rate": 4.355551606956444e-08, "loss": 3.7552, "step": 2933500 }, { "epoch": 32.58, "learning_rate": 4.3541634275814584e-08, "loss": 3.7586, "step": 2934000 }, { "epoch": 32.59, "learning_rate": 4.352775248206472e-08, "loss": 3.7554, "step": 2934500 }, { "epoch": 32.59, "learning_rate": 4.3513870688314855e-08, "loss": 3.7595, "step": 2935000 }, { "epoch": 32.6, "learning_rate": 4.3499988894565e-08, "loss": 3.7534, "step": 2935500 }, { "epoch": 32.61, "learning_rate": 4.348610710081514e-08, "loss": 3.7545, "step": 2936000 }, { "epoch": 32.61, "learning_rate": 4.3472225307065275e-08, "loss": 3.7649, "step": 2936500 }, { "epoch": 32.62, "learning_rate": 4.345834351331542e-08, "loss": 3.748, "step": 2937000 }, { "epoch": 32.62, "learning_rate": 4.344446171956555e-08, "loss": 3.7764, "step": 2937500 }, { "epoch": 32.63, "learning_rate": 4.343057992581569e-08, "loss": 3.7488, "step": 2938000 }, { "epoch": 32.63, "learning_rate": 4.341669813206583e-08, "loss": 3.7843, "step": 2938500 }, { "epoch": 32.64, "learning_rate": 4.3402816338315966e-08, "loss": 3.7544, "step": 2939000 }, { "epoch": 32.64, "learning_rate": 4.338893454456611e-08, "loss": 3.7571, "step": 2939500 }, { "epoch": 32.65, "learning_rate": 4.337505275081625e-08, "loss": 3.7696, "step": 2940000 }, { "epoch": 32.66, "learning_rate": 4.3361170957066387e-08, "loss": 3.7596, "step": 2940500 }, { "epoch": 32.66, "learning_rate": 4.334728916331652e-08, "loss": 3.7791, "step": 2941000 }, { "epoch": 32.67, "learning_rate": 4.3333407369566665e-08, "loss": 3.7632, "step": 2941500 }, { "epoch": 32.67, "learning_rate": 4.33195255758168e-08, "loss": 3.7645, "step": 2942000 }, { "epoch": 32.68, "learning_rate": 4.330564378206694e-08, "loss": 3.769, "step": 2942500 }, { "epoch": 32.68, "learning_rate": 4.3291761988317085e-08, "loss": 3.763, "step": 2943000 }, { "epoch": 32.69, "learning_rate": 4.327788019456722e-08, "loss": 3.763, "step": 2943500 }, { "epoch": 32.69, "learning_rate": 4.3263998400817356e-08, "loss": 3.7635, "step": 2944000 }, { "epoch": 32.7, "learning_rate": 4.32501166070675e-08, "loss": 3.7683, "step": 2944500 }, { "epoch": 32.71, "learning_rate": 4.3236234813317634e-08, "loss": 3.7428, "step": 2945000 }, { "epoch": 32.71, "learning_rate": 4.3222353019567776e-08, "loss": 3.7381, "step": 2945500 }, { "epoch": 32.72, "learning_rate": 4.320847122581791e-08, "loss": 3.7751, "step": 2946000 }, { "epoch": 32.72, "learning_rate": 4.3194589432068054e-08, "loss": 3.7728, "step": 2946500 }, { "epoch": 32.73, "learning_rate": 4.318070763831819e-08, "loss": 3.7699, "step": 2947000 }, { "epoch": 32.73, "learning_rate": 4.3166825844568325e-08, "loss": 3.7628, "step": 2947500 }, { "epoch": 32.74, "learning_rate": 4.315294405081847e-08, "loss": 3.757, "step": 2948000 }, { "epoch": 32.74, "learning_rate": 4.313906225706861e-08, "loss": 3.7636, "step": 2948500 }, { "epoch": 32.75, "learning_rate": 4.3125180463318746e-08, "loss": 3.7597, "step": 2949000 }, { "epoch": 32.76, "learning_rate": 4.311129866956889e-08, "loss": 3.7664, "step": 2949500 }, { "epoch": 32.76, "learning_rate": 4.3097416875819023e-08, "loss": 3.7609, "step": 2950000 }, { "epoch": 32.77, "learning_rate": 4.308353508206916e-08, "loss": 3.7506, "step": 2950500 }, { "epoch": 32.77, "learning_rate": 4.30696532883193e-08, "loss": 3.7589, "step": 2951000 }, { "epoch": 32.78, "learning_rate": 4.3055771494569444e-08, "loss": 3.7634, "step": 2951500 }, { "epoch": 32.78, "learning_rate": 4.304188970081958e-08, "loss": 3.757, "step": 2952000 }, { "epoch": 32.79, "learning_rate": 4.302800790706972e-08, "loss": 3.7416, "step": 2952500 }, { "epoch": 32.79, "learning_rate": 4.301412611331986e-08, "loss": 3.7597, "step": 2953000 }, { "epoch": 32.8, "learning_rate": 4.300024431956999e-08, "loss": 3.7543, "step": 2953500 }, { "epoch": 32.81, "learning_rate": 4.2986362525820135e-08, "loss": 3.7661, "step": 2954000 }, { "epoch": 32.81, "learning_rate": 4.297248073207027e-08, "loss": 3.7557, "step": 2954500 }, { "epoch": 32.82, "learning_rate": 4.295859893832041e-08, "loss": 3.7564, "step": 2955000 }, { "epoch": 32.82, "learning_rate": 4.2944717144570555e-08, "loss": 3.7538, "step": 2955500 }, { "epoch": 32.83, "learning_rate": 4.293083535082069e-08, "loss": 3.7671, "step": 2956000 }, { "epoch": 32.83, "learning_rate": 4.2916953557070827e-08, "loss": 3.7622, "step": 2956500 }, { "epoch": 32.84, "learning_rate": 4.290307176332097e-08, "loss": 3.7508, "step": 2957000 }, { "epoch": 32.84, "learning_rate": 4.2889189969571104e-08, "loss": 3.7583, "step": 2957500 }, { "epoch": 32.85, "learning_rate": 4.287530817582125e-08, "loss": 3.7672, "step": 2958000 }, { "epoch": 32.86, "learning_rate": 4.286142638207139e-08, "loss": 3.7523, "step": 2958500 }, { "epoch": 32.86, "learning_rate": 4.2847544588321525e-08, "loss": 3.7561, "step": 2959000 }, { "epoch": 32.87, "learning_rate": 4.283366279457166e-08, "loss": 3.7496, "step": 2959500 }, { "epoch": 32.87, "learning_rate": 4.28197810008218e-08, "loss": 3.7694, "step": 2960000 }, { "epoch": 32.88, "learning_rate": 4.280589920707194e-08, "loss": 3.7495, "step": 2960500 }, { "epoch": 32.88, "learning_rate": 4.279201741332208e-08, "loss": 3.7545, "step": 2961000 }, { "epoch": 32.89, "learning_rate": 4.2778135619572216e-08, "loss": 3.7625, "step": 2961500 }, { "epoch": 32.89, "learning_rate": 4.276425382582235e-08, "loss": 3.7661, "step": 2962000 }, { "epoch": 32.9, "learning_rate": 4.2750372032072494e-08, "loss": 3.7644, "step": 2962500 }, { "epoch": 32.91, "learning_rate": 4.273649023832263e-08, "loss": 3.7606, "step": 2963000 }, { "epoch": 32.91, "learning_rate": 4.272260844457277e-08, "loss": 3.7532, "step": 2963500 }, { "epoch": 32.92, "learning_rate": 4.2708726650822914e-08, "loss": 3.7812, "step": 2964000 }, { "epoch": 32.92, "learning_rate": 4.269484485707305e-08, "loss": 3.7498, "step": 2964500 }, { "epoch": 32.93, "learning_rate": 4.2680963063323186e-08, "loss": 3.7682, "step": 2965000 }, { "epoch": 32.93, "learning_rate": 4.266708126957333e-08, "loss": 3.7603, "step": 2965500 }, { "epoch": 32.94, "learning_rate": 4.2653199475823463e-08, "loss": 3.7575, "step": 2966000 }, { "epoch": 32.94, "learning_rate": 4.2639317682073606e-08, "loss": 3.7574, "step": 2966500 }, { "epoch": 32.95, "learning_rate": 4.262543588832375e-08, "loss": 3.7667, "step": 2967000 }, { "epoch": 32.96, "learning_rate": 4.2611554094573884e-08, "loss": 3.767, "step": 2967500 }, { "epoch": 32.96, "learning_rate": 4.259767230082402e-08, "loss": 3.7593, "step": 2968000 }, { "epoch": 32.97, "learning_rate": 4.2583790507074155e-08, "loss": 3.7385, "step": 2968500 }, { "epoch": 32.97, "learning_rate": 4.25699087133243e-08, "loss": 3.7377, "step": 2969000 }, { "epoch": 32.98, "learning_rate": 4.255602691957444e-08, "loss": 3.7575, "step": 2969500 }, { "epoch": 32.98, "learning_rate": 4.2542145125824575e-08, "loss": 3.7583, "step": 2970000 }, { "epoch": 32.99, "learning_rate": 4.252826333207472e-08, "loss": 3.7637, "step": 2970500 }, { "epoch": 32.99, "learning_rate": 4.251438153832485e-08, "loss": 3.7625, "step": 2971000 }, { "epoch": 33.0, "learning_rate": 4.250049974457499e-08, "loss": 3.7628, "step": 2971500 }, { "epoch": 33.0, "eval_loss": 3.8318238258361816, "eval_runtime": 6.2995, "eval_samples_per_second": 246.685, "step": 2971518 }, { "epoch": 33.01, "learning_rate": 4.248661795082513e-08, "loss": 3.7672, "step": 2972000 }, { "epoch": 33.01, "learning_rate": 4.247273615707527e-08, "loss": 3.7588, "step": 2972500 }, { "epoch": 33.02, "learning_rate": 4.245885436332541e-08, "loss": 3.7506, "step": 2973000 }, { "epoch": 33.02, "learning_rate": 4.244497256957555e-08, "loss": 3.7592, "step": 2973500 }, { "epoch": 33.03, "learning_rate": 4.243109077582569e-08, "loss": 3.7514, "step": 2974000 }, { "epoch": 33.03, "learning_rate": 4.241720898207582e-08, "loss": 3.7465, "step": 2974500 }, { "epoch": 33.04, "learning_rate": 4.2403327188325965e-08, "loss": 3.7638, "step": 2975000 }, { "epoch": 33.04, "learning_rate": 4.23894453945761e-08, "loss": 3.7594, "step": 2975500 }, { "epoch": 33.05, "learning_rate": 4.237556360082624e-08, "loss": 3.751, "step": 2976000 }, { "epoch": 33.06, "learning_rate": 4.2361681807076385e-08, "loss": 3.7603, "step": 2976500 }, { "epoch": 33.06, "learning_rate": 4.234780001332652e-08, "loss": 3.7406, "step": 2977000 }, { "epoch": 33.07, "learning_rate": 4.2333918219576656e-08, "loss": 3.7563, "step": 2977500 }, { "epoch": 33.07, "learning_rate": 4.23200364258268e-08, "loss": 3.7728, "step": 2978000 }, { "epoch": 33.08, "learning_rate": 4.2306154632076934e-08, "loss": 3.7653, "step": 2978500 }, { "epoch": 33.08, "learning_rate": 4.2292272838327076e-08, "loss": 3.7594, "step": 2979000 }, { "epoch": 33.09, "learning_rate": 4.227839104457722e-08, "loss": 3.7651, "step": 2979500 }, { "epoch": 33.09, "learning_rate": 4.2264509250827354e-08, "loss": 3.7488, "step": 2980000 }, { "epoch": 33.1, "learning_rate": 4.225062745707749e-08, "loss": 3.7471, "step": 2980500 }, { "epoch": 33.11, "learning_rate": 4.223674566332763e-08, "loss": 3.7588, "step": 2981000 }, { "epoch": 33.11, "learning_rate": 4.222286386957777e-08, "loss": 3.7564, "step": 2981500 }, { "epoch": 33.12, "learning_rate": 4.220898207582791e-08, "loss": 3.7624, "step": 2982000 }, { "epoch": 33.12, "learning_rate": 4.2195100282078046e-08, "loss": 3.7513, "step": 2982500 }, { "epoch": 33.13, "learning_rate": 4.218121848832819e-08, "loss": 3.7534, "step": 2983000 }, { "epoch": 33.13, "learning_rate": 4.2167336694578324e-08, "loss": 3.7573, "step": 2983500 }, { "epoch": 33.14, "learning_rate": 4.215345490082846e-08, "loss": 3.746, "step": 2984000 }, { "epoch": 33.14, "learning_rate": 4.21395731070786e-08, "loss": 3.7505, "step": 2984500 }, { "epoch": 33.15, "learning_rate": 4.2125691313328744e-08, "loss": 3.7718, "step": 2985000 }, { "epoch": 33.16, "learning_rate": 4.211180951957888e-08, "loss": 3.7519, "step": 2985500 }, { "epoch": 33.16, "learning_rate": 4.209792772582902e-08, "loss": 3.7223, "step": 2986000 }, { "epoch": 33.17, "learning_rate": 4.208404593207916e-08, "loss": 3.762, "step": 2986500 }, { "epoch": 33.17, "learning_rate": 4.207016413832929e-08, "loss": 3.7671, "step": 2987000 }, { "epoch": 33.18, "learning_rate": 4.2056282344579435e-08, "loss": 3.7652, "step": 2987500 }, { "epoch": 33.18, "learning_rate": 4.204240055082958e-08, "loss": 3.7673, "step": 2988000 }, { "epoch": 33.19, "learning_rate": 4.202851875707971e-08, "loss": 3.7631, "step": 2988500 }, { "epoch": 33.19, "learning_rate": 4.2014636963329855e-08, "loss": 3.7392, "step": 2989000 }, { "epoch": 33.2, "learning_rate": 4.200075516957999e-08, "loss": 3.7444, "step": 2989500 }, { "epoch": 33.21, "learning_rate": 4.1986873375830127e-08, "loss": 3.7564, "step": 2990000 }, { "epoch": 33.21, "learning_rate": 4.197299158208027e-08, "loss": 3.7541, "step": 2990500 }, { "epoch": 33.22, "learning_rate": 4.1959109788330405e-08, "loss": 3.7734, "step": 2991000 }, { "epoch": 33.22, "learning_rate": 4.194522799458055e-08, "loss": 3.7489, "step": 2991500 }, { "epoch": 33.23, "learning_rate": 4.193134620083069e-08, "loss": 3.7701, "step": 2992000 }, { "epoch": 33.23, "learning_rate": 4.1917464407080825e-08, "loss": 3.7663, "step": 2992500 }, { "epoch": 33.24, "learning_rate": 4.190358261333096e-08, "loss": 3.7604, "step": 2993000 }, { "epoch": 33.24, "learning_rate": 4.18897008195811e-08, "loss": 3.7661, "step": 2993500 }, { "epoch": 33.25, "learning_rate": 4.187581902583124e-08, "loss": 3.7687, "step": 2994000 }, { "epoch": 33.26, "learning_rate": 4.186193723208138e-08, "loss": 3.7297, "step": 2994500 }, { "epoch": 33.26, "learning_rate": 4.184805543833152e-08, "loss": 3.7729, "step": 2995000 }, { "epoch": 33.27, "learning_rate": 4.183417364458165e-08, "loss": 3.7532, "step": 2995500 }, { "epoch": 33.27, "learning_rate": 4.1820291850831794e-08, "loss": 3.7651, "step": 2996000 }, { "epoch": 33.28, "learning_rate": 4.1806410057081936e-08, "loss": 3.7615, "step": 2996500 }, { "epoch": 33.28, "learning_rate": 4.179252826333207e-08, "loss": 3.7717, "step": 2997000 }, { "epoch": 33.29, "learning_rate": 4.1778646469582214e-08, "loss": 3.7716, "step": 2997500 }, { "epoch": 33.29, "learning_rate": 4.176476467583235e-08, "loss": 3.7686, "step": 2998000 }, { "epoch": 33.3, "learning_rate": 4.1750882882082486e-08, "loss": 3.7659, "step": 2998500 }, { "epoch": 33.31, "learning_rate": 4.173700108833263e-08, "loss": 3.765, "step": 2999000 }, { "epoch": 33.31, "learning_rate": 4.1723119294582763e-08, "loss": 3.759, "step": 2999500 }, { "epoch": 33.32, "learning_rate": 4.1709237500832906e-08, "loss": 3.7735, "step": 3000000 }, { "epoch": 33.32, "learning_rate": 4.169535570708305e-08, "loss": 3.7671, "step": 3000500 }, { "epoch": 33.33, "learning_rate": 4.1681473913333184e-08, "loss": 3.7485, "step": 3001000 }, { "epoch": 33.33, "learning_rate": 4.166759211958332e-08, "loss": 3.7685, "step": 3001500 }, { "epoch": 33.34, "learning_rate": 4.165371032583346e-08, "loss": 3.7545, "step": 3002000 }, { "epoch": 33.34, "learning_rate": 4.16398285320836e-08, "loss": 3.7649, "step": 3002500 }, { "epoch": 33.35, "learning_rate": 4.162594673833374e-08, "loss": 3.7882, "step": 3003000 }, { "epoch": 33.36, "learning_rate": 4.161206494458388e-08, "loss": 3.7508, "step": 3003500 }, { "epoch": 33.36, "learning_rate": 4.159818315083402e-08, "loss": 3.7391, "step": 3004000 }, { "epoch": 33.37, "learning_rate": 4.158430135708415e-08, "loss": 3.7653, "step": 3004500 }, { "epoch": 33.37, "learning_rate": 4.157041956333429e-08, "loss": 3.769, "step": 3005000 }, { "epoch": 33.38, "learning_rate": 4.155653776958443e-08, "loss": 3.7645, "step": 3005500 }, { "epoch": 33.38, "learning_rate": 4.154265597583457e-08, "loss": 3.7727, "step": 3006000 }, { "epoch": 33.39, "learning_rate": 4.152877418208471e-08, "loss": 3.7668, "step": 3006500 }, { "epoch": 33.39, "learning_rate": 4.151489238833485e-08, "loss": 3.747, "step": 3007000 }, { "epoch": 33.4, "learning_rate": 4.150101059458499e-08, "loss": 3.7493, "step": 3007500 }, { "epoch": 33.41, "learning_rate": 4.148712880083512e-08, "loss": 3.7632, "step": 3008000 }, { "epoch": 33.41, "learning_rate": 4.1473247007085265e-08, "loss": 3.7494, "step": 3008500 }, { "epoch": 33.42, "learning_rate": 4.145936521333541e-08, "loss": 3.7536, "step": 3009000 }, { "epoch": 33.42, "learning_rate": 4.144548341958554e-08, "loss": 3.7584, "step": 3009500 }, { "epoch": 33.43, "learning_rate": 4.1431601625835685e-08, "loss": 3.7868, "step": 3010000 }, { "epoch": 33.43, "learning_rate": 4.141771983208582e-08, "loss": 3.7545, "step": 3010500 }, { "epoch": 33.44, "learning_rate": 4.1403838038335956e-08, "loss": 3.7691, "step": 3011000 }, { "epoch": 33.44, "learning_rate": 4.13899562445861e-08, "loss": 3.7371, "step": 3011500 }, { "epoch": 33.45, "learning_rate": 4.1376074450836234e-08, "loss": 3.7668, "step": 3012000 }, { "epoch": 33.46, "learning_rate": 4.1362192657086376e-08, "loss": 3.7444, "step": 3012500 }, { "epoch": 33.46, "learning_rate": 4.134831086333652e-08, "loss": 3.747, "step": 3013000 }, { "epoch": 33.47, "learning_rate": 4.1334429069586654e-08, "loss": 3.7682, "step": 3013500 }, { "epoch": 33.47, "learning_rate": 4.132054727583679e-08, "loss": 3.7794, "step": 3014000 }, { "epoch": 33.48, "learning_rate": 4.130666548208693e-08, "loss": 3.7429, "step": 3014500 }, { "epoch": 33.48, "learning_rate": 4.129278368833707e-08, "loss": 3.7653, "step": 3015000 }, { "epoch": 33.49, "learning_rate": 4.127890189458721e-08, "loss": 3.7317, "step": 3015500 }, { "epoch": 33.49, "learning_rate": 4.126502010083735e-08, "loss": 3.7666, "step": 3016000 }, { "epoch": 33.5, "learning_rate": 4.125113830708749e-08, "loss": 3.7334, "step": 3016500 }, { "epoch": 33.51, "learning_rate": 4.1237256513337624e-08, "loss": 3.7695, "step": 3017000 }, { "epoch": 33.51, "learning_rate": 4.1223374719587766e-08, "loss": 3.7699, "step": 3017500 }, { "epoch": 33.52, "learning_rate": 4.12094929258379e-08, "loss": 3.7669, "step": 3018000 }, { "epoch": 33.52, "learning_rate": 4.1195611132088044e-08, "loss": 3.7645, "step": 3018500 }, { "epoch": 33.53, "learning_rate": 4.118172933833818e-08, "loss": 3.7424, "step": 3019000 }, { "epoch": 33.53, "learning_rate": 4.116784754458832e-08, "loss": 3.7601, "step": 3019500 }, { "epoch": 33.54, "learning_rate": 4.115396575083846e-08, "loss": 3.7823, "step": 3020000 }, { "epoch": 33.54, "learning_rate": 4.114008395708859e-08, "loss": 3.7645, "step": 3020500 }, { "epoch": 33.55, "learning_rate": 4.1126202163338735e-08, "loss": 3.7502, "step": 3021000 }, { "epoch": 33.56, "learning_rate": 4.111232036958888e-08, "loss": 3.7572, "step": 3021500 }, { "epoch": 33.56, "learning_rate": 4.109843857583901e-08, "loss": 3.7665, "step": 3022000 }, { "epoch": 33.57, "learning_rate": 4.1084556782089155e-08, "loss": 3.7527, "step": 3022500 }, { "epoch": 33.57, "learning_rate": 4.107067498833929e-08, "loss": 3.758, "step": 3023000 }, { "epoch": 33.58, "learning_rate": 4.105679319458943e-08, "loss": 3.7579, "step": 3023500 }, { "epoch": 33.58, "learning_rate": 4.104291140083957e-08, "loss": 3.7492, "step": 3024000 }, { "epoch": 33.59, "learning_rate": 4.102902960708971e-08, "loss": 3.7726, "step": 3024500 }, { "epoch": 33.59, "learning_rate": 4.101514781333985e-08, "loss": 3.7638, "step": 3025000 }, { "epoch": 33.6, "learning_rate": 4.100126601958999e-08, "loss": 3.7738, "step": 3025500 }, { "epoch": 33.61, "learning_rate": 4.0987384225840125e-08, "loss": 3.7373, "step": 3026000 }, { "epoch": 33.61, "learning_rate": 4.097350243209026e-08, "loss": 3.7331, "step": 3026500 }, { "epoch": 33.62, "learning_rate": 4.09596206383404e-08, "loss": 3.7807, "step": 3027000 }, { "epoch": 33.62, "learning_rate": 4.094573884459054e-08, "loss": 3.7439, "step": 3027500 }, { "epoch": 33.63, "learning_rate": 4.093185705084068e-08, "loss": 3.7509, "step": 3028000 }, { "epoch": 33.63, "learning_rate": 4.091797525709082e-08, "loss": 3.7572, "step": 3028500 }, { "epoch": 33.64, "learning_rate": 4.090409346334095e-08, "loss": 3.7641, "step": 3029000 }, { "epoch": 33.64, "learning_rate": 4.0890211669591094e-08, "loss": 3.7662, "step": 3029500 }, { "epoch": 33.65, "learning_rate": 4.0876329875841236e-08, "loss": 3.7678, "step": 3030000 }, { "epoch": 33.66, "learning_rate": 4.086244808209137e-08, "loss": 3.7716, "step": 3030500 }, { "epoch": 33.66, "learning_rate": 4.0848566288341514e-08, "loss": 3.7802, "step": 3031000 }, { "epoch": 33.67, "learning_rate": 4.0834684494591657e-08, "loss": 3.7705, "step": 3031500 }, { "epoch": 33.67, "learning_rate": 4.0820802700841786e-08, "loss": 3.7669, "step": 3032000 }, { "epoch": 33.68, "learning_rate": 4.080692090709193e-08, "loss": 3.7599, "step": 3032500 }, { "epoch": 33.68, "learning_rate": 4.079303911334207e-08, "loss": 3.7531, "step": 3033000 }, { "epoch": 33.69, "learning_rate": 4.0779157319592206e-08, "loss": 3.767, "step": 3033500 }, { "epoch": 33.69, "learning_rate": 4.076527552584235e-08, "loss": 3.7454, "step": 3034000 }, { "epoch": 33.7, "learning_rate": 4.0751393732092484e-08, "loss": 3.7582, "step": 3034500 }, { "epoch": 33.7, "learning_rate": 4.073751193834262e-08, "loss": 3.7813, "step": 3035000 }, { "epoch": 33.71, "learning_rate": 4.072363014459276e-08, "loss": 3.778, "step": 3035500 }, { "epoch": 33.72, "learning_rate": 4.07097483508429e-08, "loss": 3.7557, "step": 3036000 }, { "epoch": 33.72, "learning_rate": 4.069586655709304e-08, "loss": 3.7444, "step": 3036500 }, { "epoch": 33.73, "learning_rate": 4.068198476334318e-08, "loss": 3.7545, "step": 3037000 }, { "epoch": 33.73, "learning_rate": 4.066810296959332e-08, "loss": 3.7755, "step": 3037500 }, { "epoch": 33.74, "learning_rate": 4.065422117584345e-08, "loss": 3.7734, "step": 3038000 }, { "epoch": 33.74, "learning_rate": 4.0640339382093595e-08, "loss": 3.7723, "step": 3038500 }, { "epoch": 33.75, "learning_rate": 4.062645758834373e-08, "loss": 3.7541, "step": 3039000 }, { "epoch": 33.75, "learning_rate": 4.061257579459387e-08, "loss": 3.7627, "step": 3039500 }, { "epoch": 33.76, "learning_rate": 4.0598694000844016e-08, "loss": 3.7728, "step": 3040000 }, { "epoch": 33.77, "learning_rate": 4.058481220709415e-08, "loss": 3.7335, "step": 3040500 }, { "epoch": 33.77, "learning_rate": 4.057093041334429e-08, "loss": 3.7653, "step": 3041000 }, { "epoch": 33.78, "learning_rate": 4.055704861959442e-08, "loss": 3.767, "step": 3041500 }, { "epoch": 33.78, "learning_rate": 4.0543166825844565e-08, "loss": 3.782, "step": 3042000 }, { "epoch": 33.79, "learning_rate": 4.052928503209471e-08, "loss": 3.7644, "step": 3042500 }, { "epoch": 33.79, "learning_rate": 4.051540323834484e-08, "loss": 3.7563, "step": 3043000 }, { "epoch": 33.8, "learning_rate": 4.0501521444594985e-08, "loss": 3.7428, "step": 3043500 }, { "epoch": 33.8, "learning_rate": 4.048763965084512e-08, "loss": 3.7448, "step": 3044000 }, { "epoch": 33.81, "learning_rate": 4.0473757857095256e-08, "loss": 3.758, "step": 3044500 }, { "epoch": 33.82, "learning_rate": 4.04598760633454e-08, "loss": 3.7472, "step": 3045000 }, { "epoch": 33.82, "learning_rate": 4.044599426959554e-08, "loss": 3.751, "step": 3045500 }, { "epoch": 33.83, "learning_rate": 4.0432112475845676e-08, "loss": 3.755, "step": 3046000 }, { "epoch": 33.83, "learning_rate": 4.041823068209582e-08, "loss": 3.7539, "step": 3046500 }, { "epoch": 33.84, "learning_rate": 4.0404348888345954e-08, "loss": 3.7658, "step": 3047000 }, { "epoch": 33.84, "learning_rate": 4.039046709459609e-08, "loss": 3.7604, "step": 3047500 }, { "epoch": 33.85, "learning_rate": 4.037658530084623e-08, "loss": 3.7448, "step": 3048000 }, { "epoch": 33.85, "learning_rate": 4.036270350709637e-08, "loss": 3.7496, "step": 3048500 }, { "epoch": 33.86, "learning_rate": 4.034882171334651e-08, "loss": 3.7755, "step": 3049000 }, { "epoch": 33.87, "learning_rate": 4.033493991959665e-08, "loss": 3.7601, "step": 3049500 }, { "epoch": 33.87, "learning_rate": 4.032105812584679e-08, "loss": 3.7645, "step": 3050000 }, { "epoch": 33.88, "learning_rate": 4.0307176332096924e-08, "loss": 3.7533, "step": 3050500 }, { "epoch": 33.88, "learning_rate": 4.0293294538347066e-08, "loss": 3.7591, "step": 3051000 }, { "epoch": 33.89, "learning_rate": 4.02794127445972e-08, "loss": 3.7464, "step": 3051500 }, { "epoch": 33.89, "learning_rate": 4.0265530950847344e-08, "loss": 3.7609, "step": 3052000 }, { "epoch": 33.9, "learning_rate": 4.0251649157097486e-08, "loss": 3.7481, "step": 3052500 }, { "epoch": 33.9, "learning_rate": 4.023776736334762e-08, "loss": 3.7715, "step": 3053000 }, { "epoch": 33.91, "learning_rate": 4.022388556959776e-08, "loss": 3.7559, "step": 3053500 }, { "epoch": 33.92, "learning_rate": 4.02100037758479e-08, "loss": 3.7524, "step": 3054000 }, { "epoch": 33.92, "learning_rate": 4.0196121982098035e-08, "loss": 3.7324, "step": 3054500 }, { "epoch": 33.93, "learning_rate": 4.018224018834818e-08, "loss": 3.7638, "step": 3055000 }, { "epoch": 33.93, "learning_rate": 4.016835839459832e-08, "loss": 3.7416, "step": 3055500 }, { "epoch": 33.94, "learning_rate": 4.0154476600848456e-08, "loss": 3.7695, "step": 3056000 }, { "epoch": 33.94, "learning_rate": 4.014059480709859e-08, "loss": 3.7618, "step": 3056500 }, { "epoch": 33.95, "learning_rate": 4.012671301334873e-08, "loss": 3.7486, "step": 3057000 }, { "epoch": 33.95, "learning_rate": 4.011283121959887e-08, "loss": 3.7653, "step": 3057500 }, { "epoch": 33.96, "learning_rate": 4.009894942584901e-08, "loss": 3.7647, "step": 3058000 }, { "epoch": 33.97, "learning_rate": 4.008506763209915e-08, "loss": 3.7524, "step": 3058500 }, { "epoch": 33.97, "learning_rate": 4.007118583834929e-08, "loss": 3.7609, "step": 3059000 }, { "epoch": 33.98, "learning_rate": 4.0057304044599425e-08, "loss": 3.7714, "step": 3059500 }, { "epoch": 33.98, "learning_rate": 4.004342225084956e-08, "loss": 3.7555, "step": 3060000 }, { "epoch": 33.99, "learning_rate": 4.00295404570997e-08, "loss": 3.792, "step": 3060500 }, { "epoch": 33.99, "learning_rate": 4.0015658663349845e-08, "loss": 3.762, "step": 3061000 }, { "epoch": 34.0, "learning_rate": 4.000177686959998e-08, "loss": 3.7541, "step": 3061500 }, { "epoch": 34.0, "eval_loss": 3.8307807445526123, "eval_runtime": 6.3013, "eval_samples_per_second": 246.615, "step": 3061564 }, { "epoch": 34.0, "learning_rate": 3.998789507585012e-08, "loss": 3.7586, "step": 3062000 }, { "epoch": 34.01, "learning_rate": 3.997401328210026e-08, "loss": 3.7673, "step": 3062500 }, { "epoch": 34.02, "learning_rate": 3.9960131488350394e-08, "loss": 3.7677, "step": 3063000 }, { "epoch": 34.02, "learning_rate": 3.9946249694600537e-08, "loss": 3.7803, "step": 3063500 }, { "epoch": 34.03, "learning_rate": 3.993236790085067e-08, "loss": 3.7327, "step": 3064000 }, { "epoch": 34.03, "learning_rate": 3.9918486107100814e-08, "loss": 3.7614, "step": 3064500 }, { "epoch": 34.04, "learning_rate": 3.990460431335096e-08, "loss": 3.7546, "step": 3065000 }, { "epoch": 34.04, "learning_rate": 3.9890722519601086e-08, "loss": 3.7374, "step": 3065500 }, { "epoch": 34.05, "learning_rate": 3.987684072585123e-08, "loss": 3.7578, "step": 3066000 }, { "epoch": 34.05, "learning_rate": 3.986295893210137e-08, "loss": 3.7645, "step": 3066500 }, { "epoch": 34.06, "learning_rate": 3.9849077138351506e-08, "loss": 3.7533, "step": 3067000 }, { "epoch": 34.07, "learning_rate": 3.983519534460165e-08, "loss": 3.7707, "step": 3067500 }, { "epoch": 34.07, "learning_rate": 3.982131355085179e-08, "loss": 3.77, "step": 3068000 }, { "epoch": 34.08, "learning_rate": 3.980743175710192e-08, "loss": 3.7595, "step": 3068500 }, { "epoch": 34.08, "learning_rate": 3.979354996335206e-08, "loss": 3.7671, "step": 3069000 }, { "epoch": 34.09, "learning_rate": 3.9779668169602204e-08, "loss": 3.7681, "step": 3069500 }, { "epoch": 34.09, "learning_rate": 3.976578637585234e-08, "loss": 3.7587, "step": 3070000 }, { "epoch": 34.1, "learning_rate": 3.975190458210248e-08, "loss": 3.7568, "step": 3070500 }, { "epoch": 34.1, "learning_rate": 3.973802278835262e-08, "loss": 3.7506, "step": 3071000 }, { "epoch": 34.11, "learning_rate": 3.972414099460275e-08, "loss": 3.7685, "step": 3071500 }, { "epoch": 34.12, "learning_rate": 3.9710259200852895e-08, "loss": 3.7535, "step": 3072000 }, { "epoch": 34.12, "learning_rate": 3.969637740710303e-08, "loss": 3.7549, "step": 3072500 }, { "epoch": 34.13, "learning_rate": 3.9682495613353173e-08, "loss": 3.738, "step": 3073000 }, { "epoch": 34.13, "learning_rate": 3.9668613819603316e-08, "loss": 3.7708, "step": 3073500 }, { "epoch": 34.14, "learning_rate": 3.965473202585345e-08, "loss": 3.76, "step": 3074000 }, { "epoch": 34.14, "learning_rate": 3.964085023210359e-08, "loss": 3.7726, "step": 3074500 }, { "epoch": 34.15, "learning_rate": 3.962696843835373e-08, "loss": 3.7713, "step": 3075000 }, { "epoch": 34.15, "learning_rate": 3.9613086644603865e-08, "loss": 3.7556, "step": 3075500 }, { "epoch": 34.16, "learning_rate": 3.959920485085401e-08, "loss": 3.7419, "step": 3076000 }, { "epoch": 34.17, "learning_rate": 3.958532305710415e-08, "loss": 3.7425, "step": 3076500 }, { "epoch": 34.17, "learning_rate": 3.9571441263354285e-08, "loss": 3.7538, "step": 3077000 }, { "epoch": 34.18, "learning_rate": 3.955755946960442e-08, "loss": 3.7672, "step": 3077500 }, { "epoch": 34.18, "learning_rate": 3.9543677675854556e-08, "loss": 3.7443, "step": 3078000 }, { "epoch": 34.19, "learning_rate": 3.95297958821047e-08, "loss": 3.7455, "step": 3078500 }, { "epoch": 34.19, "learning_rate": 3.951591408835484e-08, "loss": 3.7496, "step": 3079000 }, { "epoch": 34.2, "learning_rate": 3.9502032294604976e-08, "loss": 3.7551, "step": 3079500 }, { "epoch": 34.2, "learning_rate": 3.948815050085512e-08, "loss": 3.7451, "step": 3080000 }, { "epoch": 34.21, "learning_rate": 3.9474268707105254e-08, "loss": 3.746, "step": 3080500 }, { "epoch": 34.22, "learning_rate": 3.946038691335539e-08, "loss": 3.7701, "step": 3081000 }, { "epoch": 34.22, "learning_rate": 3.944650511960553e-08, "loss": 3.7606, "step": 3081500 }, { "epoch": 34.23, "learning_rate": 3.9432623325855675e-08, "loss": 3.7756, "step": 3082000 }, { "epoch": 34.23, "learning_rate": 3.941874153210581e-08, "loss": 3.7615, "step": 3082500 }, { "epoch": 34.24, "learning_rate": 3.940485973835595e-08, "loss": 3.7339, "step": 3083000 }, { "epoch": 34.24, "learning_rate": 3.939097794460609e-08, "loss": 3.7747, "step": 3083500 }, { "epoch": 34.25, "learning_rate": 3.9377096150856224e-08, "loss": 3.7732, "step": 3084000 }, { "epoch": 34.25, "learning_rate": 3.9363214357106366e-08, "loss": 3.7539, "step": 3084500 }, { "epoch": 34.26, "learning_rate": 3.934933256335651e-08, "loss": 3.7646, "step": 3085000 }, { "epoch": 34.27, "learning_rate": 3.9335450769606644e-08, "loss": 3.7511, "step": 3085500 }, { "epoch": 34.27, "learning_rate": 3.9321568975856786e-08, "loss": 3.751, "step": 3086000 }, { "epoch": 34.28, "learning_rate": 3.930768718210692e-08, "loss": 3.7548, "step": 3086500 }, { "epoch": 34.28, "learning_rate": 3.929380538835706e-08, "loss": 3.7585, "step": 3087000 }, { "epoch": 34.29, "learning_rate": 3.92799235946072e-08, "loss": 3.7645, "step": 3087500 }, { "epoch": 34.29, "learning_rate": 3.9266041800857335e-08, "loss": 3.7591, "step": 3088000 }, { "epoch": 34.3, "learning_rate": 3.925216000710748e-08, "loss": 3.7538, "step": 3088500 }, { "epoch": 34.3, "learning_rate": 3.923827821335762e-08, "loss": 3.7505, "step": 3089000 }, { "epoch": 34.31, "learning_rate": 3.9224396419607756e-08, "loss": 3.7544, "step": 3089500 }, { "epoch": 34.32, "learning_rate": 3.921051462585789e-08, "loss": 3.765, "step": 3090000 }, { "epoch": 34.32, "learning_rate": 3.9196632832108034e-08, "loss": 3.7463, "step": 3090500 }, { "epoch": 34.33, "learning_rate": 3.918275103835817e-08, "loss": 3.7681, "step": 3091000 }, { "epoch": 34.33, "learning_rate": 3.916886924460831e-08, "loss": 3.7592, "step": 3091500 }, { "epoch": 34.34, "learning_rate": 3.9154987450858454e-08, "loss": 3.7626, "step": 3092000 }, { "epoch": 34.34, "learning_rate": 3.914110565710859e-08, "loss": 3.7506, "step": 3092500 }, { "epoch": 34.35, "learning_rate": 3.9127223863358725e-08, "loss": 3.7755, "step": 3093000 }, { "epoch": 34.35, "learning_rate": 3.911334206960886e-08, "loss": 3.7635, "step": 3093500 }, { "epoch": 34.36, "learning_rate": 3.9099460275859e-08, "loss": 3.7514, "step": 3094000 }, { "epoch": 34.37, "learning_rate": 3.9085578482109145e-08, "loss": 3.7677, "step": 3094500 }, { "epoch": 34.37, "learning_rate": 3.907169668835928e-08, "loss": 3.7773, "step": 3095000 }, { "epoch": 34.38, "learning_rate": 3.905781489460942e-08, "loss": 3.7555, "step": 3095500 }, { "epoch": 34.38, "learning_rate": 3.904393310085956e-08, "loss": 3.735, "step": 3096000 }, { "epoch": 34.39, "learning_rate": 3.9030051307109694e-08, "loss": 3.7562, "step": 3096500 }, { "epoch": 34.39, "learning_rate": 3.9016169513359837e-08, "loss": 3.7827, "step": 3097000 }, { "epoch": 34.4, "learning_rate": 3.900228771960998e-08, "loss": 3.7614, "step": 3097500 }, { "epoch": 34.4, "learning_rate": 3.8988405925860115e-08, "loss": 3.759, "step": 3098000 }, { "epoch": 34.41, "learning_rate": 3.897452413211026e-08, "loss": 3.7534, "step": 3098500 }, { "epoch": 34.42, "learning_rate": 3.896064233836039e-08, "loss": 3.7772, "step": 3099000 }, { "epoch": 34.42, "learning_rate": 3.894676054461053e-08, "loss": 3.7426, "step": 3099500 }, { "epoch": 34.43, "learning_rate": 3.893287875086067e-08, "loss": 3.7605, "step": 3100000 }, { "epoch": 34.43, "learning_rate": 3.8918996957110806e-08, "loss": 3.7555, "step": 3100500 }, { "epoch": 34.44, "learning_rate": 3.890511516336095e-08, "loss": 3.7471, "step": 3101000 }, { "epoch": 34.44, "learning_rate": 3.889123336961109e-08, "loss": 3.7554, "step": 3101500 }, { "epoch": 34.45, "learning_rate": 3.887735157586122e-08, "loss": 3.7529, "step": 3102000 }, { "epoch": 34.45, "learning_rate": 3.886346978211136e-08, "loss": 3.767, "step": 3102500 }, { "epoch": 34.46, "learning_rate": 3.8849587988361504e-08, "loss": 3.7566, "step": 3103000 }, { "epoch": 34.47, "learning_rate": 3.883570619461164e-08, "loss": 3.7764, "step": 3103500 }, { "epoch": 34.47, "learning_rate": 3.882182440086178e-08, "loss": 3.7602, "step": 3104000 }, { "epoch": 34.48, "learning_rate": 3.880794260711192e-08, "loss": 3.7342, "step": 3104500 }, { "epoch": 34.48, "learning_rate": 3.879406081336205e-08, "loss": 3.7563, "step": 3105000 }, { "epoch": 34.49, "learning_rate": 3.8780179019612196e-08, "loss": 3.755, "step": 3105500 }, { "epoch": 34.49, "learning_rate": 3.876629722586234e-08, "loss": 3.7632, "step": 3106000 }, { "epoch": 34.5, "learning_rate": 3.8752415432112473e-08, "loss": 3.7747, "step": 3106500 }, { "epoch": 34.5, "learning_rate": 3.8738533638362616e-08, "loss": 3.7706, "step": 3107000 }, { "epoch": 34.51, "learning_rate": 3.872465184461275e-08, "loss": 3.7522, "step": 3107500 }, { "epoch": 34.52, "learning_rate": 3.871077005086289e-08, "loss": 3.7508, "step": 3108000 }, { "epoch": 34.52, "learning_rate": 3.869688825711303e-08, "loss": 3.7621, "step": 3108500 }, { "epoch": 34.53, "learning_rate": 3.8683006463363165e-08, "loss": 3.7503, "step": 3109000 }, { "epoch": 34.53, "learning_rate": 3.866912466961331e-08, "loss": 3.7479, "step": 3109500 }, { "epoch": 34.54, "learning_rate": 3.865524287586345e-08, "loss": 3.7465, "step": 3110000 }, { "epoch": 34.54, "learning_rate": 3.8641361082113585e-08, "loss": 3.7474, "step": 3110500 }, { "epoch": 34.55, "learning_rate": 3.862747928836372e-08, "loss": 3.7562, "step": 3111000 }, { "epoch": 34.55, "learning_rate": 3.861359749461386e-08, "loss": 3.7551, "step": 3111500 }, { "epoch": 34.56, "learning_rate": 3.8599715700864e-08, "loss": 3.7683, "step": 3112000 }, { "epoch": 34.57, "learning_rate": 3.858583390711414e-08, "loss": 3.7422, "step": 3112500 }, { "epoch": 34.57, "learning_rate": 3.857195211336428e-08, "loss": 3.7582, "step": 3113000 }, { "epoch": 34.58, "learning_rate": 3.855807031961442e-08, "loss": 3.7583, "step": 3113500 }, { "epoch": 34.58, "learning_rate": 3.8544188525864554e-08, "loss": 3.764, "step": 3114000 }, { "epoch": 34.59, "learning_rate": 3.85303067321147e-08, "loss": 3.7664, "step": 3114500 }, { "epoch": 34.59, "learning_rate": 3.851642493836483e-08, "loss": 3.7628, "step": 3115000 }, { "epoch": 34.6, "learning_rate": 3.8502543144614975e-08, "loss": 3.7654, "step": 3115500 }, { "epoch": 34.6, "learning_rate": 3.848866135086511e-08, "loss": 3.7717, "step": 3116000 }, { "epoch": 34.61, "learning_rate": 3.847477955711525e-08, "loss": 3.755, "step": 3116500 }, { "epoch": 34.62, "learning_rate": 3.846089776336539e-08, "loss": 3.768, "step": 3117000 }, { "epoch": 34.62, "learning_rate": 3.8447015969615524e-08, "loss": 3.7646, "step": 3117500 }, { "epoch": 34.63, "learning_rate": 3.8433134175865666e-08, "loss": 3.7688, "step": 3118000 }, { "epoch": 34.63, "learning_rate": 3.841925238211581e-08, "loss": 3.7484, "step": 3118500 }, { "epoch": 34.64, "learning_rate": 3.8405370588365944e-08, "loss": 3.7559, "step": 3119000 }, { "epoch": 34.64, "learning_rate": 3.8391488794616086e-08, "loss": 3.7616, "step": 3119500 }, { "epoch": 34.65, "learning_rate": 3.837760700086622e-08, "loss": 3.763, "step": 3120000 }, { "epoch": 34.65, "learning_rate": 3.836372520711636e-08, "loss": 3.7592, "step": 3120500 }, { "epoch": 34.66, "learning_rate": 3.83498434133665e-08, "loss": 3.7625, "step": 3121000 }, { "epoch": 34.67, "learning_rate": 3.833596161961664e-08, "loss": 3.7424, "step": 3121500 }, { "epoch": 34.67, "learning_rate": 3.832207982586678e-08, "loss": 3.7649, "step": 3122000 }, { "epoch": 34.68, "learning_rate": 3.830819803211692e-08, "loss": 3.7609, "step": 3122500 }, { "epoch": 34.68, "learning_rate": 3.8294316238367056e-08, "loss": 3.767, "step": 3123000 }, { "epoch": 34.69, "learning_rate": 3.828043444461719e-08, "loss": 3.752, "step": 3123500 }, { "epoch": 34.69, "learning_rate": 3.8266552650867334e-08, "loss": 3.7593, "step": 3124000 }, { "epoch": 34.7, "learning_rate": 3.825267085711747e-08, "loss": 3.7542, "step": 3124500 }, { "epoch": 34.7, "learning_rate": 3.823878906336761e-08, "loss": 3.7822, "step": 3125000 }, { "epoch": 34.71, "learning_rate": 3.8224907269617754e-08, "loss": 3.7662, "step": 3125500 }, { "epoch": 34.72, "learning_rate": 3.821102547586789e-08, "loss": 3.7438, "step": 3126000 }, { "epoch": 34.72, "learning_rate": 3.8197143682118025e-08, "loss": 3.7587, "step": 3126500 }, { "epoch": 34.73, "learning_rate": 3.818326188836817e-08, "loss": 3.7748, "step": 3127000 }, { "epoch": 34.73, "learning_rate": 3.81693800946183e-08, "loss": 3.7873, "step": 3127500 }, { "epoch": 34.74, "learning_rate": 3.8155498300868445e-08, "loss": 3.7582, "step": 3128000 }, { "epoch": 34.74, "learning_rate": 3.814161650711859e-08, "loss": 3.7499, "step": 3128500 }, { "epoch": 34.75, "learning_rate": 3.812773471336872e-08, "loss": 3.7235, "step": 3129000 }, { "epoch": 34.75, "learning_rate": 3.811385291961886e-08, "loss": 3.7482, "step": 3129500 }, { "epoch": 34.76, "learning_rate": 3.8099971125868994e-08, "loss": 3.7629, "step": 3130000 }, { "epoch": 34.77, "learning_rate": 3.808608933211914e-08, "loss": 3.7643, "step": 3130500 }, { "epoch": 34.77, "learning_rate": 3.807220753836928e-08, "loss": 3.751, "step": 3131000 }, { "epoch": 34.78, "learning_rate": 3.8058325744619415e-08, "loss": 3.7472, "step": 3131500 }, { "epoch": 34.78, "learning_rate": 3.804444395086956e-08, "loss": 3.7704, "step": 3132000 }, { "epoch": 34.79, "learning_rate": 3.803056215711969e-08, "loss": 3.7546, "step": 3132500 }, { "epoch": 34.79, "learning_rate": 3.801668036336983e-08, "loss": 3.7607, "step": 3133000 }, { "epoch": 34.8, "learning_rate": 3.800279856961997e-08, "loss": 3.7579, "step": 3133500 }, { "epoch": 34.8, "learning_rate": 3.798891677587011e-08, "loss": 3.7418, "step": 3134000 }, { "epoch": 34.81, "learning_rate": 3.797503498212025e-08, "loss": 3.765, "step": 3134500 }, { "epoch": 34.82, "learning_rate": 3.7961153188370384e-08, "loss": 3.7546, "step": 3135000 }, { "epoch": 34.82, "learning_rate": 3.7947271394620526e-08, "loss": 3.7518, "step": 3135500 }, { "epoch": 34.83, "learning_rate": 3.793338960087066e-08, "loss": 3.7348, "step": 3136000 }, { "epoch": 34.83, "learning_rate": 3.7919507807120804e-08, "loss": 3.7574, "step": 3136500 }, { "epoch": 34.84, "learning_rate": 3.790562601337094e-08, "loss": 3.7515, "step": 3137000 }, { "epoch": 34.84, "learning_rate": 3.789174421962108e-08, "loss": 3.7448, "step": 3137500 }, { "epoch": 34.85, "learning_rate": 3.787786242587122e-08, "loss": 3.7546, "step": 3138000 }, { "epoch": 34.85, "learning_rate": 3.7863980632121353e-08, "loss": 3.7513, "step": 3138500 }, { "epoch": 34.86, "learning_rate": 3.7850098838371496e-08, "loss": 3.7429, "step": 3139000 }, { "epoch": 34.87, "learning_rate": 3.783621704462164e-08, "loss": 3.7597, "step": 3139500 }, { "epoch": 34.87, "learning_rate": 3.7822335250871774e-08, "loss": 3.7675, "step": 3140000 }, { "epoch": 34.88, "learning_rate": 3.7808453457121916e-08, "loss": 3.7708, "step": 3140500 }, { "epoch": 34.88, "learning_rate": 3.779457166337205e-08, "loss": 3.7486, "step": 3141000 }, { "epoch": 34.89, "learning_rate": 3.778068986962219e-08, "loss": 3.7719, "step": 3141500 }, { "epoch": 34.89, "learning_rate": 3.776680807587233e-08, "loss": 3.7507, "step": 3142000 }, { "epoch": 34.9, "learning_rate": 3.775292628212247e-08, "loss": 3.7551, "step": 3142500 }, { "epoch": 34.9, "learning_rate": 3.773904448837261e-08, "loss": 3.7521, "step": 3143000 }, { "epoch": 34.91, "learning_rate": 3.772516269462275e-08, "loss": 3.7683, "step": 3143500 }, { "epoch": 34.92, "learning_rate": 3.7711280900872885e-08, "loss": 3.7665, "step": 3144000 }, { "epoch": 34.92, "learning_rate": 3.769739910712302e-08, "loss": 3.7494, "step": 3144500 }, { "epoch": 34.93, "learning_rate": 3.768351731337316e-08, "loss": 3.7558, "step": 3145000 }, { "epoch": 34.93, "learning_rate": 3.76696355196233e-08, "loss": 3.7423, "step": 3145500 }, { "epoch": 34.94, "learning_rate": 3.765575372587344e-08, "loss": 3.7395, "step": 3146000 }, { "epoch": 34.94, "learning_rate": 3.764187193212358e-08, "loss": 3.7369, "step": 3146500 }, { "epoch": 34.95, "learning_rate": 3.762799013837372e-08, "loss": 3.7865, "step": 3147000 }, { "epoch": 34.95, "learning_rate": 3.7614108344623855e-08, "loss": 3.7693, "step": 3147500 }, { "epoch": 34.96, "learning_rate": 3.7600226550874e-08, "loss": 3.7596, "step": 3148000 }, { "epoch": 34.97, "learning_rate": 3.758634475712413e-08, "loss": 3.7511, "step": 3148500 }, { "epoch": 34.97, "learning_rate": 3.7572462963374275e-08, "loss": 3.7729, "step": 3149000 }, { "epoch": 34.98, "learning_rate": 3.755858116962442e-08, "loss": 3.7677, "step": 3149500 }, { "epoch": 34.98, "learning_rate": 3.754469937587455e-08, "loss": 3.7457, "step": 3150000 }, { "epoch": 34.99, "learning_rate": 3.753081758212469e-08, "loss": 3.7405, "step": 3150500 }, { "epoch": 34.99, "learning_rate": 3.751693578837483e-08, "loss": 3.7475, "step": 3151000 }, { "epoch": 35.0, "learning_rate": 3.7503053994624966e-08, "loss": 3.7551, "step": 3151500 }, { "epoch": 35.0, "eval_loss": 3.830213785171509, "eval_runtime": 6.3055, "eval_samples_per_second": 246.453, "step": 3151610 }, { "epoch": 35.0, "learning_rate": 3.748917220087511e-08, "loss": 3.7463, "step": 3152000 }, { "epoch": 35.01, "learning_rate": 3.7475290407125244e-08, "loss": 3.7487, "step": 3152500 }, { "epoch": 35.02, "learning_rate": 3.7461408613375386e-08, "loss": 3.7578, "step": 3153000 }, { "epoch": 35.02, "learning_rate": 3.744752681962552e-08, "loss": 3.7618, "step": 3153500 }, { "epoch": 35.03, "learning_rate": 3.743364502587566e-08, "loss": 3.7607, "step": 3154000 }, { "epoch": 35.03, "learning_rate": 3.74197632321258e-08, "loss": 3.7737, "step": 3154500 }, { "epoch": 35.04, "learning_rate": 3.740588143837594e-08, "loss": 3.7488, "step": 3155000 }, { "epoch": 35.04, "learning_rate": 3.739199964462608e-08, "loss": 3.7399, "step": 3155500 }, { "epoch": 35.05, "learning_rate": 3.737811785087622e-08, "loss": 3.7491, "step": 3156000 }, { "epoch": 35.05, "learning_rate": 3.7364236057126356e-08, "loss": 3.7565, "step": 3156500 }, { "epoch": 35.06, "learning_rate": 3.735035426337649e-08, "loss": 3.756, "step": 3157000 }, { "epoch": 35.07, "learning_rate": 3.7336472469626634e-08, "loss": 3.7755, "step": 3157500 }, { "epoch": 35.07, "learning_rate": 3.7322590675876776e-08, "loss": 3.7493, "step": 3158000 }, { "epoch": 35.08, "learning_rate": 3.730870888212691e-08, "loss": 3.7618, "step": 3158500 }, { "epoch": 35.08, "learning_rate": 3.7294827088377054e-08, "loss": 3.7549, "step": 3159000 }, { "epoch": 35.09, "learning_rate": 3.728094529462719e-08, "loss": 3.7502, "step": 3159500 }, { "epoch": 35.09, "learning_rate": 3.7267063500877325e-08, "loss": 3.7477, "step": 3160000 }, { "epoch": 35.1, "learning_rate": 3.725318170712747e-08, "loss": 3.734, "step": 3160500 }, { "epoch": 35.1, "learning_rate": 3.72392999133776e-08, "loss": 3.741, "step": 3161000 }, { "epoch": 35.11, "learning_rate": 3.7225418119627745e-08, "loss": 3.7469, "step": 3161500 }, { "epoch": 35.12, "learning_rate": 3.721153632587789e-08, "loss": 3.7429, "step": 3162000 }, { "epoch": 35.12, "learning_rate": 3.719765453212802e-08, "loss": 3.7518, "step": 3162500 }, { "epoch": 35.13, "learning_rate": 3.718377273837816e-08, "loss": 3.7553, "step": 3163000 }, { "epoch": 35.13, "learning_rate": 3.71698909446283e-08, "loss": 3.7537, "step": 3163500 }, { "epoch": 35.14, "learning_rate": 3.715600915087844e-08, "loss": 3.7698, "step": 3164000 }, { "epoch": 35.14, "learning_rate": 3.714212735712858e-08, "loss": 3.7763, "step": 3164500 }, { "epoch": 35.15, "learning_rate": 3.712824556337872e-08, "loss": 3.7581, "step": 3165000 }, { "epoch": 35.15, "learning_rate": 3.711436376962885e-08, "loss": 3.773, "step": 3165500 }, { "epoch": 35.16, "learning_rate": 3.710048197587899e-08, "loss": 3.7381, "step": 3166000 }, { "epoch": 35.17, "learning_rate": 3.708660018212913e-08, "loss": 3.7466, "step": 3166500 }, { "epoch": 35.17, "learning_rate": 3.707271838837927e-08, "loss": 3.7495, "step": 3167000 }, { "epoch": 35.18, "learning_rate": 3.705883659462941e-08, "loss": 3.7502, "step": 3167500 }, { "epoch": 35.18, "learning_rate": 3.704495480087955e-08, "loss": 3.7641, "step": 3168000 }, { "epoch": 35.19, "learning_rate": 3.7031073007129684e-08, "loss": 3.7522, "step": 3168500 }, { "epoch": 35.19, "learning_rate": 3.7017191213379826e-08, "loss": 3.759, "step": 3169000 }, { "epoch": 35.2, "learning_rate": 3.700330941962996e-08, "loss": 3.7438, "step": 3169500 }, { "epoch": 35.2, "learning_rate": 3.6989427625880104e-08, "loss": 3.7538, "step": 3170000 }, { "epoch": 35.21, "learning_rate": 3.6975545832130247e-08, "loss": 3.7502, "step": 3170500 }, { "epoch": 35.22, "learning_rate": 3.696166403838038e-08, "loss": 3.7547, "step": 3171000 }, { "epoch": 35.22, "learning_rate": 3.694778224463052e-08, "loss": 3.7578, "step": 3171500 }, { "epoch": 35.23, "learning_rate": 3.693390045088066e-08, "loss": 3.7497, "step": 3172000 }, { "epoch": 35.23, "learning_rate": 3.6920018657130796e-08, "loss": 3.7629, "step": 3172500 }, { "epoch": 35.24, "learning_rate": 3.690613686338094e-08, "loss": 3.7435, "step": 3173000 }, { "epoch": 35.24, "learning_rate": 3.6892255069631074e-08, "loss": 3.7534, "step": 3173500 }, { "epoch": 35.25, "learning_rate": 3.6878373275881216e-08, "loss": 3.7687, "step": 3174000 }, { "epoch": 35.25, "learning_rate": 3.686449148213135e-08, "loss": 3.7672, "step": 3174500 }, { "epoch": 35.26, "learning_rate": 3.685060968838149e-08, "loss": 3.7835, "step": 3175000 }, { "epoch": 35.27, "learning_rate": 3.683672789463163e-08, "loss": 3.7439, "step": 3175500 }, { "epoch": 35.27, "learning_rate": 3.682284610088177e-08, "loss": 3.744, "step": 3176000 }, { "epoch": 35.28, "learning_rate": 3.680896430713191e-08, "loss": 3.7513, "step": 3176500 }, { "epoch": 35.28, "learning_rate": 3.679508251338205e-08, "loss": 3.7625, "step": 3177000 }, { "epoch": 35.29, "learning_rate": 3.6781200719632185e-08, "loss": 3.7385, "step": 3177500 }, { "epoch": 35.29, "learning_rate": 3.676731892588232e-08, "loss": 3.7522, "step": 3178000 }, { "epoch": 35.3, "learning_rate": 3.675343713213246e-08, "loss": 3.7561, "step": 3178500 }, { "epoch": 35.3, "learning_rate": 3.6739555338382605e-08, "loss": 3.7456, "step": 3179000 }, { "epoch": 35.31, "learning_rate": 3.672567354463274e-08, "loss": 3.7454, "step": 3179500 }, { "epoch": 35.32, "learning_rate": 3.6711791750882883e-08, "loss": 3.7511, "step": 3180000 }, { "epoch": 35.32, "learning_rate": 3.669790995713302e-08, "loss": 3.7618, "step": 3180500 }, { "epoch": 35.33, "learning_rate": 3.6684028163383155e-08, "loss": 3.7713, "step": 3181000 }, { "epoch": 35.33, "learning_rate": 3.66701463696333e-08, "loss": 3.7798, "step": 3181500 }, { "epoch": 35.34, "learning_rate": 3.665626457588343e-08, "loss": 3.7554, "step": 3182000 }, { "epoch": 35.34, "learning_rate": 3.6642382782133575e-08, "loss": 3.7655, "step": 3182500 }, { "epoch": 35.35, "learning_rate": 3.662850098838372e-08, "loss": 3.7565, "step": 3183000 }, { "epoch": 35.35, "learning_rate": 3.661461919463385e-08, "loss": 3.7595, "step": 3183500 }, { "epoch": 35.36, "learning_rate": 3.660073740088399e-08, "loss": 3.764, "step": 3184000 }, { "epoch": 35.37, "learning_rate": 3.658685560713413e-08, "loss": 3.7733, "step": 3184500 }, { "epoch": 35.37, "learning_rate": 3.6572973813384266e-08, "loss": 3.7582, "step": 3185000 }, { "epoch": 35.38, "learning_rate": 3.655909201963441e-08, "loss": 3.7737, "step": 3185500 }, { "epoch": 35.38, "learning_rate": 3.654521022588455e-08, "loss": 3.7383, "step": 3186000 }, { "epoch": 35.39, "learning_rate": 3.6531328432134686e-08, "loss": 3.7705, "step": 3186500 }, { "epoch": 35.39, "learning_rate": 3.651744663838482e-08, "loss": 3.7517, "step": 3187000 }, { "epoch": 35.4, "learning_rate": 3.6503564844634964e-08, "loss": 3.7594, "step": 3187500 }, { "epoch": 35.4, "learning_rate": 3.64896830508851e-08, "loss": 3.7564, "step": 3188000 }, { "epoch": 35.41, "learning_rate": 3.647580125713524e-08, "loss": 3.7401, "step": 3188500 }, { "epoch": 35.42, "learning_rate": 3.646191946338538e-08, "loss": 3.7408, "step": 3189000 }, { "epoch": 35.42, "learning_rate": 3.644803766963552e-08, "loss": 3.7641, "step": 3189500 }, { "epoch": 35.43, "learning_rate": 3.6434155875885656e-08, "loss": 3.7475, "step": 3190000 }, { "epoch": 35.43, "learning_rate": 3.642027408213579e-08, "loss": 3.7772, "step": 3190500 }, { "epoch": 35.44, "learning_rate": 3.6406392288385934e-08, "loss": 3.7566, "step": 3191000 }, { "epoch": 35.44, "learning_rate": 3.6392510494636076e-08, "loss": 3.7662, "step": 3191500 }, { "epoch": 35.45, "learning_rate": 3.637862870088621e-08, "loss": 3.7541, "step": 3192000 }, { "epoch": 35.45, "learning_rate": 3.6364746907136354e-08, "loss": 3.77, "step": 3192500 }, { "epoch": 35.46, "learning_rate": 3.635086511338649e-08, "loss": 3.7598, "step": 3193000 }, { "epoch": 35.47, "learning_rate": 3.6336983319636625e-08, "loss": 3.761, "step": 3193500 }, { "epoch": 35.47, "learning_rate": 3.632310152588677e-08, "loss": 3.7398, "step": 3194000 }, { "epoch": 35.48, "learning_rate": 3.630921973213691e-08, "loss": 3.748, "step": 3194500 }, { "epoch": 35.48, "learning_rate": 3.6295337938387045e-08, "loss": 3.753, "step": 3195000 }, { "epoch": 35.49, "learning_rate": 3.628145614463719e-08, "loss": 3.769, "step": 3195500 }, { "epoch": 35.49, "learning_rate": 3.626757435088732e-08, "loss": 3.7737, "step": 3196000 }, { "epoch": 35.5, "learning_rate": 3.625369255713746e-08, "loss": 3.7464, "step": 3196500 }, { "epoch": 35.5, "learning_rate": 3.62398107633876e-08, "loss": 3.7455, "step": 3197000 }, { "epoch": 35.51, "learning_rate": 3.622592896963774e-08, "loss": 3.7634, "step": 3197500 }, { "epoch": 35.52, "learning_rate": 3.621204717588788e-08, "loss": 3.7662, "step": 3198000 }, { "epoch": 35.52, "learning_rate": 3.619816538213802e-08, "loss": 3.7411, "step": 3198500 }, { "epoch": 35.53, "learning_rate": 3.618428358838815e-08, "loss": 3.7484, "step": 3199000 }, { "epoch": 35.53, "learning_rate": 3.617040179463829e-08, "loss": 3.7596, "step": 3199500 }, { "epoch": 35.54, "learning_rate": 3.6156520000888435e-08, "loss": 3.748, "step": 3200000 }, { "epoch": 35.54, "learning_rate": 3.614263820713857e-08, "loss": 3.7569, "step": 3200500 }, { "epoch": 35.55, "learning_rate": 3.612875641338871e-08, "loss": 3.7558, "step": 3201000 }, { "epoch": 35.55, "learning_rate": 3.6114874619638855e-08, "loss": 3.7606, "step": 3201500 }, { "epoch": 35.56, "learning_rate": 3.6100992825888984e-08, "loss": 3.7583, "step": 3202000 }, { "epoch": 35.57, "learning_rate": 3.6087111032139126e-08, "loss": 3.7683, "step": 3202500 }, { "epoch": 35.57, "learning_rate": 3.607322923838926e-08, "loss": 3.7568, "step": 3203000 }, { "epoch": 35.58, "learning_rate": 3.6059347444639404e-08, "loss": 3.769, "step": 3203500 }, { "epoch": 35.58, "learning_rate": 3.6045465650889547e-08, "loss": 3.7725, "step": 3204000 }, { "epoch": 35.59, "learning_rate": 3.603158385713968e-08, "loss": 3.757, "step": 3204500 }, { "epoch": 35.59, "learning_rate": 3.601770206338982e-08, "loss": 3.7577, "step": 3205000 }, { "epoch": 35.6, "learning_rate": 3.600382026963996e-08, "loss": 3.76, "step": 3205500 }, { "epoch": 35.6, "learning_rate": 3.5989938475890096e-08, "loss": 3.7528, "step": 3206000 }, { "epoch": 35.61, "learning_rate": 3.597605668214024e-08, "loss": 3.7507, "step": 3206500 }, { "epoch": 35.62, "learning_rate": 3.596217488839038e-08, "loss": 3.7656, "step": 3207000 }, { "epoch": 35.62, "learning_rate": 3.5948293094640516e-08, "loss": 3.7352, "step": 3207500 }, { "epoch": 35.63, "learning_rate": 3.593441130089065e-08, "loss": 3.759, "step": 3208000 }, { "epoch": 35.63, "learning_rate": 3.5920529507140794e-08, "loss": 3.7655, "step": 3208500 }, { "epoch": 35.64, "learning_rate": 3.590664771339093e-08, "loss": 3.754, "step": 3209000 }, { "epoch": 35.64, "learning_rate": 3.589276591964107e-08, "loss": 3.73, "step": 3209500 }, { "epoch": 35.65, "learning_rate": 3.5878884125891214e-08, "loss": 3.747, "step": 3210000 }, { "epoch": 35.65, "learning_rate": 3.586500233214135e-08, "loss": 3.7393, "step": 3210500 }, { "epoch": 35.66, "learning_rate": 3.5851120538391485e-08, "loss": 3.7643, "step": 3211000 }, { "epoch": 35.67, "learning_rate": 3.583723874464162e-08, "loss": 3.7619, "step": 3211500 }, { "epoch": 35.67, "learning_rate": 3.582335695089176e-08, "loss": 3.7634, "step": 3212000 }, { "epoch": 35.68, "learning_rate": 3.5809475157141906e-08, "loss": 3.7449, "step": 3212500 }, { "epoch": 35.68, "learning_rate": 3.579559336339204e-08, "loss": 3.7526, "step": 3213000 }, { "epoch": 35.69, "learning_rate": 3.5781711569642183e-08, "loss": 3.7451, "step": 3213500 }, { "epoch": 35.69, "learning_rate": 3.576782977589232e-08, "loss": 3.7564, "step": 3214000 }, { "epoch": 35.7, "learning_rate": 3.5753947982142455e-08, "loss": 3.7436, "step": 3214500 }, { "epoch": 35.7, "learning_rate": 3.57400661883926e-08, "loss": 3.7375, "step": 3215000 }, { "epoch": 35.71, "learning_rate": 3.572618439464274e-08, "loss": 3.7577, "step": 3215500 }, { "epoch": 35.72, "learning_rate": 3.5712302600892875e-08, "loss": 3.7375, "step": 3216000 }, { "epoch": 35.72, "learning_rate": 3.569842080714302e-08, "loss": 3.7672, "step": 3216500 }, { "epoch": 35.73, "learning_rate": 3.568453901339315e-08, "loss": 3.7586, "step": 3217000 }, { "epoch": 35.73, "learning_rate": 3.567065721964329e-08, "loss": 3.7487, "step": 3217500 }, { "epoch": 35.74, "learning_rate": 3.565677542589343e-08, "loss": 3.7439, "step": 3218000 }, { "epoch": 35.74, "learning_rate": 3.5642893632143566e-08, "loss": 3.7588, "step": 3218500 }, { "epoch": 35.75, "learning_rate": 3.562901183839371e-08, "loss": 3.7543, "step": 3219000 }, { "epoch": 35.75, "learning_rate": 3.561513004464385e-08, "loss": 3.7582, "step": 3219500 }, { "epoch": 35.76, "learning_rate": 3.5601248250893987e-08, "loss": 3.7641, "step": 3220000 }, { "epoch": 35.77, "learning_rate": 3.558736645714412e-08, "loss": 3.7602, "step": 3220500 }, { "epoch": 35.77, "learning_rate": 3.5573484663394264e-08, "loss": 3.7685, "step": 3221000 }, { "epoch": 35.78, "learning_rate": 3.55596028696444e-08, "loss": 3.7328, "step": 3221500 }, { "epoch": 35.78, "learning_rate": 3.554572107589454e-08, "loss": 3.7602, "step": 3222000 }, { "epoch": 35.79, "learning_rate": 3.5531839282144685e-08, "loss": 3.742, "step": 3222500 }, { "epoch": 35.79, "learning_rate": 3.551795748839482e-08, "loss": 3.7656, "step": 3223000 }, { "epoch": 35.8, "learning_rate": 3.5504075694644956e-08, "loss": 3.7798, "step": 3223500 }, { "epoch": 35.8, "learning_rate": 3.54901939008951e-08, "loss": 3.7635, "step": 3224000 }, { "epoch": 35.81, "learning_rate": 3.5476312107145234e-08, "loss": 3.7718, "step": 3224500 }, { "epoch": 35.82, "learning_rate": 3.5462430313395376e-08, "loss": 3.7622, "step": 3225000 }, { "epoch": 35.82, "learning_rate": 3.544854851964551e-08, "loss": 3.7606, "step": 3225500 }, { "epoch": 35.83, "learning_rate": 3.5434666725895654e-08, "loss": 3.7641, "step": 3226000 }, { "epoch": 35.83, "learning_rate": 3.542078493214579e-08, "loss": 3.7644, "step": 3226500 }, { "epoch": 35.84, "learning_rate": 3.5406903138395925e-08, "loss": 3.7601, "step": 3227000 }, { "epoch": 35.84, "learning_rate": 3.539302134464607e-08, "loss": 3.7502, "step": 3227500 }, { "epoch": 35.85, "learning_rate": 3.537913955089621e-08, "loss": 3.7508, "step": 3228000 }, { "epoch": 35.85, "learning_rate": 3.5365257757146345e-08, "loss": 3.7565, "step": 3228500 }, { "epoch": 35.86, "learning_rate": 3.535137596339649e-08, "loss": 3.76, "step": 3229000 }, { "epoch": 35.87, "learning_rate": 3.5337494169646623e-08, "loss": 3.7603, "step": 3229500 }, { "epoch": 35.87, "learning_rate": 3.532361237589676e-08, "loss": 3.7698, "step": 3230000 }, { "epoch": 35.88, "learning_rate": 3.53097305821469e-08, "loss": 3.7701, "step": 3230500 }, { "epoch": 35.88, "learning_rate": 3.5295848788397044e-08, "loss": 3.7565, "step": 3231000 }, { "epoch": 35.89, "learning_rate": 3.528196699464718e-08, "loss": 3.7539, "step": 3231500 }, { "epoch": 35.89, "learning_rate": 3.526808520089732e-08, "loss": 3.7697, "step": 3232000 }, { "epoch": 35.9, "learning_rate": 3.525420340714745e-08, "loss": 3.7601, "step": 3232500 }, { "epoch": 35.9, "learning_rate": 3.524032161339759e-08, "loss": 3.7525, "step": 3233000 }, { "epoch": 35.91, "learning_rate": 3.5226439819647735e-08, "loss": 3.7544, "step": 3233500 }, { "epoch": 35.91, "learning_rate": 3.521255802589787e-08, "loss": 3.773, "step": 3234000 }, { "epoch": 35.92, "learning_rate": 3.519867623214801e-08, "loss": 3.7572, "step": 3234500 }, { "epoch": 35.93, "learning_rate": 3.5184794438398155e-08, "loss": 3.7558, "step": 3235000 }, { "epoch": 35.93, "learning_rate": 3.5170912644648284e-08, "loss": 3.756, "step": 3235500 }, { "epoch": 35.94, "learning_rate": 3.5157030850898427e-08, "loss": 3.7628, "step": 3236000 }, { "epoch": 35.94, "learning_rate": 3.514314905714857e-08, "loss": 3.7487, "step": 3236500 }, { "epoch": 35.95, "learning_rate": 3.5129267263398704e-08, "loss": 3.7499, "step": 3237000 }, { "epoch": 35.95, "learning_rate": 3.5115385469648847e-08, "loss": 3.7341, "step": 3237500 }, { "epoch": 35.96, "learning_rate": 3.510150367589899e-08, "loss": 3.7722, "step": 3238000 }, { "epoch": 35.96, "learning_rate": 3.508762188214912e-08, "loss": 3.7618, "step": 3238500 }, { "epoch": 35.97, "learning_rate": 3.507374008839926e-08, "loss": 3.7518, "step": 3239000 }, { "epoch": 35.98, "learning_rate": 3.50598582946494e-08, "loss": 3.7574, "step": 3239500 }, { "epoch": 35.98, "learning_rate": 3.504597650089954e-08, "loss": 3.7626, "step": 3240000 }, { "epoch": 35.99, "learning_rate": 3.503209470714968e-08, "loss": 3.7609, "step": 3240500 }, { "epoch": 35.99, "learning_rate": 3.5018212913399816e-08, "loss": 3.7792, "step": 3241000 }, { "epoch": 36.0, "learning_rate": 3.500433111964995e-08, "loss": 3.7314, "step": 3241500 }, { "epoch": 36.0, "eval_loss": 3.829056978225708, "eval_runtime": 6.3058, "eval_samples_per_second": 246.44, "step": 3241656 }, { "epoch": 36.0, "learning_rate": 3.4990449325900094e-08, "loss": 3.7589, "step": 3242000 }, { "epoch": 36.01, "learning_rate": 3.497656753215023e-08, "loss": 3.7573, "step": 3242500 }, { "epoch": 36.01, "learning_rate": 3.496268573840037e-08, "loss": 3.755, "step": 3243000 }, { "epoch": 36.02, "learning_rate": 3.4948803944650514e-08, "loss": 3.7551, "step": 3243500 }, { "epoch": 36.03, "learning_rate": 3.493492215090065e-08, "loss": 3.7473, "step": 3244000 }, { "epoch": 36.03, "learning_rate": 3.4921040357150785e-08, "loss": 3.7427, "step": 3244500 }, { "epoch": 36.04, "learning_rate": 3.490715856340093e-08, "loss": 3.7479, "step": 3245000 }, { "epoch": 36.04, "learning_rate": 3.4893276769651063e-08, "loss": 3.7498, "step": 3245500 }, { "epoch": 36.05, "learning_rate": 3.4879394975901206e-08, "loss": 3.76, "step": 3246000 }, { "epoch": 36.05, "learning_rate": 3.486551318215135e-08, "loss": 3.7631, "step": 3246500 }, { "epoch": 36.06, "learning_rate": 3.4851631388401484e-08, "loss": 3.7717, "step": 3247000 }, { "epoch": 36.06, "learning_rate": 3.483774959465162e-08, "loss": 3.7592, "step": 3247500 }, { "epoch": 36.07, "learning_rate": 3.4823867800901755e-08, "loss": 3.7684, "step": 3248000 }, { "epoch": 36.08, "learning_rate": 3.48099860071519e-08, "loss": 3.7549, "step": 3248500 }, { "epoch": 36.08, "learning_rate": 3.479610421340204e-08, "loss": 3.7557, "step": 3249000 }, { "epoch": 36.09, "learning_rate": 3.4782222419652175e-08, "loss": 3.7463, "step": 3249500 }, { "epoch": 36.09, "learning_rate": 3.476834062590232e-08, "loss": 3.7789, "step": 3250000 }, { "epoch": 36.1, "learning_rate": 3.475445883215245e-08, "loss": 3.7622, "step": 3250500 }, { "epoch": 36.1, "learning_rate": 3.474057703840259e-08, "loss": 3.7508, "step": 3251000 }, { "epoch": 36.11, "learning_rate": 3.472669524465273e-08, "loss": 3.764, "step": 3251500 }, { "epoch": 36.11, "learning_rate": 3.471281345090287e-08, "loss": 3.7582, "step": 3252000 }, { "epoch": 36.12, "learning_rate": 3.469893165715301e-08, "loss": 3.747, "step": 3252500 }, { "epoch": 36.13, "learning_rate": 3.468504986340315e-08, "loss": 3.7286, "step": 3253000 }, { "epoch": 36.13, "learning_rate": 3.4671168069653287e-08, "loss": 3.7528, "step": 3253500 }, { "epoch": 36.14, "learning_rate": 3.465728627590342e-08, "loss": 3.7389, "step": 3254000 }, { "epoch": 36.14, "learning_rate": 3.4643404482153565e-08, "loss": 3.7494, "step": 3254500 }, { "epoch": 36.15, "learning_rate": 3.46295226884037e-08, "loss": 3.7599, "step": 3255000 }, { "epoch": 36.15, "learning_rate": 3.461564089465384e-08, "loss": 3.7641, "step": 3255500 }, { "epoch": 36.16, "learning_rate": 3.4601759100903985e-08, "loss": 3.7531, "step": 3256000 }, { "epoch": 36.16, "learning_rate": 3.458787730715412e-08, "loss": 3.7652, "step": 3256500 }, { "epoch": 36.17, "learning_rate": 3.4573995513404256e-08, "loss": 3.76, "step": 3257000 }, { "epoch": 36.18, "learning_rate": 3.45601137196544e-08, "loss": 3.7339, "step": 3257500 }, { "epoch": 36.18, "learning_rate": 3.4546231925904534e-08, "loss": 3.7297, "step": 3258000 }, { "epoch": 36.19, "learning_rate": 3.4532350132154676e-08, "loss": 3.7563, "step": 3258500 }, { "epoch": 36.19, "learning_rate": 3.451846833840482e-08, "loss": 3.7446, "step": 3259000 }, { "epoch": 36.2, "learning_rate": 3.4504586544654954e-08, "loss": 3.761, "step": 3259500 }, { "epoch": 36.2, "learning_rate": 3.449070475090509e-08, "loss": 3.7579, "step": 3260000 }, { "epoch": 36.21, "learning_rate": 3.447682295715523e-08, "loss": 3.7581, "step": 3260500 }, { "epoch": 36.21, "learning_rate": 3.446294116340537e-08, "loss": 3.7704, "step": 3261000 }, { "epoch": 36.22, "learning_rate": 3.444905936965551e-08, "loss": 3.754, "step": 3261500 }, { "epoch": 36.23, "learning_rate": 3.4435177575905646e-08, "loss": 3.7573, "step": 3262000 }, { "epoch": 36.23, "learning_rate": 3.442129578215579e-08, "loss": 3.7276, "step": 3262500 }, { "epoch": 36.24, "learning_rate": 3.4407413988405923e-08, "loss": 3.7663, "step": 3263000 }, { "epoch": 36.24, "learning_rate": 3.439353219465606e-08, "loss": 3.7356, "step": 3263500 }, { "epoch": 36.25, "learning_rate": 3.43796504009062e-08, "loss": 3.7452, "step": 3264000 }, { "epoch": 36.25, "learning_rate": 3.4365768607156344e-08, "loss": 3.756, "step": 3264500 }, { "epoch": 36.26, "learning_rate": 3.435188681340648e-08, "loss": 3.7943, "step": 3265000 }, { "epoch": 36.26, "learning_rate": 3.433800501965662e-08, "loss": 3.7502, "step": 3265500 }, { "epoch": 36.27, "learning_rate": 3.432412322590676e-08, "loss": 3.7672, "step": 3266000 }, { "epoch": 36.28, "learning_rate": 3.431024143215689e-08, "loss": 3.7641, "step": 3266500 }, { "epoch": 36.28, "learning_rate": 3.4296359638407035e-08, "loss": 3.7484, "step": 3267000 }, { "epoch": 36.29, "learning_rate": 3.428247784465718e-08, "loss": 3.7545, "step": 3267500 }, { "epoch": 36.29, "learning_rate": 3.426859605090731e-08, "loss": 3.7515, "step": 3268000 }, { "epoch": 36.3, "learning_rate": 3.4254714257157455e-08, "loss": 3.7541, "step": 3268500 }, { "epoch": 36.3, "learning_rate": 3.4240832463407584e-08, "loss": 3.7685, "step": 3269000 }, { "epoch": 36.31, "learning_rate": 3.4226950669657727e-08, "loss": 3.7552, "step": 3269500 }, { "epoch": 36.31, "learning_rate": 3.421306887590787e-08, "loss": 3.7673, "step": 3270000 }, { "epoch": 36.32, "learning_rate": 3.4199187082158004e-08, "loss": 3.7421, "step": 3270500 }, { "epoch": 36.33, "learning_rate": 3.418530528840815e-08, "loss": 3.7581, "step": 3271000 }, { "epoch": 36.33, "learning_rate": 3.417142349465829e-08, "loss": 3.7389, "step": 3271500 }, { "epoch": 36.34, "learning_rate": 3.415754170090842e-08, "loss": 3.7568, "step": 3272000 }, { "epoch": 36.34, "learning_rate": 3.414365990715856e-08, "loss": 3.7645, "step": 3272500 }, { "epoch": 36.35, "learning_rate": 3.41297781134087e-08, "loss": 3.7609, "step": 3273000 }, { "epoch": 36.35, "learning_rate": 3.411589631965884e-08, "loss": 3.7816, "step": 3273500 }, { "epoch": 36.36, "learning_rate": 3.410201452590898e-08, "loss": 3.7398, "step": 3274000 }, { "epoch": 36.36, "learning_rate": 3.4088132732159116e-08, "loss": 3.7734, "step": 3274500 }, { "epoch": 36.37, "learning_rate": 3.407425093840925e-08, "loss": 3.76, "step": 3275000 }, { "epoch": 36.38, "learning_rate": 3.4060369144659394e-08, "loss": 3.7396, "step": 3275500 }, { "epoch": 36.38, "learning_rate": 3.4046487350909536e-08, "loss": 3.7485, "step": 3276000 }, { "epoch": 36.39, "learning_rate": 3.403260555715967e-08, "loss": 3.7517, "step": 3276500 }, { "epoch": 36.39, "learning_rate": 3.4018723763409814e-08, "loss": 3.7601, "step": 3277000 }, { "epoch": 36.4, "learning_rate": 3.400484196965995e-08, "loss": 3.7581, "step": 3277500 }, { "epoch": 36.4, "learning_rate": 3.3990960175910086e-08, "loss": 3.7576, "step": 3278000 }, { "epoch": 36.41, "learning_rate": 3.397707838216023e-08, "loss": 3.7629, "step": 3278500 }, { "epoch": 36.41, "learning_rate": 3.3963196588410363e-08, "loss": 3.7686, "step": 3279000 }, { "epoch": 36.42, "learning_rate": 3.3949314794660506e-08, "loss": 3.757, "step": 3279500 }, { "epoch": 36.43, "learning_rate": 3.393543300091065e-08, "loss": 3.7583, "step": 3280000 }, { "epoch": 36.43, "learning_rate": 3.3921551207160784e-08, "loss": 3.7525, "step": 3280500 }, { "epoch": 36.44, "learning_rate": 3.390766941341092e-08, "loss": 3.7442, "step": 3281000 }, { "epoch": 36.44, "learning_rate": 3.389378761966106e-08, "loss": 3.7442, "step": 3281500 }, { "epoch": 36.45, "learning_rate": 3.38799058259112e-08, "loss": 3.777, "step": 3282000 }, { "epoch": 36.45, "learning_rate": 3.386602403216134e-08, "loss": 3.7565, "step": 3282500 }, { "epoch": 36.46, "learning_rate": 3.385214223841148e-08, "loss": 3.7799, "step": 3283000 }, { "epoch": 36.46, "learning_rate": 3.383826044466162e-08, "loss": 3.7727, "step": 3283500 }, { "epoch": 36.47, "learning_rate": 3.382437865091175e-08, "loss": 3.7478, "step": 3284000 }, { "epoch": 36.48, "learning_rate": 3.381049685716189e-08, "loss": 3.749, "step": 3284500 }, { "epoch": 36.48, "learning_rate": 3.379661506341203e-08, "loss": 3.7545, "step": 3285000 }, { "epoch": 36.49, "learning_rate": 3.378273326966217e-08, "loss": 3.7678, "step": 3285500 }, { "epoch": 36.49, "learning_rate": 3.376885147591231e-08, "loss": 3.7347, "step": 3286000 }, { "epoch": 36.5, "learning_rate": 3.375496968216245e-08, "loss": 3.7648, "step": 3286500 }, { "epoch": 36.5, "learning_rate": 3.374108788841259e-08, "loss": 3.7561, "step": 3287000 }, { "epoch": 36.51, "learning_rate": 3.372720609466272e-08, "loss": 3.7523, "step": 3287500 }, { "epoch": 36.51, "learning_rate": 3.3713324300912865e-08, "loss": 3.7595, "step": 3288000 }, { "epoch": 36.52, "learning_rate": 3.369944250716301e-08, "loss": 3.7514, "step": 3288500 }, { "epoch": 36.53, "learning_rate": 3.368556071341314e-08, "loss": 3.7581, "step": 3289000 }, { "epoch": 36.53, "learning_rate": 3.3671678919663285e-08, "loss": 3.7511, "step": 3289500 }, { "epoch": 36.54, "learning_rate": 3.365779712591342e-08, "loss": 3.7545, "step": 3290000 }, { "epoch": 36.54, "learning_rate": 3.3643915332163556e-08, "loss": 3.7466, "step": 3290500 }, { "epoch": 36.55, "learning_rate": 3.36300335384137e-08, "loss": 3.7731, "step": 3291000 }, { "epoch": 36.55, "learning_rate": 3.3616151744663834e-08, "loss": 3.7558, "step": 3291500 }, { "epoch": 36.56, "learning_rate": 3.3602269950913976e-08, "loss": 3.7731, "step": 3292000 }, { "epoch": 36.56, "learning_rate": 3.358838815716412e-08, "loss": 3.761, "step": 3292500 }, { "epoch": 36.57, "learning_rate": 3.3574506363414254e-08, "loss": 3.7406, "step": 3293000 }, { "epoch": 36.58, "learning_rate": 3.356062456966439e-08, "loss": 3.7477, "step": 3293500 }, { "epoch": 36.58, "learning_rate": 3.354674277591453e-08, "loss": 3.7609, "step": 3294000 }, { "epoch": 36.59, "learning_rate": 3.353286098216467e-08, "loss": 3.7491, "step": 3294500 }, { "epoch": 36.59, "learning_rate": 3.351897918841481e-08, "loss": 3.7619, "step": 3295000 }, { "epoch": 36.6, "learning_rate": 3.350509739466495e-08, "loss": 3.7434, "step": 3295500 }, { "epoch": 36.6, "learning_rate": 3.349121560091509e-08, "loss": 3.7527, "step": 3296000 }, { "epoch": 36.61, "learning_rate": 3.3477333807165224e-08, "loss": 3.75, "step": 3296500 }, { "epoch": 36.61, "learning_rate": 3.3463452013415366e-08, "loss": 3.7352, "step": 3297000 }, { "epoch": 36.62, "learning_rate": 3.34495702196655e-08, "loss": 3.7685, "step": 3297500 }, { "epoch": 36.63, "learning_rate": 3.3435688425915644e-08, "loss": 3.759, "step": 3298000 }, { "epoch": 36.63, "learning_rate": 3.342180663216578e-08, "loss": 3.758, "step": 3298500 }, { "epoch": 36.64, "learning_rate": 3.340792483841592e-08, "loss": 3.7538, "step": 3299000 }, { "epoch": 36.64, "learning_rate": 3.339404304466606e-08, "loss": 3.7657, "step": 3299500 }, { "epoch": 36.65, "learning_rate": 3.338016125091619e-08, "loss": 3.7775, "step": 3300000 }, { "epoch": 36.65, "learning_rate": 3.3366279457166335e-08, "loss": 3.7829, "step": 3300500 }, { "epoch": 36.66, "learning_rate": 3.335239766341648e-08, "loss": 3.7693, "step": 3301000 }, { "epoch": 36.66, "learning_rate": 3.333851586966661e-08, "loss": 3.7751, "step": 3301500 }, { "epoch": 36.67, "learning_rate": 3.3324634075916755e-08, "loss": 3.741, "step": 3302000 }, { "epoch": 36.68, "learning_rate": 3.331075228216689e-08, "loss": 3.7541, "step": 3302500 }, { "epoch": 36.68, "learning_rate": 3.3296870488417027e-08, "loss": 3.7384, "step": 3303000 }, { "epoch": 36.69, "learning_rate": 3.328298869466717e-08, "loss": 3.7503, "step": 3303500 }, { "epoch": 36.69, "learning_rate": 3.326910690091731e-08, "loss": 3.7439, "step": 3304000 }, { "epoch": 36.7, "learning_rate": 3.325522510716745e-08, "loss": 3.7484, "step": 3304500 }, { "epoch": 36.7, "learning_rate": 3.324134331341759e-08, "loss": 3.7363, "step": 3305000 }, { "epoch": 36.71, "learning_rate": 3.3227461519667725e-08, "loss": 3.7508, "step": 3305500 }, { "epoch": 36.71, "learning_rate": 3.321357972591786e-08, "loss": 3.7536, "step": 3306000 }, { "epoch": 36.72, "learning_rate": 3.3199697932168e-08, "loss": 3.7773, "step": 3306500 }, { "epoch": 36.73, "learning_rate": 3.318581613841814e-08, "loss": 3.7575, "step": 3307000 }, { "epoch": 36.73, "learning_rate": 3.317193434466828e-08, "loss": 3.7656, "step": 3307500 }, { "epoch": 36.74, "learning_rate": 3.3158052550918416e-08, "loss": 3.7407, "step": 3308000 }, { "epoch": 36.74, "learning_rate": 3.314417075716855e-08, "loss": 3.7642, "step": 3308500 }, { "epoch": 36.75, "learning_rate": 3.3130288963418694e-08, "loss": 3.749, "step": 3309000 }, { "epoch": 36.75, "learning_rate": 3.3116407169668836e-08, "loss": 3.7476, "step": 3309500 }, { "epoch": 36.76, "learning_rate": 3.310252537591897e-08, "loss": 3.74, "step": 3310000 }, { "epoch": 36.76, "learning_rate": 3.3088643582169114e-08, "loss": 3.7618, "step": 3310500 }, { "epoch": 36.77, "learning_rate": 3.307476178841925e-08, "loss": 3.7678, "step": 3311000 }, { "epoch": 36.78, "learning_rate": 3.3060879994669386e-08, "loss": 3.7612, "step": 3311500 }, { "epoch": 36.78, "learning_rate": 3.304699820091953e-08, "loss": 3.7596, "step": 3312000 }, { "epoch": 36.79, "learning_rate": 3.303311640716967e-08, "loss": 3.7429, "step": 3312500 }, { "epoch": 36.79, "learning_rate": 3.3019234613419806e-08, "loss": 3.7553, "step": 3313000 }, { "epoch": 36.8, "learning_rate": 3.300535281966995e-08, "loss": 3.748, "step": 3313500 }, { "epoch": 36.8, "learning_rate": 3.2991471025920084e-08, "loss": 3.7643, "step": 3314000 }, { "epoch": 36.81, "learning_rate": 3.297758923217022e-08, "loss": 3.7431, "step": 3314500 }, { "epoch": 36.81, "learning_rate": 3.296370743842036e-08, "loss": 3.7361, "step": 3315000 }, { "epoch": 36.82, "learning_rate": 3.29498256446705e-08, "loss": 3.729, "step": 3315500 }, { "epoch": 36.83, "learning_rate": 3.293594385092064e-08, "loss": 3.7562, "step": 3316000 }, { "epoch": 36.83, "learning_rate": 3.292206205717078e-08, "loss": 3.7377, "step": 3316500 }, { "epoch": 36.84, "learning_rate": 3.290818026342092e-08, "loss": 3.7573, "step": 3317000 }, { "epoch": 36.84, "learning_rate": 3.289429846967105e-08, "loss": 3.7563, "step": 3317500 }, { "epoch": 36.85, "learning_rate": 3.2880416675921195e-08, "loss": 3.7548, "step": 3318000 }, { "epoch": 36.85, "learning_rate": 3.286653488217133e-08, "loss": 3.757, "step": 3318500 }, { "epoch": 36.86, "learning_rate": 3.285265308842147e-08, "loss": 3.7806, "step": 3319000 }, { "epoch": 36.86, "learning_rate": 3.2838771294671615e-08, "loss": 3.7635, "step": 3319500 }, { "epoch": 36.87, "learning_rate": 3.282488950092175e-08, "loss": 3.7515, "step": 3320000 }, { "epoch": 36.88, "learning_rate": 3.281100770717189e-08, "loss": 3.7389, "step": 3320500 }, { "epoch": 36.88, "learning_rate": 3.279712591342202e-08, "loss": 3.7553, "step": 3321000 }, { "epoch": 36.89, "learning_rate": 3.2783244119672165e-08, "loss": 3.7461, "step": 3321500 }, { "epoch": 36.89, "learning_rate": 3.276936232592231e-08, "loss": 3.7716, "step": 3322000 }, { "epoch": 36.9, "learning_rate": 3.275548053217244e-08, "loss": 3.7601, "step": 3322500 }, { "epoch": 36.9, "learning_rate": 3.2741598738422585e-08, "loss": 3.7518, "step": 3323000 }, { "epoch": 36.91, "learning_rate": 3.272771694467272e-08, "loss": 3.7527, "step": 3323500 }, { "epoch": 36.91, "learning_rate": 3.2713835150922856e-08, "loss": 3.7434, "step": 3324000 }, { "epoch": 36.92, "learning_rate": 3.2699953357173e-08, "loss": 3.7581, "step": 3324500 }, { "epoch": 36.93, "learning_rate": 3.268607156342314e-08, "loss": 3.7519, "step": 3325000 }, { "epoch": 36.93, "learning_rate": 3.2672189769673276e-08, "loss": 3.753, "step": 3325500 }, { "epoch": 36.94, "learning_rate": 3.265830797592342e-08, "loss": 3.7534, "step": 3326000 }, { "epoch": 36.94, "learning_rate": 3.2644426182173554e-08, "loss": 3.7557, "step": 3326500 }, { "epoch": 36.95, "learning_rate": 3.263054438842369e-08, "loss": 3.7656, "step": 3327000 }, { "epoch": 36.95, "learning_rate": 3.261666259467383e-08, "loss": 3.7492, "step": 3327500 }, { "epoch": 36.96, "learning_rate": 3.260278080092397e-08, "loss": 3.7355, "step": 3328000 }, { "epoch": 36.96, "learning_rate": 3.258889900717411e-08, "loss": 3.7599, "step": 3328500 }, { "epoch": 36.97, "learning_rate": 3.257501721342425e-08, "loss": 3.761, "step": 3329000 }, { "epoch": 36.98, "learning_rate": 3.256113541967439e-08, "loss": 3.7553, "step": 3329500 }, { "epoch": 36.98, "learning_rate": 3.2547253625924524e-08, "loss": 3.7394, "step": 3330000 }, { "epoch": 36.99, "learning_rate": 3.2533371832174666e-08, "loss": 3.7374, "step": 3330500 }, { "epoch": 36.99, "learning_rate": 3.25194900384248e-08, "loss": 3.7448, "step": 3331000 }, { "epoch": 37.0, "learning_rate": 3.2505608244674944e-08, "loss": 3.7357, "step": 3331500 }, { "epoch": 37.0, "eval_loss": 3.8285868167877197, "eval_runtime": 6.3016, "eval_samples_per_second": 246.605, "step": 3331702 }, { "epoch": 37.0, "learning_rate": 3.2491726450925086e-08, "loss": 3.7674, "step": 3332000 }, { "epoch": 37.01, "learning_rate": 3.247784465717522e-08, "loss": 3.7627, "step": 3332500 }, { "epoch": 37.01, "learning_rate": 3.246396286342536e-08, "loss": 3.7528, "step": 3333000 }, { "epoch": 37.02, "learning_rate": 3.24500810696755e-08, "loss": 3.758, "step": 3333500 }, { "epoch": 37.03, "learning_rate": 3.2436199275925635e-08, "loss": 3.7433, "step": 3334000 }, { "epoch": 37.03, "learning_rate": 3.242231748217578e-08, "loss": 3.7402, "step": 3334500 }, { "epoch": 37.04, "learning_rate": 3.240843568842592e-08, "loss": 3.7674, "step": 3335000 }, { "epoch": 37.04, "learning_rate": 3.2394553894676055e-08, "loss": 3.7388, "step": 3335500 }, { "epoch": 37.05, "learning_rate": 3.238067210092619e-08, "loss": 3.7443, "step": 3336000 }, { "epoch": 37.05, "learning_rate": 3.236679030717633e-08, "loss": 3.7599, "step": 3336500 }, { "epoch": 37.06, "learning_rate": 3.235290851342647e-08, "loss": 3.7855, "step": 3337000 }, { "epoch": 37.06, "learning_rate": 3.233902671967661e-08, "loss": 3.7608, "step": 3337500 }, { "epoch": 37.07, "learning_rate": 3.232514492592675e-08, "loss": 3.7549, "step": 3338000 }, { "epoch": 37.08, "learning_rate": 3.231126313217688e-08, "loss": 3.748, "step": 3338500 }, { "epoch": 37.08, "learning_rate": 3.2297381338427025e-08, "loss": 3.7428, "step": 3339000 }, { "epoch": 37.09, "learning_rate": 3.228349954467716e-08, "loss": 3.7516, "step": 3339500 }, { "epoch": 37.09, "learning_rate": 3.22696177509273e-08, "loss": 3.7639, "step": 3340000 }, { "epoch": 37.1, "learning_rate": 3.2255735957177445e-08, "loss": 3.752, "step": 3340500 }, { "epoch": 37.1, "learning_rate": 3.224185416342758e-08, "loss": 3.7565, "step": 3341000 }, { "epoch": 37.11, "learning_rate": 3.2227972369677716e-08, "loss": 3.7648, "step": 3341500 }, { "epoch": 37.11, "learning_rate": 3.221409057592786e-08, "loss": 3.7706, "step": 3342000 }, { "epoch": 37.12, "learning_rate": 3.2200208782177994e-08, "loss": 3.7527, "step": 3342500 }, { "epoch": 37.13, "learning_rate": 3.2186326988428136e-08, "loss": 3.7359, "step": 3343000 }, { "epoch": 37.13, "learning_rate": 3.217244519467827e-08, "loss": 3.7478, "step": 3343500 }, { "epoch": 37.14, "learning_rate": 3.2158563400928414e-08, "loss": 3.7665, "step": 3344000 }, { "epoch": 37.14, "learning_rate": 3.214468160717855e-08, "loss": 3.7741, "step": 3344500 }, { "epoch": 37.15, "learning_rate": 3.2130799813428686e-08, "loss": 3.7599, "step": 3345000 }, { "epoch": 37.15, "learning_rate": 3.211691801967883e-08, "loss": 3.7647, "step": 3345500 }, { "epoch": 37.16, "learning_rate": 3.210303622592897e-08, "loss": 3.7407, "step": 3346000 }, { "epoch": 37.16, "learning_rate": 3.2089154432179106e-08, "loss": 3.7566, "step": 3346500 }, { "epoch": 37.17, "learning_rate": 3.207527263842925e-08, "loss": 3.7688, "step": 3347000 }, { "epoch": 37.18, "learning_rate": 3.2061390844679384e-08, "loss": 3.7577, "step": 3347500 }, { "epoch": 37.18, "learning_rate": 3.204750905092952e-08, "loss": 3.7662, "step": 3348000 }, { "epoch": 37.19, "learning_rate": 3.203362725717966e-08, "loss": 3.759, "step": 3348500 }, { "epoch": 37.19, "learning_rate": 3.2019745463429804e-08, "loss": 3.749, "step": 3349000 }, { "epoch": 37.2, "learning_rate": 3.200586366967994e-08, "loss": 3.7605, "step": 3349500 }, { "epoch": 37.2, "learning_rate": 3.199198187593008e-08, "loss": 3.7458, "step": 3350000 }, { "epoch": 37.21, "learning_rate": 3.197810008218022e-08, "loss": 3.7652, "step": 3350500 }, { "epoch": 37.21, "learning_rate": 3.196421828843035e-08, "loss": 3.7664, "step": 3351000 }, { "epoch": 37.22, "learning_rate": 3.1950336494680495e-08, "loss": 3.759, "step": 3351500 }, { "epoch": 37.23, "learning_rate": 3.193645470093063e-08, "loss": 3.7555, "step": 3352000 }, { "epoch": 37.23, "learning_rate": 3.1922572907180773e-08, "loss": 3.7478, "step": 3352500 }, { "epoch": 37.24, "learning_rate": 3.1908691113430916e-08, "loss": 3.7581, "step": 3353000 }, { "epoch": 37.24, "learning_rate": 3.189480931968105e-08, "loss": 3.7562, "step": 3353500 }, { "epoch": 37.25, "learning_rate": 3.188092752593119e-08, "loss": 3.7527, "step": 3354000 }, { "epoch": 37.25, "learning_rate": 3.186704573218133e-08, "loss": 3.762, "step": 3354500 }, { "epoch": 37.26, "learning_rate": 3.1853163938431465e-08, "loss": 3.7496, "step": 3355000 }, { "epoch": 37.26, "learning_rate": 3.183928214468161e-08, "loss": 3.7467, "step": 3355500 }, { "epoch": 37.27, "learning_rate": 3.182540035093175e-08, "loss": 3.7363, "step": 3356000 }, { "epoch": 37.28, "learning_rate": 3.1811518557181885e-08, "loss": 3.7561, "step": 3356500 }, { "epoch": 37.28, "learning_rate": 3.179763676343202e-08, "loss": 3.7629, "step": 3357000 }, { "epoch": 37.29, "learning_rate": 3.1783754969682156e-08, "loss": 3.7565, "step": 3357500 }, { "epoch": 37.29, "learning_rate": 3.17698731759323e-08, "loss": 3.7675, "step": 3358000 }, { "epoch": 37.3, "learning_rate": 3.175599138218244e-08, "loss": 3.7512, "step": 3358500 }, { "epoch": 37.3, "learning_rate": 3.1742109588432576e-08, "loss": 3.755, "step": 3359000 }, { "epoch": 37.31, "learning_rate": 3.172822779468272e-08, "loss": 3.7664, "step": 3359500 }, { "epoch": 37.31, "learning_rate": 3.1714346000932854e-08, "loss": 3.7526, "step": 3360000 }, { "epoch": 37.32, "learning_rate": 3.170046420718299e-08, "loss": 3.7588, "step": 3360500 }, { "epoch": 37.33, "learning_rate": 3.168658241343313e-08, "loss": 3.7548, "step": 3361000 }, { "epoch": 37.33, "learning_rate": 3.1672700619683275e-08, "loss": 3.7603, "step": 3361500 }, { "epoch": 37.34, "learning_rate": 3.165881882593341e-08, "loss": 3.7507, "step": 3362000 }, { "epoch": 37.34, "learning_rate": 3.164493703218355e-08, "loss": 3.7634, "step": 3362500 }, { "epoch": 37.35, "learning_rate": 3.163105523843369e-08, "loss": 3.7463, "step": 3363000 }, { "epoch": 37.35, "learning_rate": 3.1617173444683824e-08, "loss": 3.7564, "step": 3363500 }, { "epoch": 37.36, "learning_rate": 3.1603291650933966e-08, "loss": 3.7643, "step": 3364000 }, { "epoch": 37.36, "learning_rate": 3.158940985718411e-08, "loss": 3.7553, "step": 3364500 }, { "epoch": 37.37, "learning_rate": 3.1575528063434244e-08, "loss": 3.7384, "step": 3365000 }, { "epoch": 37.38, "learning_rate": 3.1561646269684386e-08, "loss": 3.7471, "step": 3365500 }, { "epoch": 37.38, "learning_rate": 3.154776447593452e-08, "loss": 3.7494, "step": 3366000 }, { "epoch": 37.39, "learning_rate": 3.153388268218466e-08, "loss": 3.7442, "step": 3366500 }, { "epoch": 37.39, "learning_rate": 3.15200008884348e-08, "loss": 3.7769, "step": 3367000 }, { "epoch": 37.4, "learning_rate": 3.1506119094684935e-08, "loss": 3.7337, "step": 3367500 }, { "epoch": 37.4, "learning_rate": 3.149223730093508e-08, "loss": 3.7813, "step": 3368000 }, { "epoch": 37.41, "learning_rate": 3.147835550718522e-08, "loss": 3.7316, "step": 3368500 }, { "epoch": 37.41, "learning_rate": 3.146447371343535e-08, "loss": 3.7523, "step": 3369000 }, { "epoch": 37.42, "learning_rate": 3.145059191968549e-08, "loss": 3.7453, "step": 3369500 }, { "epoch": 37.43, "learning_rate": 3.1436710125935633e-08, "loss": 3.7436, "step": 3370000 }, { "epoch": 37.43, "learning_rate": 3.142282833218577e-08, "loss": 3.7641, "step": 3370500 }, { "epoch": 37.44, "learning_rate": 3.140894653843591e-08, "loss": 3.7519, "step": 3371000 }, { "epoch": 37.44, "learning_rate": 3.1395064744686054e-08, "loss": 3.7592, "step": 3371500 }, { "epoch": 37.45, "learning_rate": 3.138118295093618e-08, "loss": 3.7397, "step": 3372000 }, { "epoch": 37.45, "learning_rate": 3.1367301157186325e-08, "loss": 3.7388, "step": 3372500 }, { "epoch": 37.46, "learning_rate": 3.135341936343646e-08, "loss": 3.7509, "step": 3373000 }, { "epoch": 37.46, "learning_rate": 3.13395375696866e-08, "loss": 3.7596, "step": 3373500 }, { "epoch": 37.47, "learning_rate": 3.1325655775936745e-08, "loss": 3.7439, "step": 3374000 }, { "epoch": 37.48, "learning_rate": 3.131177398218688e-08, "loss": 3.7576, "step": 3374500 }, { "epoch": 37.48, "learning_rate": 3.1297892188437016e-08, "loss": 3.7385, "step": 3375000 }, { "epoch": 37.49, "learning_rate": 3.128401039468716e-08, "loss": 3.7527, "step": 3375500 }, { "epoch": 37.49, "learning_rate": 3.1270128600937294e-08, "loss": 3.7459, "step": 3376000 }, { "epoch": 37.5, "learning_rate": 3.1256246807187437e-08, "loss": 3.7583, "step": 3376500 }, { "epoch": 37.5, "learning_rate": 3.124236501343758e-08, "loss": 3.7548, "step": 3377000 }, { "epoch": 37.51, "learning_rate": 3.1228483219687714e-08, "loss": 3.7378, "step": 3377500 }, { "epoch": 37.51, "learning_rate": 3.121460142593785e-08, "loss": 3.7376, "step": 3378000 }, { "epoch": 37.52, "learning_rate": 3.120071963218799e-08, "loss": 3.7718, "step": 3378500 }, { "epoch": 37.53, "learning_rate": 3.118683783843813e-08, "loss": 3.746, "step": 3379000 }, { "epoch": 37.53, "learning_rate": 3.117295604468827e-08, "loss": 3.7418, "step": 3379500 }, { "epoch": 37.54, "learning_rate": 3.1159074250938406e-08, "loss": 3.745, "step": 3380000 }, { "epoch": 37.54, "learning_rate": 3.114519245718855e-08, "loss": 3.7314, "step": 3380500 }, { "epoch": 37.55, "learning_rate": 3.1131310663438684e-08, "loss": 3.7621, "step": 3381000 }, { "epoch": 37.55, "learning_rate": 3.111742886968882e-08, "loss": 3.7492, "step": 3381500 }, { "epoch": 37.56, "learning_rate": 3.110354707593896e-08, "loss": 3.7492, "step": 3382000 }, { "epoch": 37.56, "learning_rate": 3.1089665282189104e-08, "loss": 3.7376, "step": 3382500 }, { "epoch": 37.57, "learning_rate": 3.107578348843924e-08, "loss": 3.7621, "step": 3383000 }, { "epoch": 37.58, "learning_rate": 3.106190169468938e-08, "loss": 3.7644, "step": 3383500 }, { "epoch": 37.58, "learning_rate": 3.104801990093952e-08, "loss": 3.7444, "step": 3384000 }, { "epoch": 37.59, "learning_rate": 3.103413810718965e-08, "loss": 3.7507, "step": 3384500 }, { "epoch": 37.59, "learning_rate": 3.1020256313439795e-08, "loss": 3.7446, "step": 3385000 }, { "epoch": 37.6, "learning_rate": 3.100637451968994e-08, "loss": 3.7623, "step": 3385500 }, { "epoch": 37.6, "learning_rate": 3.0992492725940073e-08, "loss": 3.7418, "step": 3386000 }, { "epoch": 37.61, "learning_rate": 3.0978610932190216e-08, "loss": 3.7487, "step": 3386500 }, { "epoch": 37.61, "learning_rate": 3.096472913844035e-08, "loss": 3.7393, "step": 3387000 }, { "epoch": 37.62, "learning_rate": 3.095084734469049e-08, "loss": 3.7549, "step": 3387500 }, { "epoch": 37.63, "learning_rate": 3.093696555094063e-08, "loss": 3.7511, "step": 3388000 }, { "epoch": 37.63, "learning_rate": 3.0923083757190765e-08, "loss": 3.754, "step": 3388500 }, { "epoch": 37.64, "learning_rate": 3.090920196344091e-08, "loss": 3.7522, "step": 3389000 }, { "epoch": 37.64, "learning_rate": 3.089532016969105e-08, "loss": 3.7507, "step": 3389500 }, { "epoch": 37.65, "learning_rate": 3.0881438375941185e-08, "loss": 3.757, "step": 3390000 }, { "epoch": 37.65, "learning_rate": 3.086755658219132e-08, "loss": 3.7612, "step": 3390500 }, { "epoch": 37.66, "learning_rate": 3.085367478844146e-08, "loss": 3.7552, "step": 3391000 }, { "epoch": 37.66, "learning_rate": 3.08397929946916e-08, "loss": 3.7644, "step": 3391500 }, { "epoch": 37.67, "learning_rate": 3.082591120094174e-08, "loss": 3.7576, "step": 3392000 }, { "epoch": 37.68, "learning_rate": 3.081202940719188e-08, "loss": 3.7415, "step": 3392500 }, { "epoch": 37.68, "learning_rate": 3.079814761344202e-08, "loss": 3.7595, "step": 3393000 }, { "epoch": 37.69, "learning_rate": 3.0784265819692154e-08, "loss": 3.7499, "step": 3393500 }, { "epoch": 37.69, "learning_rate": 3.0770384025942297e-08, "loss": 3.7425, "step": 3394000 }, { "epoch": 37.7, "learning_rate": 3.075650223219243e-08, "loss": 3.7547, "step": 3394500 }, { "epoch": 37.7, "learning_rate": 3.0742620438442575e-08, "loss": 3.7531, "step": 3395000 }, { "epoch": 37.71, "learning_rate": 3.072873864469271e-08, "loss": 3.7303, "step": 3395500 }, { "epoch": 37.71, "learning_rate": 3.071485685094285e-08, "loss": 3.7471, "step": 3396000 }, { "epoch": 37.72, "learning_rate": 3.070097505719299e-08, "loss": 3.7319, "step": 3396500 }, { "epoch": 37.73, "learning_rate": 3.0687093263443124e-08, "loss": 3.7544, "step": 3397000 }, { "epoch": 37.73, "learning_rate": 3.0673211469693266e-08, "loss": 3.7547, "step": 3397500 }, { "epoch": 37.74, "learning_rate": 3.065932967594341e-08, "loss": 3.7591, "step": 3398000 }, { "epoch": 37.74, "learning_rate": 3.0645447882193544e-08, "loss": 3.7565, "step": 3398500 }, { "epoch": 37.75, "learning_rate": 3.0631566088443686e-08, "loss": 3.7447, "step": 3399000 }, { "epoch": 37.75, "learning_rate": 3.061768429469382e-08, "loss": 3.744, "step": 3399500 }, { "epoch": 37.76, "learning_rate": 3.060380250094396e-08, "loss": 3.7565, "step": 3400000 }, { "epoch": 37.76, "learning_rate": 3.05899207071941e-08, "loss": 3.7659, "step": 3400500 }, { "epoch": 37.77, "learning_rate": 3.057603891344424e-08, "loss": 3.7541, "step": 3401000 }, { "epoch": 37.78, "learning_rate": 3.056215711969438e-08, "loss": 3.769, "step": 3401500 }, { "epoch": 37.78, "learning_rate": 3.054827532594452e-08, "loss": 3.7546, "step": 3402000 }, { "epoch": 37.79, "learning_rate": 3.053439353219465e-08, "loss": 3.7453, "step": 3402500 }, { "epoch": 37.79, "learning_rate": 3.052051173844479e-08, "loss": 3.743, "step": 3403000 }, { "epoch": 37.8, "learning_rate": 3.0506629944694934e-08, "loss": 3.745, "step": 3403500 }, { "epoch": 37.8, "learning_rate": 3.049274815094507e-08, "loss": 3.7554, "step": 3404000 }, { "epoch": 37.81, "learning_rate": 3.047886635719521e-08, "loss": 3.7689, "step": 3404500 }, { "epoch": 37.81, "learning_rate": 3.0464984563445354e-08, "loss": 3.7539, "step": 3405000 }, { "epoch": 37.82, "learning_rate": 3.045110276969548e-08, "loss": 3.734, "step": 3405500 }, { "epoch": 37.83, "learning_rate": 3.0437220975945625e-08, "loss": 3.7547, "step": 3406000 }, { "epoch": 37.83, "learning_rate": 3.042333918219577e-08, "loss": 3.76, "step": 3406500 }, { "epoch": 37.84, "learning_rate": 3.04094573884459e-08, "loss": 3.7463, "step": 3407000 }, { "epoch": 37.84, "learning_rate": 3.0395575594696045e-08, "loss": 3.7683, "step": 3407500 }, { "epoch": 37.85, "learning_rate": 3.038169380094619e-08, "loss": 3.7528, "step": 3408000 }, { "epoch": 37.85, "learning_rate": 3.0367812007196316e-08, "loss": 3.7601, "step": 3408500 }, { "epoch": 37.86, "learning_rate": 3.035393021344646e-08, "loss": 3.7699, "step": 3409000 }, { "epoch": 37.86, "learning_rate": 3.0340048419696594e-08, "loss": 3.7559, "step": 3409500 }, { "epoch": 37.87, "learning_rate": 3.0326166625946737e-08, "loss": 3.7554, "step": 3410000 }, { "epoch": 37.88, "learning_rate": 3.031228483219688e-08, "loss": 3.7509, "step": 3410500 }, { "epoch": 37.88, "learning_rate": 3.0298403038447015e-08, "loss": 3.7593, "step": 3411000 }, { "epoch": 37.89, "learning_rate": 3.028452124469715e-08, "loss": 3.7492, "step": 3411500 }, { "epoch": 37.89, "learning_rate": 3.027063945094729e-08, "loss": 3.7598, "step": 3412000 }, { "epoch": 37.9, "learning_rate": 3.025675765719743e-08, "loss": 3.7339, "step": 3412500 }, { "epoch": 37.9, "learning_rate": 3.024287586344757e-08, "loss": 3.7586, "step": 3413000 }, { "epoch": 37.91, "learning_rate": 3.022899406969771e-08, "loss": 3.7677, "step": 3413500 }, { "epoch": 37.91, "learning_rate": 3.021511227594785e-08, "loss": 3.754, "step": 3414000 }, { "epoch": 37.92, "learning_rate": 3.0201230482197984e-08, "loss": 3.7471, "step": 3414500 }, { "epoch": 37.93, "learning_rate": 3.0187348688448126e-08, "loss": 3.7551, "step": 3415000 }, { "epoch": 37.93, "learning_rate": 3.017346689469826e-08, "loss": 3.7552, "step": 3415500 }, { "epoch": 37.94, "learning_rate": 3.0159585100948404e-08, "loss": 3.7701, "step": 3416000 }, { "epoch": 37.94, "learning_rate": 3.014570330719854e-08, "loss": 3.7689, "step": 3416500 }, { "epoch": 37.95, "learning_rate": 3.013182151344868e-08, "loss": 3.7536, "step": 3417000 }, { "epoch": 37.95, "learning_rate": 3.011793971969882e-08, "loss": 3.7631, "step": 3417500 }, { "epoch": 37.96, "learning_rate": 3.010405792594895e-08, "loss": 3.7546, "step": 3418000 }, { "epoch": 37.96, "learning_rate": 3.0090176132199096e-08, "loss": 3.7525, "step": 3418500 }, { "epoch": 37.97, "learning_rate": 3.007629433844924e-08, "loss": 3.7527, "step": 3419000 }, { "epoch": 37.98, "learning_rate": 3.0062412544699373e-08, "loss": 3.731, "step": 3419500 }, { "epoch": 37.98, "learning_rate": 3.0048530750949516e-08, "loss": 3.7615, "step": 3420000 }, { "epoch": 37.99, "learning_rate": 3.003464895719965e-08, "loss": 3.7465, "step": 3420500 }, { "epoch": 37.99, "learning_rate": 3.002076716344979e-08, "loss": 3.7562, "step": 3421000 }, { "epoch": 38.0, "learning_rate": 3.000688536969993e-08, "loss": 3.7605, "step": 3421500 }, { "epoch": 38.0, "eval_loss": 3.8280832767486572, "eval_runtime": 6.3072, "eval_samples_per_second": 246.384, "step": 3421748 }, { "epoch": 38.0, "learning_rate": 2.999300357595007e-08, "loss": 3.7658, "step": 3422000 }, { "epoch": 38.01, "learning_rate": 2.997912178220021e-08, "loss": 3.7584, "step": 3422500 }, { "epoch": 38.01, "learning_rate": 2.996523998845035e-08, "loss": 3.7427, "step": 3423000 }, { "epoch": 38.02, "learning_rate": 2.9951358194700485e-08, "loss": 3.735, "step": 3423500 }, { "epoch": 38.03, "learning_rate": 2.993747640095062e-08, "loss": 3.735, "step": 3424000 }, { "epoch": 38.03, "learning_rate": 2.992359460720076e-08, "loss": 3.739, "step": 3424500 }, { "epoch": 38.04, "learning_rate": 2.99097128134509e-08, "loss": 3.7649, "step": 3425000 }, { "epoch": 38.04, "learning_rate": 2.989583101970104e-08, "loss": 3.7447, "step": 3425500 }, { "epoch": 38.05, "learning_rate": 2.988194922595118e-08, "loss": 3.7576, "step": 3426000 }, { "epoch": 38.05, "learning_rate": 2.986806743220132e-08, "loss": 3.7477, "step": 3426500 }, { "epoch": 38.06, "learning_rate": 2.9854185638451454e-08, "loss": 3.7629, "step": 3427000 }, { "epoch": 38.06, "learning_rate": 2.98403038447016e-08, "loss": 3.7544, "step": 3427500 }, { "epoch": 38.07, "learning_rate": 2.982642205095173e-08, "loss": 3.7341, "step": 3428000 }, { "epoch": 38.07, "learning_rate": 2.9812540257201875e-08, "loss": 3.7559, "step": 3428500 }, { "epoch": 38.08, "learning_rate": 2.9798658463452014e-08, "loss": 3.7492, "step": 3429000 }, { "epoch": 38.09, "learning_rate": 2.978477666970215e-08, "loss": 3.7555, "step": 3429500 }, { "epoch": 38.09, "learning_rate": 2.977089487595229e-08, "loss": 3.7478, "step": 3430000 }, { "epoch": 38.1, "learning_rate": 2.975701308220243e-08, "loss": 3.764, "step": 3430500 }, { "epoch": 38.1, "learning_rate": 2.9743131288452566e-08, "loss": 3.749, "step": 3431000 }, { "epoch": 38.11, "learning_rate": 2.972924949470271e-08, "loss": 3.7642, "step": 3431500 }, { "epoch": 38.11, "learning_rate": 2.9715367700952844e-08, "loss": 3.7605, "step": 3432000 }, { "epoch": 38.12, "learning_rate": 2.9701485907202983e-08, "loss": 3.7785, "step": 3432500 }, { "epoch": 38.12, "learning_rate": 2.9687604113453125e-08, "loss": 3.7455, "step": 3433000 }, { "epoch": 38.13, "learning_rate": 2.967372231970326e-08, "loss": 3.7649, "step": 3433500 }, { "epoch": 38.14, "learning_rate": 2.96598405259534e-08, "loss": 3.7552, "step": 3434000 }, { "epoch": 38.14, "learning_rate": 2.9645958732203542e-08, "loss": 3.7456, "step": 3434500 }, { "epoch": 38.15, "learning_rate": 2.9632076938453678e-08, "loss": 3.7597, "step": 3435000 }, { "epoch": 38.15, "learning_rate": 2.9618195144703817e-08, "loss": 3.7324, "step": 3435500 }, { "epoch": 38.16, "learning_rate": 2.960431335095396e-08, "loss": 3.7703, "step": 3436000 }, { "epoch": 38.16, "learning_rate": 2.9590431557204095e-08, "loss": 3.7595, "step": 3436500 }, { "epoch": 38.17, "learning_rate": 2.9576549763454234e-08, "loss": 3.733, "step": 3437000 }, { "epoch": 38.17, "learning_rate": 2.9562667969704376e-08, "loss": 3.7789, "step": 3437500 }, { "epoch": 38.18, "learning_rate": 2.9548786175954508e-08, "loss": 3.7548, "step": 3438000 }, { "epoch": 38.19, "learning_rate": 2.953490438220465e-08, "loss": 3.7639, "step": 3438500 }, { "epoch": 38.19, "learning_rate": 2.9521022588454786e-08, "loss": 3.773, "step": 3439000 }, { "epoch": 38.2, "learning_rate": 2.9507140794704925e-08, "loss": 3.7394, "step": 3439500 }, { "epoch": 38.2, "learning_rate": 2.9493259000955067e-08, "loss": 3.7502, "step": 3440000 }, { "epoch": 38.21, "learning_rate": 2.9479377207205203e-08, "loss": 3.7541, "step": 3440500 }, { "epoch": 38.21, "learning_rate": 2.9465495413455342e-08, "loss": 3.7506, "step": 3441000 }, { "epoch": 38.22, "learning_rate": 2.9451613619705484e-08, "loss": 3.7602, "step": 3441500 }, { "epoch": 38.22, "learning_rate": 2.943773182595562e-08, "loss": 3.7532, "step": 3442000 }, { "epoch": 38.23, "learning_rate": 2.942385003220576e-08, "loss": 3.7475, "step": 3442500 }, { "epoch": 38.24, "learning_rate": 2.94099682384559e-08, "loss": 3.7576, "step": 3443000 }, { "epoch": 38.24, "learning_rate": 2.9396086444706037e-08, "loss": 3.749, "step": 3443500 }, { "epoch": 38.25, "learning_rate": 2.9382204650956176e-08, "loss": 3.7464, "step": 3444000 }, { "epoch": 38.25, "learning_rate": 2.9368322857206318e-08, "loss": 3.7677, "step": 3444500 }, { "epoch": 38.26, "learning_rate": 2.9354441063456454e-08, "loss": 3.7481, "step": 3445000 }, { "epoch": 38.26, "learning_rate": 2.9340559269706593e-08, "loss": 3.7454, "step": 3445500 }, { "epoch": 38.27, "learning_rate": 2.9326677475956728e-08, "loss": 3.766, "step": 3446000 }, { "epoch": 38.27, "learning_rate": 2.931279568220687e-08, "loss": 3.746, "step": 3446500 }, { "epoch": 38.28, "learning_rate": 2.929891388845701e-08, "loss": 3.7496, "step": 3447000 }, { "epoch": 38.29, "learning_rate": 2.9285032094707145e-08, "loss": 3.7606, "step": 3447500 }, { "epoch": 38.29, "learning_rate": 2.9271150300957287e-08, "loss": 3.7598, "step": 3448000 }, { "epoch": 38.3, "learning_rate": 2.9257268507207426e-08, "loss": 3.7538, "step": 3448500 }, { "epoch": 38.3, "learning_rate": 2.9243386713457562e-08, "loss": 3.7618, "step": 3449000 }, { "epoch": 38.31, "learning_rate": 2.9229504919707704e-08, "loss": 3.7546, "step": 3449500 }, { "epoch": 38.31, "learning_rate": 2.9215623125957843e-08, "loss": 3.7478, "step": 3450000 }, { "epoch": 38.32, "learning_rate": 2.920174133220798e-08, "loss": 3.7631, "step": 3450500 }, { "epoch": 38.32, "learning_rate": 2.918785953845812e-08, "loss": 3.76, "step": 3451000 }, { "epoch": 38.33, "learning_rate": 2.917397774470826e-08, "loss": 3.7539, "step": 3451500 }, { "epoch": 38.34, "learning_rate": 2.9160095950958396e-08, "loss": 3.745, "step": 3452000 }, { "epoch": 38.34, "learning_rate": 2.9146214157208538e-08, "loss": 3.7284, "step": 3452500 }, { "epoch": 38.35, "learning_rate": 2.9132332363458674e-08, "loss": 3.7579, "step": 3453000 }, { "epoch": 38.35, "learning_rate": 2.9118450569708813e-08, "loss": 3.7519, "step": 3453500 }, { "epoch": 38.36, "learning_rate": 2.9104568775958955e-08, "loss": 3.7567, "step": 3454000 }, { "epoch": 38.36, "learning_rate": 2.909068698220909e-08, "loss": 3.7407, "step": 3454500 }, { "epoch": 38.37, "learning_rate": 2.907680518845923e-08, "loss": 3.7542, "step": 3455000 }, { "epoch": 38.37, "learning_rate": 2.906292339470937e-08, "loss": 3.7651, "step": 3455500 }, { "epoch": 38.38, "learning_rate": 2.9049041600959507e-08, "loss": 3.7661, "step": 3456000 }, { "epoch": 38.39, "learning_rate": 2.9035159807209646e-08, "loss": 3.7559, "step": 3456500 }, { "epoch": 38.39, "learning_rate": 2.902127801345979e-08, "loss": 3.7596, "step": 3457000 }, { "epoch": 38.4, "learning_rate": 2.9007396219709924e-08, "loss": 3.753, "step": 3457500 }, { "epoch": 38.4, "learning_rate": 2.8993514425960063e-08, "loss": 3.7527, "step": 3458000 }, { "epoch": 38.41, "learning_rate": 2.8979632632210205e-08, "loss": 3.7503, "step": 3458500 }, { "epoch": 38.41, "learning_rate": 2.896575083846034e-08, "loss": 3.7453, "step": 3459000 }, { "epoch": 38.42, "learning_rate": 2.895186904471048e-08, "loss": 3.7524, "step": 3459500 }, { "epoch": 38.42, "learning_rate": 2.8937987250960622e-08, "loss": 3.7489, "step": 3460000 }, { "epoch": 38.43, "learning_rate": 2.8924105457210758e-08, "loss": 3.745, "step": 3460500 }, { "epoch": 38.44, "learning_rate": 2.8910223663460897e-08, "loss": 3.765, "step": 3461000 }, { "epoch": 38.44, "learning_rate": 2.8896341869711032e-08, "loss": 3.7431, "step": 3461500 }, { "epoch": 38.45, "learning_rate": 2.8882460075961175e-08, "loss": 3.7287, "step": 3462000 }, { "epoch": 38.45, "learning_rate": 2.8868578282211314e-08, "loss": 3.7557, "step": 3462500 }, { "epoch": 38.46, "learning_rate": 2.885469648846145e-08, "loss": 3.7507, "step": 3463000 }, { "epoch": 38.46, "learning_rate": 2.884081469471159e-08, "loss": 3.7603, "step": 3463500 }, { "epoch": 38.47, "learning_rate": 2.882693290096173e-08, "loss": 3.7438, "step": 3464000 }, { "epoch": 38.47, "learning_rate": 2.8813051107211866e-08, "loss": 3.7376, "step": 3464500 }, { "epoch": 38.48, "learning_rate": 2.879916931346201e-08, "loss": 3.7416, "step": 3465000 }, { "epoch": 38.49, "learning_rate": 2.8785287519712147e-08, "loss": 3.7641, "step": 3465500 }, { "epoch": 38.49, "learning_rate": 2.8771405725962283e-08, "loss": 3.7652, "step": 3466000 }, { "epoch": 38.5, "learning_rate": 2.8757523932212425e-08, "loss": 3.7758, "step": 3466500 }, { "epoch": 38.5, "learning_rate": 2.8743642138462564e-08, "loss": 3.7463, "step": 3467000 }, { "epoch": 38.51, "learning_rate": 2.87297603447127e-08, "loss": 3.7515, "step": 3467500 }, { "epoch": 38.51, "learning_rate": 2.8715878550962842e-08, "loss": 3.7448, "step": 3468000 }, { "epoch": 38.52, "learning_rate": 2.8701996757212975e-08, "loss": 3.7526, "step": 3468500 }, { "epoch": 38.52, "learning_rate": 2.8688114963463117e-08, "loss": 3.7561, "step": 3469000 }, { "epoch": 38.53, "learning_rate": 2.867423316971326e-08, "loss": 3.7667, "step": 3469500 }, { "epoch": 38.54, "learning_rate": 2.866035137596339e-08, "loss": 3.7514, "step": 3470000 }, { "epoch": 38.54, "learning_rate": 2.8646469582213534e-08, "loss": 3.7546, "step": 3470500 }, { "epoch": 38.55, "learning_rate": 2.8632587788463676e-08, "loss": 3.7418, "step": 3471000 }, { "epoch": 38.55, "learning_rate": 2.8618705994713808e-08, "loss": 3.7401, "step": 3471500 }, { "epoch": 38.56, "learning_rate": 2.860482420096395e-08, "loss": 3.7646, "step": 3472000 }, { "epoch": 38.56, "learning_rate": 2.8590942407214093e-08, "loss": 3.751, "step": 3472500 }, { "epoch": 38.57, "learning_rate": 2.8577060613464225e-08, "loss": 3.7566, "step": 3473000 }, { "epoch": 38.57, "learning_rate": 2.8563178819714367e-08, "loss": 3.7523, "step": 3473500 }, { "epoch": 38.58, "learning_rate": 2.854929702596451e-08, "loss": 3.7539, "step": 3474000 }, { "epoch": 38.59, "learning_rate": 2.8535415232214642e-08, "loss": 3.7471, "step": 3474500 }, { "epoch": 38.59, "learning_rate": 2.8521533438464784e-08, "loss": 3.744, "step": 3475000 }, { "epoch": 38.6, "learning_rate": 2.850765164471492e-08, "loss": 3.7605, "step": 3475500 }, { "epoch": 38.6, "learning_rate": 2.849376985096506e-08, "loss": 3.7393, "step": 3476000 }, { "epoch": 38.61, "learning_rate": 2.84798880572152e-08, "loss": 3.7382, "step": 3476500 }, { "epoch": 38.61, "learning_rate": 2.8466006263465337e-08, "loss": 3.736, "step": 3477000 }, { "epoch": 38.62, "learning_rate": 2.8452124469715476e-08, "loss": 3.7554, "step": 3477500 }, { "epoch": 38.62, "learning_rate": 2.8438242675965618e-08, "loss": 3.7476, "step": 3478000 }, { "epoch": 38.63, "learning_rate": 2.8424360882215754e-08, "loss": 3.7556, "step": 3478500 }, { "epoch": 38.64, "learning_rate": 2.8410479088465893e-08, "loss": 3.7514, "step": 3479000 }, { "epoch": 38.64, "learning_rate": 2.8396597294716035e-08, "loss": 3.7473, "step": 3479500 }, { "epoch": 38.65, "learning_rate": 2.838271550096617e-08, "loss": 3.7356, "step": 3480000 }, { "epoch": 38.65, "learning_rate": 2.836883370721631e-08, "loss": 3.7608, "step": 3480500 }, { "epoch": 38.66, "learning_rate": 2.8354951913466452e-08, "loss": 3.7694, "step": 3481000 }, { "epoch": 38.66, "learning_rate": 2.8341070119716587e-08, "loss": 3.7454, "step": 3481500 }, { "epoch": 38.67, "learning_rate": 2.8327188325966726e-08, "loss": 3.7566, "step": 3482000 }, { "epoch": 38.67, "learning_rate": 2.8313306532216862e-08, "loss": 3.743, "step": 3482500 }, { "epoch": 38.68, "learning_rate": 2.8299424738467004e-08, "loss": 3.745, "step": 3483000 }, { "epoch": 38.69, "learning_rate": 2.8285542944717143e-08, "loss": 3.7596, "step": 3483500 }, { "epoch": 38.69, "learning_rate": 2.827166115096728e-08, "loss": 3.7651, "step": 3484000 }, { "epoch": 38.7, "learning_rate": 2.825777935721742e-08, "loss": 3.752, "step": 3484500 }, { "epoch": 38.7, "learning_rate": 2.824389756346756e-08, "loss": 3.746, "step": 3485000 }, { "epoch": 38.71, "learning_rate": 2.8230015769717696e-08, "loss": 3.7606, "step": 3485500 }, { "epoch": 38.71, "learning_rate": 2.8216133975967838e-08, "loss": 3.7707, "step": 3486000 }, { "epoch": 38.72, "learning_rate": 2.8202252182217977e-08, "loss": 3.742, "step": 3486500 }, { "epoch": 38.72, "learning_rate": 2.8188370388468113e-08, "loss": 3.745, "step": 3487000 }, { "epoch": 38.73, "learning_rate": 2.8174488594718255e-08, "loss": 3.7541, "step": 3487500 }, { "epoch": 38.74, "learning_rate": 2.8160606800968394e-08, "loss": 3.7565, "step": 3488000 }, { "epoch": 38.74, "learning_rate": 2.814672500721853e-08, "loss": 3.7212, "step": 3488500 }, { "epoch": 38.75, "learning_rate": 2.8132843213468672e-08, "loss": 3.7444, "step": 3489000 }, { "epoch": 38.75, "learning_rate": 2.811896141971881e-08, "loss": 3.75, "step": 3489500 }, { "epoch": 38.76, "learning_rate": 2.8105079625968946e-08, "loss": 3.7609, "step": 3490000 }, { "epoch": 38.76, "learning_rate": 2.809119783221909e-08, "loss": 3.7626, "step": 3490500 }, { "epoch": 38.77, "learning_rate": 2.8077316038469224e-08, "loss": 3.7413, "step": 3491000 }, { "epoch": 38.77, "learning_rate": 2.8063434244719363e-08, "loss": 3.739, "step": 3491500 }, { "epoch": 38.78, "learning_rate": 2.8049552450969505e-08, "loss": 3.7489, "step": 3492000 }, { "epoch": 38.79, "learning_rate": 2.803567065721964e-08, "loss": 3.7706, "step": 3492500 }, { "epoch": 38.79, "learning_rate": 2.802178886346978e-08, "loss": 3.7609, "step": 3493000 }, { "epoch": 38.8, "learning_rate": 2.8007907069719922e-08, "loss": 3.7575, "step": 3493500 }, { "epoch": 38.8, "learning_rate": 2.7994025275970058e-08, "loss": 3.7854, "step": 3494000 }, { "epoch": 38.81, "learning_rate": 2.7980143482220197e-08, "loss": 3.7427, "step": 3494500 }, { "epoch": 38.81, "learning_rate": 2.796626168847034e-08, "loss": 3.7524, "step": 3495000 }, { "epoch": 38.82, "learning_rate": 2.7952379894720475e-08, "loss": 3.7357, "step": 3495500 }, { "epoch": 38.82, "learning_rate": 2.7938498100970614e-08, "loss": 3.7485, "step": 3496000 }, { "epoch": 38.83, "learning_rate": 2.7924616307220756e-08, "loss": 3.7661, "step": 3496500 }, { "epoch": 38.84, "learning_rate": 2.7910734513470892e-08, "loss": 3.7673, "step": 3497000 }, { "epoch": 38.84, "learning_rate": 2.789685271972103e-08, "loss": 3.7539, "step": 3497500 }, { "epoch": 38.85, "learning_rate": 2.7882970925971166e-08, "loss": 3.7616, "step": 3498000 }, { "epoch": 38.85, "learning_rate": 2.786908913222131e-08, "loss": 3.7651, "step": 3498500 }, { "epoch": 38.86, "learning_rate": 2.7855207338471448e-08, "loss": 3.7554, "step": 3499000 }, { "epoch": 38.86, "learning_rate": 2.7841325544721583e-08, "loss": 3.7668, "step": 3499500 }, { "epoch": 38.87, "learning_rate": 2.7827443750971725e-08, "loss": 3.7682, "step": 3500000 }, { "epoch": 38.87, "learning_rate": 2.7813561957221864e-08, "loss": 3.7428, "step": 3500500 }, { "epoch": 38.88, "learning_rate": 2.7799680163472e-08, "loss": 3.7513, "step": 3501000 }, { "epoch": 38.89, "learning_rate": 2.7785798369722142e-08, "loss": 3.7702, "step": 3501500 }, { "epoch": 38.89, "learning_rate": 2.777191657597228e-08, "loss": 3.7647, "step": 3502000 }, { "epoch": 38.9, "learning_rate": 2.7758034782222417e-08, "loss": 3.7353, "step": 3502500 }, { "epoch": 38.9, "learning_rate": 2.774415298847256e-08, "loss": 3.7527, "step": 3503000 }, { "epoch": 38.91, "learning_rate": 2.7730271194722698e-08, "loss": 3.7537, "step": 3503500 }, { "epoch": 38.91, "learning_rate": 2.7716389400972834e-08, "loss": 3.7481, "step": 3504000 }, { "epoch": 38.92, "learning_rate": 2.7702507607222976e-08, "loss": 3.7445, "step": 3504500 }, { "epoch": 38.92, "learning_rate": 2.768862581347311e-08, "loss": 3.7493, "step": 3505000 }, { "epoch": 38.93, "learning_rate": 2.767474401972325e-08, "loss": 3.7441, "step": 3505500 }, { "epoch": 38.94, "learning_rate": 2.7660862225973393e-08, "loss": 3.7656, "step": 3506000 }, { "epoch": 38.94, "learning_rate": 2.7646980432223525e-08, "loss": 3.7527, "step": 3506500 }, { "epoch": 38.95, "learning_rate": 2.7633098638473667e-08, "loss": 3.7352, "step": 3507000 }, { "epoch": 38.95, "learning_rate": 2.761921684472381e-08, "loss": 3.7693, "step": 3507500 }, { "epoch": 38.96, "learning_rate": 2.7605335050973942e-08, "loss": 3.7429, "step": 3508000 }, { "epoch": 38.96, "learning_rate": 2.7591453257224084e-08, "loss": 3.7384, "step": 3508500 }, { "epoch": 38.97, "learning_rate": 2.7577571463474227e-08, "loss": 3.748, "step": 3509000 }, { "epoch": 38.97, "learning_rate": 2.756368966972436e-08, "loss": 3.7727, "step": 3509500 }, { "epoch": 38.98, "learning_rate": 2.75498078759745e-08, "loss": 3.7509, "step": 3510000 }, { "epoch": 38.99, "learning_rate": 2.7535926082224643e-08, "loss": 3.7379, "step": 3510500 }, { "epoch": 38.99, "learning_rate": 2.7522044288474776e-08, "loss": 3.7386, "step": 3511000 }, { "epoch": 39.0, "learning_rate": 2.7508162494724918e-08, "loss": 3.7521, "step": 3511500 }, { "epoch": 39.0, "eval_loss": 3.82729172706604, "eval_runtime": 6.3024, "eval_samples_per_second": 246.573, "step": 3511794 }, { "epoch": 39.0, "learning_rate": 2.7494280700975054e-08, "loss": 3.7364, "step": 3512000 }, { "epoch": 39.01, "learning_rate": 2.7480398907225193e-08, "loss": 3.7451, "step": 3512500 }, { "epoch": 39.01, "learning_rate": 2.7466517113475335e-08, "loss": 3.7426, "step": 3513000 }, { "epoch": 39.02, "learning_rate": 2.745263531972547e-08, "loss": 3.764, "step": 3513500 }, { "epoch": 39.02, "learning_rate": 2.743875352597561e-08, "loss": 3.7632, "step": 3514000 }, { "epoch": 39.03, "learning_rate": 2.7424871732225752e-08, "loss": 3.7508, "step": 3514500 }, { "epoch": 39.04, "learning_rate": 2.7410989938475887e-08, "loss": 3.7539, "step": 3515000 }, { "epoch": 39.04, "learning_rate": 2.7397108144726026e-08, "loss": 3.7615, "step": 3515500 }, { "epoch": 39.05, "learning_rate": 2.738322635097617e-08, "loss": 3.7502, "step": 3516000 }, { "epoch": 39.05, "learning_rate": 2.7369344557226304e-08, "loss": 3.7465, "step": 3516500 }, { "epoch": 39.06, "learning_rate": 2.7355462763476443e-08, "loss": 3.7516, "step": 3517000 }, { "epoch": 39.06, "learning_rate": 2.7341580969726586e-08, "loss": 3.7589, "step": 3517500 }, { "epoch": 39.07, "learning_rate": 2.732769917597672e-08, "loss": 3.7642, "step": 3518000 }, { "epoch": 39.07, "learning_rate": 2.731381738222686e-08, "loss": 3.7521, "step": 3518500 }, { "epoch": 39.08, "learning_rate": 2.7299935588477002e-08, "loss": 3.7461, "step": 3519000 }, { "epoch": 39.09, "learning_rate": 2.7286053794727138e-08, "loss": 3.7551, "step": 3519500 }, { "epoch": 39.09, "learning_rate": 2.7272172000977277e-08, "loss": 3.7707, "step": 3520000 }, { "epoch": 39.1, "learning_rate": 2.7258290207227413e-08, "loss": 3.7479, "step": 3520500 }, { "epoch": 39.1, "learning_rate": 2.7244408413477555e-08, "loss": 3.7665, "step": 3521000 }, { "epoch": 39.11, "learning_rate": 2.7230526619727694e-08, "loss": 3.7669, "step": 3521500 }, { "epoch": 39.11, "learning_rate": 2.721664482597783e-08, "loss": 3.731, "step": 3522000 }, { "epoch": 39.12, "learning_rate": 2.7202763032227972e-08, "loss": 3.7581, "step": 3522500 }, { "epoch": 39.12, "learning_rate": 2.718888123847811e-08, "loss": 3.7739, "step": 3523000 }, { "epoch": 39.13, "learning_rate": 2.7174999444728246e-08, "loss": 3.7391, "step": 3523500 }, { "epoch": 39.14, "learning_rate": 2.716111765097839e-08, "loss": 3.7496, "step": 3524000 }, { "epoch": 39.14, "learning_rate": 2.7147235857228528e-08, "loss": 3.7448, "step": 3524500 }, { "epoch": 39.15, "learning_rate": 2.7133354063478663e-08, "loss": 3.7557, "step": 3525000 }, { "epoch": 39.15, "learning_rate": 2.7119472269728806e-08, "loss": 3.734, "step": 3525500 }, { "epoch": 39.16, "learning_rate": 2.7105590475978944e-08, "loss": 3.7516, "step": 3526000 }, { "epoch": 39.16, "learning_rate": 2.709170868222908e-08, "loss": 3.7597, "step": 3526500 }, { "epoch": 39.17, "learning_rate": 2.7077826888479222e-08, "loss": 3.7613, "step": 3527000 }, { "epoch": 39.17, "learning_rate": 2.7063945094729358e-08, "loss": 3.7499, "step": 3527500 }, { "epoch": 39.18, "learning_rate": 2.7050063300979497e-08, "loss": 3.7534, "step": 3528000 }, { "epoch": 39.19, "learning_rate": 2.703618150722964e-08, "loss": 3.7728, "step": 3528500 }, { "epoch": 39.19, "learning_rate": 2.7022299713479775e-08, "loss": 3.7418, "step": 3529000 }, { "epoch": 39.2, "learning_rate": 2.7008417919729914e-08, "loss": 3.7534, "step": 3529500 }, { "epoch": 39.2, "learning_rate": 2.6994536125980056e-08, "loss": 3.7543, "step": 3530000 }, { "epoch": 39.21, "learning_rate": 2.6980654332230192e-08, "loss": 3.7337, "step": 3530500 }, { "epoch": 39.21, "learning_rate": 2.696677253848033e-08, "loss": 3.7574, "step": 3531000 }, { "epoch": 39.22, "learning_rate": 2.6952890744730473e-08, "loss": 3.7431, "step": 3531500 }, { "epoch": 39.22, "learning_rate": 2.693900895098061e-08, "loss": 3.7423, "step": 3532000 }, { "epoch": 39.23, "learning_rate": 2.6925127157230748e-08, "loss": 3.7417, "step": 3532500 }, { "epoch": 39.24, "learning_rate": 2.691124536348089e-08, "loss": 3.7629, "step": 3533000 }, { "epoch": 39.24, "learning_rate": 2.6897363569731026e-08, "loss": 3.7343, "step": 3533500 }, { "epoch": 39.25, "learning_rate": 2.6883481775981164e-08, "loss": 3.7511, "step": 3534000 }, { "epoch": 39.25, "learning_rate": 2.68695999822313e-08, "loss": 3.7429, "step": 3534500 }, { "epoch": 39.26, "learning_rate": 2.6855718188481442e-08, "loss": 3.7562, "step": 3535000 }, { "epoch": 39.26, "learning_rate": 2.684183639473158e-08, "loss": 3.752, "step": 3535500 }, { "epoch": 39.27, "learning_rate": 2.6827954600981717e-08, "loss": 3.7493, "step": 3536000 }, { "epoch": 39.27, "learning_rate": 2.681407280723186e-08, "loss": 3.7525, "step": 3536500 }, { "epoch": 39.28, "learning_rate": 2.6800191013481998e-08, "loss": 3.7383, "step": 3537000 }, { "epoch": 39.29, "learning_rate": 2.6786309219732134e-08, "loss": 3.7495, "step": 3537500 }, { "epoch": 39.29, "learning_rate": 2.6772427425982276e-08, "loss": 3.7643, "step": 3538000 }, { "epoch": 39.3, "learning_rate": 2.6758545632232415e-08, "loss": 3.7354, "step": 3538500 }, { "epoch": 39.3, "learning_rate": 2.674466383848255e-08, "loss": 3.7526, "step": 3539000 }, { "epoch": 39.31, "learning_rate": 2.6730782044732693e-08, "loss": 3.7405, "step": 3539500 }, { "epoch": 39.31, "learning_rate": 2.6716900250982832e-08, "loss": 3.7404, "step": 3540000 }, { "epoch": 39.32, "learning_rate": 2.6703018457232968e-08, "loss": 3.7521, "step": 3540500 }, { "epoch": 39.32, "learning_rate": 2.668913666348311e-08, "loss": 3.728, "step": 3541000 }, { "epoch": 39.33, "learning_rate": 2.6675254869733242e-08, "loss": 3.7332, "step": 3541500 }, { "epoch": 39.34, "learning_rate": 2.6661373075983384e-08, "loss": 3.7437, "step": 3542000 }, { "epoch": 39.34, "learning_rate": 2.6647491282233527e-08, "loss": 3.7499, "step": 3542500 }, { "epoch": 39.35, "learning_rate": 2.663360948848366e-08, "loss": 3.7398, "step": 3543000 }, { "epoch": 39.35, "learning_rate": 2.66197276947338e-08, "loss": 3.7479, "step": 3543500 }, { "epoch": 39.36, "learning_rate": 2.6605845900983944e-08, "loss": 3.7543, "step": 3544000 }, { "epoch": 39.36, "learning_rate": 2.6591964107234076e-08, "loss": 3.7407, "step": 3544500 }, { "epoch": 39.37, "learning_rate": 2.6578082313484218e-08, "loss": 3.7523, "step": 3545000 }, { "epoch": 39.37, "learning_rate": 2.656420051973436e-08, "loss": 3.7489, "step": 3545500 }, { "epoch": 39.38, "learning_rate": 2.6550318725984493e-08, "loss": 3.7331, "step": 3546000 }, { "epoch": 39.39, "learning_rate": 2.6536436932234635e-08, "loss": 3.7488, "step": 3546500 }, { "epoch": 39.39, "learning_rate": 2.6522555138484777e-08, "loss": 3.7547, "step": 3547000 }, { "epoch": 39.4, "learning_rate": 2.650867334473491e-08, "loss": 3.7471, "step": 3547500 }, { "epoch": 39.4, "learning_rate": 2.6494791550985052e-08, "loss": 3.7585, "step": 3548000 }, { "epoch": 39.41, "learning_rate": 2.6480909757235188e-08, "loss": 3.7486, "step": 3548500 }, { "epoch": 39.41, "learning_rate": 2.6467027963485327e-08, "loss": 3.7359, "step": 3549000 }, { "epoch": 39.42, "learning_rate": 2.645314616973547e-08, "loss": 3.7748, "step": 3549500 }, { "epoch": 39.42, "learning_rate": 2.6439264375985604e-08, "loss": 3.7561, "step": 3550000 }, { "epoch": 39.43, "learning_rate": 2.6425382582235743e-08, "loss": 3.7611, "step": 3550500 }, { "epoch": 39.44, "learning_rate": 2.6411500788485886e-08, "loss": 3.7402, "step": 3551000 }, { "epoch": 39.44, "learning_rate": 2.639761899473602e-08, "loss": 3.7467, "step": 3551500 }, { "epoch": 39.45, "learning_rate": 2.638373720098616e-08, "loss": 3.7491, "step": 3552000 }, { "epoch": 39.45, "learning_rate": 2.6369855407236303e-08, "loss": 3.7595, "step": 3552500 }, { "epoch": 39.46, "learning_rate": 2.6355973613486438e-08, "loss": 3.7478, "step": 3553000 }, { "epoch": 39.46, "learning_rate": 2.6342091819736577e-08, "loss": 3.7376, "step": 3553500 }, { "epoch": 39.47, "learning_rate": 2.632821002598672e-08, "loss": 3.7532, "step": 3554000 }, { "epoch": 39.47, "learning_rate": 2.6314328232236855e-08, "loss": 3.7675, "step": 3554500 }, { "epoch": 39.48, "learning_rate": 2.6300446438486994e-08, "loss": 3.7457, "step": 3555000 }, { "epoch": 39.49, "learning_rate": 2.6286564644737136e-08, "loss": 3.7635, "step": 3555500 }, { "epoch": 39.49, "learning_rate": 2.6272682850987272e-08, "loss": 3.7608, "step": 3556000 }, { "epoch": 39.5, "learning_rate": 2.625880105723741e-08, "loss": 3.7744, "step": 3556500 }, { "epoch": 39.5, "learning_rate": 2.6244919263487546e-08, "loss": 3.752, "step": 3557000 }, { "epoch": 39.51, "learning_rate": 2.623103746973769e-08, "loss": 3.7555, "step": 3557500 }, { "epoch": 39.51, "learning_rate": 2.6217155675987828e-08, "loss": 3.7638, "step": 3558000 }, { "epoch": 39.52, "learning_rate": 2.6203273882237963e-08, "loss": 3.77, "step": 3558500 }, { "epoch": 39.52, "learning_rate": 2.6189392088488106e-08, "loss": 3.7632, "step": 3559000 }, { "epoch": 39.53, "learning_rate": 2.6175510294738245e-08, "loss": 3.7377, "step": 3559500 }, { "epoch": 39.54, "learning_rate": 2.616162850098838e-08, "loss": 3.7491, "step": 3560000 }, { "epoch": 39.54, "learning_rate": 2.6147746707238522e-08, "loss": 3.7468, "step": 3560500 }, { "epoch": 39.55, "learning_rate": 2.613386491348866e-08, "loss": 3.7298, "step": 3561000 }, { "epoch": 39.55, "learning_rate": 2.6119983119738797e-08, "loss": 3.7588, "step": 3561500 }, { "epoch": 39.56, "learning_rate": 2.610610132598894e-08, "loss": 3.7634, "step": 3562000 }, { "epoch": 39.56, "learning_rate": 2.6092219532239078e-08, "loss": 3.7626, "step": 3562500 }, { "epoch": 39.57, "learning_rate": 2.6078337738489214e-08, "loss": 3.7645, "step": 3563000 }, { "epoch": 39.57, "learning_rate": 2.6064455944739356e-08, "loss": 3.7373, "step": 3563500 }, { "epoch": 39.58, "learning_rate": 2.6050574150989492e-08, "loss": 3.7687, "step": 3564000 }, { "epoch": 39.59, "learning_rate": 2.603669235723963e-08, "loss": 3.7589, "step": 3564500 }, { "epoch": 39.59, "learning_rate": 2.6022810563489773e-08, "loss": 3.75, "step": 3565000 }, { "epoch": 39.6, "learning_rate": 2.600892876973991e-08, "loss": 3.7374, "step": 3565500 }, { "epoch": 39.6, "learning_rate": 2.5995046975990048e-08, "loss": 3.7585, "step": 3566000 }, { "epoch": 39.61, "learning_rate": 2.598116518224019e-08, "loss": 3.7565, "step": 3566500 }, { "epoch": 39.61, "learning_rate": 2.5967283388490326e-08, "loss": 3.7653, "step": 3567000 }, { "epoch": 39.62, "learning_rate": 2.5953401594740465e-08, "loss": 3.7511, "step": 3567500 }, { "epoch": 39.62, "learning_rate": 2.5939519800990607e-08, "loss": 3.7425, "step": 3568000 }, { "epoch": 39.63, "learning_rate": 2.5925638007240742e-08, "loss": 3.7602, "step": 3568500 }, { "epoch": 39.64, "learning_rate": 2.591175621349088e-08, "loss": 3.74, "step": 3569000 }, { "epoch": 39.64, "learning_rate": 2.5897874419741024e-08, "loss": 3.7633, "step": 3569500 }, { "epoch": 39.65, "learning_rate": 2.588399262599116e-08, "loss": 3.7459, "step": 3570000 }, { "epoch": 39.65, "learning_rate": 2.5870110832241298e-08, "loss": 3.7427, "step": 3570500 }, { "epoch": 39.66, "learning_rate": 2.5856229038491434e-08, "loss": 3.7378, "step": 3571000 }, { "epoch": 39.66, "learning_rate": 2.5842347244741576e-08, "loss": 3.7611, "step": 3571500 }, { "epoch": 39.67, "learning_rate": 2.5828465450991715e-08, "loss": 3.7641, "step": 3572000 }, { "epoch": 39.67, "learning_rate": 2.581458365724185e-08, "loss": 3.7389, "step": 3572500 }, { "epoch": 39.68, "learning_rate": 2.5800701863491993e-08, "loss": 3.7494, "step": 3573000 }, { "epoch": 39.69, "learning_rate": 2.5786820069742132e-08, "loss": 3.7645, "step": 3573500 }, { "epoch": 39.69, "learning_rate": 2.5772938275992268e-08, "loss": 3.7459, "step": 3574000 }, { "epoch": 39.7, "learning_rate": 2.575905648224241e-08, "loss": 3.7464, "step": 3574500 }, { "epoch": 39.7, "learning_rate": 2.574517468849255e-08, "loss": 3.7359, "step": 3575000 }, { "epoch": 39.71, "learning_rate": 2.5731292894742685e-08, "loss": 3.735, "step": 3575500 }, { "epoch": 39.71, "learning_rate": 2.5717411100992827e-08, "loss": 3.7456, "step": 3576000 }, { "epoch": 39.72, "learning_rate": 2.5703529307242966e-08, "loss": 3.7683, "step": 3576500 }, { "epoch": 39.72, "learning_rate": 2.56896475134931e-08, "loss": 3.7525, "step": 3577000 }, { "epoch": 39.73, "learning_rate": 2.5675765719743244e-08, "loss": 3.7698, "step": 3577500 }, { "epoch": 39.74, "learning_rate": 2.5661883925993376e-08, "loss": 3.7659, "step": 3578000 }, { "epoch": 39.74, "learning_rate": 2.5648002132243518e-08, "loss": 3.7428, "step": 3578500 }, { "epoch": 39.75, "learning_rate": 2.5634120338493657e-08, "loss": 3.7521, "step": 3579000 }, { "epoch": 39.75, "learning_rate": 2.5620238544743793e-08, "loss": 3.7483, "step": 3579500 }, { "epoch": 39.76, "learning_rate": 2.5606356750993935e-08, "loss": 3.762, "step": 3580000 }, { "epoch": 39.76, "learning_rate": 2.5592474957244074e-08, "loss": 3.7581, "step": 3580500 }, { "epoch": 39.77, "learning_rate": 2.557859316349421e-08, "loss": 3.7611, "step": 3581000 }, { "epoch": 39.77, "learning_rate": 2.5564711369744352e-08, "loss": 3.7358, "step": 3581500 }, { "epoch": 39.78, "learning_rate": 2.555082957599449e-08, "loss": 3.7613, "step": 3582000 }, { "epoch": 39.79, "learning_rate": 2.5536947782244627e-08, "loss": 3.7352, "step": 3582500 }, { "epoch": 39.79, "learning_rate": 2.552306598849477e-08, "loss": 3.7403, "step": 3583000 }, { "epoch": 39.8, "learning_rate": 2.5509184194744908e-08, "loss": 3.7634, "step": 3583500 }, { "epoch": 39.8, "learning_rate": 2.5495302400995043e-08, "loss": 3.7427, "step": 3584000 }, { "epoch": 39.81, "learning_rate": 2.5481420607245186e-08, "loss": 3.7565, "step": 3584500 }, { "epoch": 39.81, "learning_rate": 2.5467538813495325e-08, "loss": 3.7362, "step": 3585000 }, { "epoch": 39.82, "learning_rate": 2.545365701974546e-08, "loss": 3.7578, "step": 3585500 }, { "epoch": 39.82, "learning_rate": 2.5439775225995603e-08, "loss": 3.768, "step": 3586000 }, { "epoch": 39.83, "learning_rate": 2.5425893432245738e-08, "loss": 3.7456, "step": 3586500 }, { "epoch": 39.84, "learning_rate": 2.5412011638495877e-08, "loss": 3.7613, "step": 3587000 }, { "epoch": 39.84, "learning_rate": 2.539812984474602e-08, "loss": 3.7482, "step": 3587500 }, { "epoch": 39.85, "learning_rate": 2.5384248050996155e-08, "loss": 3.7567, "step": 3588000 }, { "epoch": 39.85, "learning_rate": 2.5370366257246294e-08, "loss": 3.7471, "step": 3588500 }, { "epoch": 39.86, "learning_rate": 2.5356484463496436e-08, "loss": 3.7614, "step": 3589000 }, { "epoch": 39.86, "learning_rate": 2.5342602669746572e-08, "loss": 3.7504, "step": 3589500 }, { "epoch": 39.87, "learning_rate": 2.532872087599671e-08, "loss": 3.7457, "step": 3590000 }, { "epoch": 39.87, "learning_rate": 2.5314839082246853e-08, "loss": 3.7433, "step": 3590500 }, { "epoch": 39.88, "learning_rate": 2.530095728849699e-08, "loss": 3.7718, "step": 3591000 }, { "epoch": 39.89, "learning_rate": 2.5287075494747128e-08, "loss": 3.749, "step": 3591500 }, { "epoch": 39.89, "learning_rate": 2.527319370099727e-08, "loss": 3.746, "step": 3592000 }, { "epoch": 39.9, "learning_rate": 2.5259311907247406e-08, "loss": 3.7566, "step": 3592500 }, { "epoch": 39.9, "learning_rate": 2.5245430113497545e-08, "loss": 3.7559, "step": 3593000 }, { "epoch": 39.91, "learning_rate": 2.523154831974768e-08, "loss": 3.7397, "step": 3593500 }, { "epoch": 39.91, "learning_rate": 2.5217666525997823e-08, "loss": 3.7608, "step": 3594000 }, { "epoch": 39.92, "learning_rate": 2.520378473224796e-08, "loss": 3.755, "step": 3594500 }, { "epoch": 39.92, "learning_rate": 2.5189902938498097e-08, "loss": 3.7598, "step": 3595000 }, { "epoch": 39.93, "learning_rate": 2.517602114474824e-08, "loss": 3.7525, "step": 3595500 }, { "epoch": 39.94, "learning_rate": 2.516213935099838e-08, "loss": 3.7196, "step": 3596000 }, { "epoch": 39.94, "learning_rate": 2.5148257557248514e-08, "loss": 3.7425, "step": 3596500 }, { "epoch": 39.95, "learning_rate": 2.5134375763498656e-08, "loss": 3.7719, "step": 3597000 }, { "epoch": 39.95, "learning_rate": 2.5120493969748795e-08, "loss": 3.7451, "step": 3597500 }, { "epoch": 39.96, "learning_rate": 2.510661217599893e-08, "loss": 3.7648, "step": 3598000 }, { "epoch": 39.96, "learning_rate": 2.5092730382249073e-08, "loss": 3.7464, "step": 3598500 }, { "epoch": 39.97, "learning_rate": 2.5078848588499212e-08, "loss": 3.7555, "step": 3599000 }, { "epoch": 39.97, "learning_rate": 2.5064966794749348e-08, "loss": 3.7586, "step": 3599500 }, { "epoch": 39.98, "learning_rate": 2.505108500099949e-08, "loss": 3.7495, "step": 3600000 }, { "epoch": 39.99, "learning_rate": 2.5037203207249626e-08, "loss": 3.767, "step": 3600500 }, { "epoch": 39.99, "learning_rate": 2.5023321413499765e-08, "loss": 3.7543, "step": 3601000 }, { "epoch": 40.0, "learning_rate": 2.5009439619749907e-08, "loss": 3.7413, "step": 3601500 }, { "epoch": 40.0, "eval_loss": 3.8263349533081055, "eval_runtime": 6.3013, "eval_samples_per_second": 246.615, "step": 3601840 }, { "epoch": 40.0, "learning_rate": 2.4995557826000043e-08, "loss": 3.7471, "step": 3602000 }, { "epoch": 40.01, "learning_rate": 2.498167603225018e-08, "loss": 3.7482, "step": 3602500 }, { "epoch": 40.01, "learning_rate": 2.496779423850032e-08, "loss": 3.7522, "step": 3603000 }, { "epoch": 40.02, "learning_rate": 2.495391244475046e-08, "loss": 3.7459, "step": 3603500 }, { "epoch": 40.02, "learning_rate": 2.49400306510006e-08, "loss": 3.748, "step": 3604000 }, { "epoch": 40.03, "learning_rate": 2.4926148857250737e-08, "loss": 3.7795, "step": 3604500 }, { "epoch": 40.04, "learning_rate": 2.4912267063500876e-08, "loss": 3.7414, "step": 3605000 }, { "epoch": 40.04, "learning_rate": 2.4898385269751015e-08, "loss": 3.7623, "step": 3605500 }, { "epoch": 40.05, "learning_rate": 2.4884503476001154e-08, "loss": 3.7607, "step": 3606000 }, { "epoch": 40.05, "learning_rate": 2.4870621682251293e-08, "loss": 3.7598, "step": 3606500 }, { "epoch": 40.06, "learning_rate": 2.4856739888501432e-08, "loss": 3.7501, "step": 3607000 }, { "epoch": 40.06, "learning_rate": 2.484285809475157e-08, "loss": 3.7554, "step": 3607500 }, { "epoch": 40.07, "learning_rate": 2.482897630100171e-08, "loss": 3.7579, "step": 3608000 }, { "epoch": 40.07, "learning_rate": 2.481509450725185e-08, "loss": 3.7378, "step": 3608500 }, { "epoch": 40.08, "learning_rate": 2.4801212713501988e-08, "loss": 3.7683, "step": 3609000 }, { "epoch": 40.09, "learning_rate": 2.4787330919752124e-08, "loss": 3.7673, "step": 3609500 }, { "epoch": 40.09, "learning_rate": 2.4773449126002263e-08, "loss": 3.7278, "step": 3610000 }, { "epoch": 40.1, "learning_rate": 2.4759567332252405e-08, "loss": 3.7523, "step": 3610500 }, { "epoch": 40.1, "learning_rate": 2.474568553850254e-08, "loss": 3.7508, "step": 3611000 }, { "epoch": 40.11, "learning_rate": 2.473180374475268e-08, "loss": 3.7685, "step": 3611500 }, { "epoch": 40.11, "learning_rate": 2.471792195100282e-08, "loss": 3.7623, "step": 3612000 }, { "epoch": 40.12, "learning_rate": 2.4704040157252957e-08, "loss": 3.7518, "step": 3612500 }, { "epoch": 40.12, "learning_rate": 2.4690158363503096e-08, "loss": 3.7546, "step": 3613000 }, { "epoch": 40.13, "learning_rate": 2.4676276569753235e-08, "loss": 3.7676, "step": 3613500 }, { "epoch": 40.14, "learning_rate": 2.4662394776003374e-08, "loss": 3.7538, "step": 3614000 }, { "epoch": 40.14, "learning_rate": 2.4648512982253513e-08, "loss": 3.7411, "step": 3614500 }, { "epoch": 40.15, "learning_rate": 2.4634631188503652e-08, "loss": 3.761, "step": 3615000 }, { "epoch": 40.15, "learning_rate": 2.462074939475379e-08, "loss": 3.7577, "step": 3615500 }, { "epoch": 40.16, "learning_rate": 2.460686760100393e-08, "loss": 3.7587, "step": 3616000 }, { "epoch": 40.16, "learning_rate": 2.459298580725407e-08, "loss": 3.7592, "step": 3616500 }, { "epoch": 40.17, "learning_rate": 2.4579104013504208e-08, "loss": 3.7605, "step": 3617000 }, { "epoch": 40.17, "learning_rate": 2.4565222219754347e-08, "loss": 3.7463, "step": 3617500 }, { "epoch": 40.18, "learning_rate": 2.4551340426004486e-08, "loss": 3.7551, "step": 3618000 }, { "epoch": 40.19, "learning_rate": 2.4537458632254625e-08, "loss": 3.7524, "step": 3618500 }, { "epoch": 40.19, "learning_rate": 2.4523576838504764e-08, "loss": 3.7378, "step": 3619000 }, { "epoch": 40.2, "learning_rate": 2.4509695044754903e-08, "loss": 3.7618, "step": 3619500 }, { "epoch": 40.2, "learning_rate": 2.449581325100504e-08, "loss": 3.7568, "step": 3620000 }, { "epoch": 40.21, "learning_rate": 2.4481931457255177e-08, "loss": 3.7693, "step": 3620500 }, { "epoch": 40.21, "learning_rate": 2.446804966350532e-08, "loss": 3.743, "step": 3621000 }, { "epoch": 40.22, "learning_rate": 2.445416786975546e-08, "loss": 3.7329, "step": 3621500 }, { "epoch": 40.22, "learning_rate": 2.4440286076005594e-08, "loss": 3.7491, "step": 3622000 }, { "epoch": 40.23, "learning_rate": 2.4426404282255736e-08, "loss": 3.7615, "step": 3622500 }, { "epoch": 40.23, "learning_rate": 2.4412522488505875e-08, "loss": 3.7561, "step": 3623000 }, { "epoch": 40.24, "learning_rate": 2.439864069475601e-08, "loss": 3.7551, "step": 3623500 }, { "epoch": 40.25, "learning_rate": 2.438475890100615e-08, "loss": 3.7568, "step": 3624000 }, { "epoch": 40.25, "learning_rate": 2.4370877107256292e-08, "loss": 3.7457, "step": 3624500 }, { "epoch": 40.26, "learning_rate": 2.4356995313506428e-08, "loss": 3.7534, "step": 3625000 }, { "epoch": 40.26, "learning_rate": 2.4343113519756567e-08, "loss": 3.7401, "step": 3625500 }, { "epoch": 40.27, "learning_rate": 2.432923172600671e-08, "loss": 3.752, "step": 3626000 }, { "epoch": 40.27, "learning_rate": 2.4315349932256845e-08, "loss": 3.7561, "step": 3626500 }, { "epoch": 40.28, "learning_rate": 2.4301468138506984e-08, "loss": 3.7625, "step": 3627000 }, { "epoch": 40.28, "learning_rate": 2.4287586344757126e-08, "loss": 3.7336, "step": 3627500 }, { "epoch": 40.29, "learning_rate": 2.427370455100726e-08, "loss": 3.7718, "step": 3628000 }, { "epoch": 40.3, "learning_rate": 2.42598227572574e-08, "loss": 3.7393, "step": 3628500 }, { "epoch": 40.3, "learning_rate": 2.424594096350754e-08, "loss": 3.7511, "step": 3629000 }, { "epoch": 40.31, "learning_rate": 2.423205916975768e-08, "loss": 3.7438, "step": 3629500 }, { "epoch": 40.31, "learning_rate": 2.4218177376007817e-08, "loss": 3.7411, "step": 3630000 }, { "epoch": 40.32, "learning_rate": 2.4204295582257956e-08, "loss": 3.765, "step": 3630500 }, { "epoch": 40.32, "learning_rate": 2.4190413788508095e-08, "loss": 3.7321, "step": 3631000 }, { "epoch": 40.33, "learning_rate": 2.4176531994758234e-08, "loss": 3.7451, "step": 3631500 }, { "epoch": 40.33, "learning_rate": 2.4162650201008373e-08, "loss": 3.7369, "step": 3632000 }, { "epoch": 40.34, "learning_rate": 2.414876840725851e-08, "loss": 3.76, "step": 3632500 }, { "epoch": 40.35, "learning_rate": 2.413488661350865e-08, "loss": 3.7482, "step": 3633000 }, { "epoch": 40.35, "learning_rate": 2.412100481975879e-08, "loss": 3.7557, "step": 3633500 }, { "epoch": 40.36, "learning_rate": 2.4107123026008926e-08, "loss": 3.7295, "step": 3634000 }, { "epoch": 40.36, "learning_rate": 2.4093241232259068e-08, "loss": 3.7425, "step": 3634500 }, { "epoch": 40.37, "learning_rate": 2.4079359438509207e-08, "loss": 3.7603, "step": 3635000 }, { "epoch": 40.37, "learning_rate": 2.4065477644759343e-08, "loss": 3.7547, "step": 3635500 }, { "epoch": 40.38, "learning_rate": 2.405159585100948e-08, "loss": 3.7487, "step": 3636000 }, { "epoch": 40.38, "learning_rate": 2.4037714057259624e-08, "loss": 3.7447, "step": 3636500 }, { "epoch": 40.39, "learning_rate": 2.402383226350976e-08, "loss": 3.7536, "step": 3637000 }, { "epoch": 40.4, "learning_rate": 2.40099504697599e-08, "loss": 3.7613, "step": 3637500 }, { "epoch": 40.4, "learning_rate": 2.399606867601004e-08, "loss": 3.7338, "step": 3638000 }, { "epoch": 40.41, "learning_rate": 2.3982186882260176e-08, "loss": 3.752, "step": 3638500 }, { "epoch": 40.41, "learning_rate": 2.3968305088510315e-08, "loss": 3.7721, "step": 3639000 }, { "epoch": 40.42, "learning_rate": 2.3954423294760454e-08, "loss": 3.7477, "step": 3639500 }, { "epoch": 40.42, "learning_rate": 2.3940541501010593e-08, "loss": 3.751, "step": 3640000 }, { "epoch": 40.43, "learning_rate": 2.3926659707260732e-08, "loss": 3.7506, "step": 3640500 }, { "epoch": 40.43, "learning_rate": 2.391277791351087e-08, "loss": 3.76, "step": 3641000 }, { "epoch": 40.44, "learning_rate": 2.389889611976101e-08, "loss": 3.7506, "step": 3641500 }, { "epoch": 40.45, "learning_rate": 2.388501432601115e-08, "loss": 3.7783, "step": 3642000 }, { "epoch": 40.45, "learning_rate": 2.3871132532261288e-08, "loss": 3.7562, "step": 3642500 }, { "epoch": 40.46, "learning_rate": 2.3857250738511424e-08, "loss": 3.731, "step": 3643000 }, { "epoch": 40.46, "learning_rate": 2.3843368944761566e-08, "loss": 3.7573, "step": 3643500 }, { "epoch": 40.47, "learning_rate": 2.3829487151011705e-08, "loss": 3.7492, "step": 3644000 }, { "epoch": 40.47, "learning_rate": 2.381560535726184e-08, "loss": 3.7654, "step": 3644500 }, { "epoch": 40.48, "learning_rate": 2.3801723563511983e-08, "loss": 3.749, "step": 3645000 }, { "epoch": 40.48, "learning_rate": 2.3787841769762122e-08, "loss": 3.7503, "step": 3645500 }, { "epoch": 40.49, "learning_rate": 2.3773959976012257e-08, "loss": 3.7483, "step": 3646000 }, { "epoch": 40.5, "learning_rate": 2.3760078182262396e-08, "loss": 3.7522, "step": 3646500 }, { "epoch": 40.5, "learning_rate": 2.374619638851254e-08, "loss": 3.7469, "step": 3647000 }, { "epoch": 40.51, "learning_rate": 2.3732314594762674e-08, "loss": 3.7486, "step": 3647500 }, { "epoch": 40.51, "learning_rate": 2.3718432801012813e-08, "loss": 3.764, "step": 3648000 }, { "epoch": 40.52, "learning_rate": 2.3704551007262955e-08, "loss": 3.7301, "step": 3648500 }, { "epoch": 40.52, "learning_rate": 2.369066921351309e-08, "loss": 3.7509, "step": 3649000 }, { "epoch": 40.53, "learning_rate": 2.367678741976323e-08, "loss": 3.7427, "step": 3649500 }, { "epoch": 40.53, "learning_rate": 2.366290562601337e-08, "loss": 3.7599, "step": 3650000 }, { "epoch": 40.54, "learning_rate": 2.3649023832263508e-08, "loss": 3.7573, "step": 3650500 }, { "epoch": 40.55, "learning_rate": 2.3635142038513647e-08, "loss": 3.7548, "step": 3651000 }, { "epoch": 40.55, "learning_rate": 2.3621260244763786e-08, "loss": 3.725, "step": 3651500 }, { "epoch": 40.56, "learning_rate": 2.3607378451013925e-08, "loss": 3.7511, "step": 3652000 }, { "epoch": 40.56, "learning_rate": 2.3593496657264064e-08, "loss": 3.7554, "step": 3652500 }, { "epoch": 40.57, "learning_rate": 2.3579614863514203e-08, "loss": 3.7447, "step": 3653000 }, { "epoch": 40.57, "learning_rate": 2.3565733069764342e-08, "loss": 3.7581, "step": 3653500 }, { "epoch": 40.58, "learning_rate": 2.355185127601448e-08, "loss": 3.7376, "step": 3654000 }, { "epoch": 40.58, "learning_rate": 2.353796948226462e-08, "loss": 3.7443, "step": 3654500 }, { "epoch": 40.59, "learning_rate": 2.352408768851476e-08, "loss": 3.7425, "step": 3655000 }, { "epoch": 40.6, "learning_rate": 2.3510205894764898e-08, "loss": 3.7513, "step": 3655500 }, { "epoch": 40.6, "learning_rate": 2.3496324101015036e-08, "loss": 3.7469, "step": 3656000 }, { "epoch": 40.61, "learning_rate": 2.3482442307265175e-08, "loss": 3.7767, "step": 3656500 }, { "epoch": 40.61, "learning_rate": 2.346856051351531e-08, "loss": 3.7529, "step": 3657000 }, { "epoch": 40.62, "learning_rate": 2.3454678719765453e-08, "loss": 3.7441, "step": 3657500 }, { "epoch": 40.62, "learning_rate": 2.3440796926015592e-08, "loss": 3.7323, "step": 3658000 }, { "epoch": 40.63, "learning_rate": 2.3426915132265728e-08, "loss": 3.7337, "step": 3658500 }, { "epoch": 40.63, "learning_rate": 2.341303333851587e-08, "loss": 3.755, "step": 3659000 }, { "epoch": 40.64, "learning_rate": 2.339915154476601e-08, "loss": 3.7399, "step": 3659500 }, { "epoch": 40.65, "learning_rate": 2.3385269751016145e-08, "loss": 3.7402, "step": 3660000 }, { "epoch": 40.65, "learning_rate": 2.3371387957266287e-08, "loss": 3.7521, "step": 3660500 }, { "epoch": 40.66, "learning_rate": 2.3357506163516426e-08, "loss": 3.7618, "step": 3661000 }, { "epoch": 40.66, "learning_rate": 2.3343624369766562e-08, "loss": 3.747, "step": 3661500 }, { "epoch": 40.67, "learning_rate": 2.33297425760167e-08, "loss": 3.7524, "step": 3662000 }, { "epoch": 40.67, "learning_rate": 2.3315860782266843e-08, "loss": 3.7458, "step": 3662500 }, { "epoch": 40.68, "learning_rate": 2.330197898851698e-08, "loss": 3.7401, "step": 3663000 }, { "epoch": 40.68, "learning_rate": 2.3288097194767118e-08, "loss": 3.7383, "step": 3663500 }, { "epoch": 40.69, "learning_rate": 2.3274215401017256e-08, "loss": 3.7525, "step": 3664000 }, { "epoch": 40.7, "learning_rate": 2.3260333607267395e-08, "loss": 3.7486, "step": 3664500 }, { "epoch": 40.7, "learning_rate": 2.3246451813517534e-08, "loss": 3.7263, "step": 3665000 }, { "epoch": 40.71, "learning_rate": 2.3232570019767673e-08, "loss": 3.7393, "step": 3665500 }, { "epoch": 40.71, "learning_rate": 2.3218688226017812e-08, "loss": 3.7416, "step": 3666000 }, { "epoch": 40.72, "learning_rate": 2.320480643226795e-08, "loss": 3.7504, "step": 3666500 }, { "epoch": 40.72, "learning_rate": 2.319092463851809e-08, "loss": 3.7592, "step": 3667000 }, { "epoch": 40.73, "learning_rate": 2.317704284476823e-08, "loss": 3.7557, "step": 3667500 }, { "epoch": 40.73, "learning_rate": 2.3163161051018368e-08, "loss": 3.7418, "step": 3668000 }, { "epoch": 40.74, "learning_rate": 2.3149279257268507e-08, "loss": 3.7464, "step": 3668500 }, { "epoch": 40.75, "learning_rate": 2.3135397463518643e-08, "loss": 3.7624, "step": 3669000 }, { "epoch": 40.75, "learning_rate": 2.3121515669768785e-08, "loss": 3.7725, "step": 3669500 }, { "epoch": 40.76, "learning_rate": 2.3107633876018924e-08, "loss": 3.7491, "step": 3670000 }, { "epoch": 40.76, "learning_rate": 2.309375208226906e-08, "loss": 3.7542, "step": 3670500 }, { "epoch": 40.77, "learning_rate": 2.3079870288519202e-08, "loss": 3.7444, "step": 3671000 }, { "epoch": 40.77, "learning_rate": 2.306598849476934e-08, "loss": 3.7429, "step": 3671500 }, { "epoch": 40.78, "learning_rate": 2.3052106701019476e-08, "loss": 3.7363, "step": 3672000 }, { "epoch": 40.78, "learning_rate": 2.3038224907269615e-08, "loss": 3.7357, "step": 3672500 }, { "epoch": 40.79, "learning_rate": 2.3024343113519758e-08, "loss": 3.7491, "step": 3673000 }, { "epoch": 40.8, "learning_rate": 2.3010461319769893e-08, "loss": 3.7528, "step": 3673500 }, { "epoch": 40.8, "learning_rate": 2.2996579526020032e-08, "loss": 3.7521, "step": 3674000 }, { "epoch": 40.81, "learning_rate": 2.2982697732270175e-08, "loss": 3.7462, "step": 3674500 }, { "epoch": 40.81, "learning_rate": 2.296881593852031e-08, "loss": 3.7502, "step": 3675000 }, { "epoch": 40.82, "learning_rate": 2.295493414477045e-08, "loss": 3.7353, "step": 3675500 }, { "epoch": 40.82, "learning_rate": 2.2941052351020588e-08, "loss": 3.7356, "step": 3676000 }, { "epoch": 40.83, "learning_rate": 2.2927170557270727e-08, "loss": 3.7673, "step": 3676500 }, { "epoch": 40.83, "learning_rate": 2.2913288763520866e-08, "loss": 3.7283, "step": 3677000 }, { "epoch": 40.84, "learning_rate": 2.2899406969771005e-08, "loss": 3.7487, "step": 3677500 }, { "epoch": 40.85, "learning_rate": 2.2885525176021144e-08, "loss": 3.7555, "step": 3678000 }, { "epoch": 40.85, "learning_rate": 2.2871643382271283e-08, "loss": 3.7416, "step": 3678500 }, { "epoch": 40.86, "learning_rate": 2.2857761588521422e-08, "loss": 3.7408, "step": 3679000 }, { "epoch": 40.86, "learning_rate": 2.2843879794771557e-08, "loss": 3.7335, "step": 3679500 }, { "epoch": 40.87, "learning_rate": 2.28299980010217e-08, "loss": 3.7644, "step": 3680000 }, { "epoch": 40.87, "learning_rate": 2.281611620727184e-08, "loss": 3.7474, "step": 3680500 }, { "epoch": 40.88, "learning_rate": 2.2802234413521974e-08, "loss": 3.7358, "step": 3681000 }, { "epoch": 40.88, "learning_rate": 2.2788352619772117e-08, "loss": 3.7395, "step": 3681500 }, { "epoch": 40.89, "learning_rate": 2.2774470826022256e-08, "loss": 3.7353, "step": 3682000 }, { "epoch": 40.9, "learning_rate": 2.276058903227239e-08, "loss": 3.7454, "step": 3682500 }, { "epoch": 40.9, "learning_rate": 2.274670723852253e-08, "loss": 3.7577, "step": 3683000 }, { "epoch": 40.91, "learning_rate": 2.2732825444772672e-08, "loss": 3.7287, "step": 3683500 }, { "epoch": 40.91, "learning_rate": 2.2718943651022808e-08, "loss": 3.745, "step": 3684000 }, { "epoch": 40.92, "learning_rate": 2.2705061857272947e-08, "loss": 3.7586, "step": 3684500 }, { "epoch": 40.92, "learning_rate": 2.269118006352309e-08, "loss": 3.7679, "step": 3685000 }, { "epoch": 40.93, "learning_rate": 2.2677298269773225e-08, "loss": 3.7601, "step": 3685500 }, { "epoch": 40.93, "learning_rate": 2.2663416476023364e-08, "loss": 3.7607, "step": 3686000 }, { "epoch": 40.94, "learning_rate": 2.2649534682273503e-08, "loss": 3.7488, "step": 3686500 }, { "epoch": 40.95, "learning_rate": 2.2635652888523642e-08, "loss": 3.7738, "step": 3687000 }, { "epoch": 40.95, "learning_rate": 2.262177109477378e-08, "loss": 3.7532, "step": 3687500 }, { "epoch": 40.96, "learning_rate": 2.260788930102392e-08, "loss": 3.7496, "step": 3688000 }, { "epoch": 40.96, "learning_rate": 2.259400750727406e-08, "loss": 3.7555, "step": 3688500 }, { "epoch": 40.97, "learning_rate": 2.2580125713524198e-08, "loss": 3.7515, "step": 3689000 }, { "epoch": 40.97, "learning_rate": 2.2566243919774337e-08, "loss": 3.7609, "step": 3689500 }, { "epoch": 40.98, "learning_rate": 2.2552362126024476e-08, "loss": 3.7799, "step": 3690000 }, { "epoch": 40.98, "learning_rate": 2.2538480332274614e-08, "loss": 3.7663, "step": 3690500 }, { "epoch": 40.99, "learning_rate": 2.2524598538524753e-08, "loss": 3.7547, "step": 3691000 }, { "epoch": 41.0, "learning_rate": 2.2510716744774892e-08, "loss": 3.7379, "step": 3691500 }, { "epoch": 41.0, "eval_loss": 3.826106548309326, "eval_runtime": 6.3033, "eval_samples_per_second": 246.537, "step": 3691886 }, { "epoch": 41.0, "learning_rate": 2.249683495102503e-08, "loss": 3.7603, "step": 3692000 }, { "epoch": 41.01, "learning_rate": 2.248295315727517e-08, "loss": 3.7643, "step": 3692500 }, { "epoch": 41.01, "learning_rate": 2.246907136352531e-08, "loss": 3.7455, "step": 3693000 }, { "epoch": 41.02, "learning_rate": 2.2455189569775448e-08, "loss": 3.7449, "step": 3693500 }, { "epoch": 41.02, "learning_rate": 2.2441307776025587e-08, "loss": 3.7448, "step": 3694000 }, { "epoch": 41.03, "learning_rate": 2.2427425982275726e-08, "loss": 3.7556, "step": 3694500 }, { "epoch": 41.03, "learning_rate": 2.2413544188525862e-08, "loss": 3.7418, "step": 3695000 }, { "epoch": 41.04, "learning_rate": 2.2399662394776004e-08, "loss": 3.7766, "step": 3695500 }, { "epoch": 41.05, "learning_rate": 2.238578060102614e-08, "loss": 3.7308, "step": 3696000 }, { "epoch": 41.05, "learning_rate": 2.237189880727628e-08, "loss": 3.7448, "step": 3696500 }, { "epoch": 41.06, "learning_rate": 2.235801701352642e-08, "loss": 3.7586, "step": 3697000 }, { "epoch": 41.06, "learning_rate": 2.2344135219776557e-08, "loss": 3.7438, "step": 3697500 }, { "epoch": 41.07, "learning_rate": 2.2330253426026695e-08, "loss": 3.7499, "step": 3698000 }, { "epoch": 41.07, "learning_rate": 2.2316371632276834e-08, "loss": 3.7575, "step": 3698500 }, { "epoch": 41.08, "learning_rate": 2.2302489838526973e-08, "loss": 3.7529, "step": 3699000 }, { "epoch": 41.08, "learning_rate": 2.2288608044777112e-08, "loss": 3.7491, "step": 3699500 }, { "epoch": 41.09, "learning_rate": 2.227472625102725e-08, "loss": 3.7335, "step": 3700000 }, { "epoch": 41.1, "learning_rate": 2.226084445727739e-08, "loss": 3.7453, "step": 3700500 }, { "epoch": 41.1, "learning_rate": 2.224696266352753e-08, "loss": 3.7685, "step": 3701000 }, { "epoch": 41.11, "learning_rate": 2.2233080869777668e-08, "loss": 3.7471, "step": 3701500 }, { "epoch": 41.11, "learning_rate": 2.2219199076027807e-08, "loss": 3.7441, "step": 3702000 }, { "epoch": 41.12, "learning_rate": 2.2205317282277946e-08, "loss": 3.7526, "step": 3702500 }, { "epoch": 41.12, "learning_rate": 2.2191435488528085e-08, "loss": 3.7518, "step": 3703000 }, { "epoch": 41.13, "learning_rate": 2.2177553694778224e-08, "loss": 3.7561, "step": 3703500 }, { "epoch": 41.13, "learning_rate": 2.2163671901028363e-08, "loss": 3.7603, "step": 3704000 }, { "epoch": 41.14, "learning_rate": 2.2149790107278502e-08, "loss": 3.7537, "step": 3704500 }, { "epoch": 41.15, "learning_rate": 2.213590831352864e-08, "loss": 3.7676, "step": 3705000 }, { "epoch": 41.15, "learning_rate": 2.2122026519778777e-08, "loss": 3.7581, "step": 3705500 }, { "epoch": 41.16, "learning_rate": 2.210814472602892e-08, "loss": 3.734, "step": 3706000 }, { "epoch": 41.16, "learning_rate": 2.2094262932279058e-08, "loss": 3.767, "step": 3706500 }, { "epoch": 41.17, "learning_rate": 2.2080381138529193e-08, "loss": 3.7578, "step": 3707000 }, { "epoch": 41.17, "learning_rate": 2.2066499344779336e-08, "loss": 3.74, "step": 3707500 }, { "epoch": 41.18, "learning_rate": 2.2052617551029475e-08, "loss": 3.7551, "step": 3708000 }, { "epoch": 41.18, "learning_rate": 2.203873575727961e-08, "loss": 3.7496, "step": 3708500 }, { "epoch": 41.19, "learning_rate": 2.202485396352975e-08, "loss": 3.7714, "step": 3709000 }, { "epoch": 41.2, "learning_rate": 2.201097216977989e-08, "loss": 3.7273, "step": 3709500 }, { "epoch": 41.2, "learning_rate": 2.1997090376030027e-08, "loss": 3.7518, "step": 3710000 }, { "epoch": 41.21, "learning_rate": 2.1983208582280166e-08, "loss": 3.7663, "step": 3710500 }, { "epoch": 41.21, "learning_rate": 2.1969326788530308e-08, "loss": 3.7453, "step": 3711000 }, { "epoch": 41.22, "learning_rate": 2.1955444994780444e-08, "loss": 3.7401, "step": 3711500 }, { "epoch": 41.22, "learning_rate": 2.1941563201030583e-08, "loss": 3.7375, "step": 3712000 }, { "epoch": 41.23, "learning_rate": 2.1927681407280722e-08, "loss": 3.7493, "step": 3712500 }, { "epoch": 41.23, "learning_rate": 2.191379961353086e-08, "loss": 3.7453, "step": 3713000 }, { "epoch": 41.24, "learning_rate": 2.1899917819781e-08, "loss": 3.756, "step": 3713500 }, { "epoch": 41.25, "learning_rate": 2.188603602603114e-08, "loss": 3.7488, "step": 3714000 }, { "epoch": 41.25, "learning_rate": 2.1872154232281278e-08, "loss": 3.7675, "step": 3714500 }, { "epoch": 41.26, "learning_rate": 2.1858272438531417e-08, "loss": 3.7388, "step": 3715000 }, { "epoch": 41.26, "learning_rate": 2.1844390644781556e-08, "loss": 3.733, "step": 3715500 }, { "epoch": 41.27, "learning_rate": 2.183050885103169e-08, "loss": 3.7553, "step": 3716000 }, { "epoch": 41.27, "learning_rate": 2.1816627057281834e-08, "loss": 3.7665, "step": 3716500 }, { "epoch": 41.28, "learning_rate": 2.1802745263531972e-08, "loss": 3.7475, "step": 3717000 }, { "epoch": 41.28, "learning_rate": 2.1788863469782108e-08, "loss": 3.7631, "step": 3717500 }, { "epoch": 41.29, "learning_rate": 2.177498167603225e-08, "loss": 3.7534, "step": 3718000 }, { "epoch": 41.3, "learning_rate": 2.176109988228239e-08, "loss": 3.7793, "step": 3718500 }, { "epoch": 41.3, "learning_rate": 2.1747218088532525e-08, "loss": 3.7399, "step": 3719000 }, { "epoch": 41.31, "learning_rate": 2.1733336294782664e-08, "loss": 3.7517, "step": 3719500 }, { "epoch": 41.31, "learning_rate": 2.1719454501032806e-08, "loss": 3.7603, "step": 3720000 }, { "epoch": 41.32, "learning_rate": 2.1705572707282942e-08, "loss": 3.7614, "step": 3720500 }, { "epoch": 41.32, "learning_rate": 2.169169091353308e-08, "loss": 3.7555, "step": 3721000 }, { "epoch": 41.33, "learning_rate": 2.1677809119783223e-08, "loss": 3.7324, "step": 3721500 }, { "epoch": 41.33, "learning_rate": 2.166392732603336e-08, "loss": 3.7434, "step": 3722000 }, { "epoch": 41.34, "learning_rate": 2.1650045532283498e-08, "loss": 3.744, "step": 3722500 }, { "epoch": 41.35, "learning_rate": 2.163616373853364e-08, "loss": 3.7342, "step": 3723000 }, { "epoch": 41.35, "learning_rate": 2.1622281944783776e-08, "loss": 3.7592, "step": 3723500 }, { "epoch": 41.36, "learning_rate": 2.1608400151033915e-08, "loss": 3.7443, "step": 3724000 }, { "epoch": 41.36, "learning_rate": 2.1594518357284054e-08, "loss": 3.7478, "step": 3724500 }, { "epoch": 41.37, "learning_rate": 2.1580636563534192e-08, "loss": 3.7618, "step": 3725000 }, { "epoch": 41.37, "learning_rate": 2.156675476978433e-08, "loss": 3.7597, "step": 3725500 }, { "epoch": 41.38, "learning_rate": 2.155287297603447e-08, "loss": 3.759, "step": 3726000 }, { "epoch": 41.38, "learning_rate": 2.153899118228461e-08, "loss": 3.7281, "step": 3726500 }, { "epoch": 41.39, "learning_rate": 2.1525109388534748e-08, "loss": 3.738, "step": 3727000 }, { "epoch": 41.4, "learning_rate": 2.1511227594784887e-08, "loss": 3.7371, "step": 3727500 }, { "epoch": 41.4, "learning_rate": 2.1497345801035023e-08, "loss": 3.7489, "step": 3728000 }, { "epoch": 41.41, "learning_rate": 2.1483464007285165e-08, "loss": 3.756, "step": 3728500 }, { "epoch": 41.41, "learning_rate": 2.1469582213535304e-08, "loss": 3.7448, "step": 3729000 }, { "epoch": 41.42, "learning_rate": 2.145570041978544e-08, "loss": 3.7548, "step": 3729500 }, { "epoch": 41.42, "learning_rate": 2.1441818626035582e-08, "loss": 3.7433, "step": 3730000 }, { "epoch": 41.43, "learning_rate": 2.142793683228572e-08, "loss": 3.7573, "step": 3730500 }, { "epoch": 41.43, "learning_rate": 2.1414055038535857e-08, "loss": 3.758, "step": 3731000 }, { "epoch": 41.44, "learning_rate": 2.1400173244785996e-08, "loss": 3.7446, "step": 3731500 }, { "epoch": 41.45, "learning_rate": 2.1386291451036138e-08, "loss": 3.7514, "step": 3732000 }, { "epoch": 41.45, "learning_rate": 2.1372409657286273e-08, "loss": 3.7488, "step": 3732500 }, { "epoch": 41.46, "learning_rate": 2.1358527863536412e-08, "loss": 3.7364, "step": 3733000 }, { "epoch": 41.46, "learning_rate": 2.1344646069786555e-08, "loss": 3.7523, "step": 3733500 }, { "epoch": 41.47, "learning_rate": 2.133076427603669e-08, "loss": 3.7517, "step": 3734000 }, { "epoch": 41.47, "learning_rate": 2.131688248228683e-08, "loss": 3.7579, "step": 3734500 }, { "epoch": 41.48, "learning_rate": 2.1303000688536968e-08, "loss": 3.7446, "step": 3735000 }, { "epoch": 41.48, "learning_rate": 2.1289118894787107e-08, "loss": 3.7363, "step": 3735500 }, { "epoch": 41.49, "learning_rate": 2.1275237101037246e-08, "loss": 3.752, "step": 3736000 }, { "epoch": 41.5, "learning_rate": 2.1261355307287385e-08, "loss": 3.7526, "step": 3736500 }, { "epoch": 41.5, "learning_rate": 2.1247473513537524e-08, "loss": 3.7606, "step": 3737000 }, { "epoch": 41.51, "learning_rate": 2.1233591719787663e-08, "loss": 3.7543, "step": 3737500 }, { "epoch": 41.51, "learning_rate": 2.1219709926037802e-08, "loss": 3.7508, "step": 3738000 }, { "epoch": 41.52, "learning_rate": 2.120582813228794e-08, "loss": 3.7646, "step": 3738500 }, { "epoch": 41.52, "learning_rate": 2.119194633853808e-08, "loss": 3.7461, "step": 3739000 }, { "epoch": 41.53, "learning_rate": 2.117806454478822e-08, "loss": 3.7575, "step": 3739500 }, { "epoch": 41.53, "learning_rate": 2.1164182751038358e-08, "loss": 3.7605, "step": 3740000 }, { "epoch": 41.54, "learning_rate": 2.1150300957288497e-08, "loss": 3.7633, "step": 3740500 }, { "epoch": 41.55, "learning_rate": 2.1136419163538636e-08, "loss": 3.7253, "step": 3741000 }, { "epoch": 41.55, "learning_rate": 2.1122537369788775e-08, "loss": 3.7594, "step": 3741500 }, { "epoch": 41.56, "learning_rate": 2.110865557603891e-08, "loss": 3.7596, "step": 3742000 }, { "epoch": 41.56, "learning_rate": 2.1094773782289053e-08, "loss": 3.7436, "step": 3742500 }, { "epoch": 41.57, "learning_rate": 2.108089198853919e-08, "loss": 3.7416, "step": 3743000 }, { "epoch": 41.57, "learning_rate": 2.1067010194789327e-08, "loss": 3.7358, "step": 3743500 }, { "epoch": 41.58, "learning_rate": 2.105312840103947e-08, "loss": 3.7479, "step": 3744000 }, { "epoch": 41.58, "learning_rate": 2.103924660728961e-08, "loss": 3.7513, "step": 3744500 }, { "epoch": 41.59, "learning_rate": 2.1025364813539744e-08, "loss": 3.7635, "step": 3745000 }, { "epoch": 41.6, "learning_rate": 2.1011483019789883e-08, "loss": 3.7684, "step": 3745500 }, { "epoch": 41.6, "learning_rate": 2.0997601226040025e-08, "loss": 3.7582, "step": 3746000 }, { "epoch": 41.61, "learning_rate": 2.098371943229016e-08, "loss": 3.7426, "step": 3746500 }, { "epoch": 41.61, "learning_rate": 2.09698376385403e-08, "loss": 3.752, "step": 3747000 }, { "epoch": 41.62, "learning_rate": 2.0955955844790442e-08, "loss": 3.7571, "step": 3747500 }, { "epoch": 41.62, "learning_rate": 2.0942074051040578e-08, "loss": 3.7346, "step": 3748000 }, { "epoch": 41.63, "learning_rate": 2.0928192257290717e-08, "loss": 3.7188, "step": 3748500 }, { "epoch": 41.63, "learning_rate": 2.0914310463540856e-08, "loss": 3.761, "step": 3749000 }, { "epoch": 41.64, "learning_rate": 2.0900428669790995e-08, "loss": 3.7587, "step": 3749500 }, { "epoch": 41.65, "learning_rate": 2.0886546876041134e-08, "loss": 3.7376, "step": 3750000 }, { "epoch": 41.65, "learning_rate": 2.0872665082291273e-08, "loss": 3.7643, "step": 3750500 }, { "epoch": 41.66, "learning_rate": 2.085878328854141e-08, "loss": 3.7413, "step": 3751000 }, { "epoch": 41.66, "learning_rate": 2.084490149479155e-08, "loss": 3.7477, "step": 3751500 }, { "epoch": 41.67, "learning_rate": 2.083101970104169e-08, "loss": 3.7441, "step": 3752000 }, { "epoch": 41.67, "learning_rate": 2.081713790729183e-08, "loss": 3.7458, "step": 3752500 }, { "epoch": 41.68, "learning_rate": 2.0803256113541967e-08, "loss": 3.748, "step": 3753000 }, { "epoch": 41.68, "learning_rate": 2.0789374319792106e-08, "loss": 3.7536, "step": 3753500 }, { "epoch": 41.69, "learning_rate": 2.0775492526042242e-08, "loss": 3.7538, "step": 3754000 }, { "epoch": 41.7, "learning_rate": 2.0761610732292384e-08, "loss": 3.7593, "step": 3754500 }, { "epoch": 41.7, "learning_rate": 2.0747728938542523e-08, "loss": 3.7641, "step": 3755000 }, { "epoch": 41.71, "learning_rate": 2.073384714479266e-08, "loss": 3.752, "step": 3755500 }, { "epoch": 41.71, "learning_rate": 2.07199653510428e-08, "loss": 3.7499, "step": 3756000 }, { "epoch": 41.72, "learning_rate": 2.070608355729294e-08, "loss": 3.7624, "step": 3756500 }, { "epoch": 41.72, "learning_rate": 2.0692201763543076e-08, "loss": 3.7275, "step": 3757000 }, { "epoch": 41.73, "learning_rate": 2.0678319969793215e-08, "loss": 3.7608, "step": 3757500 }, { "epoch": 41.73, "learning_rate": 2.0664438176043357e-08, "loss": 3.7511, "step": 3758000 }, { "epoch": 41.74, "learning_rate": 2.0650556382293493e-08, "loss": 3.7449, "step": 3758500 }, { "epoch": 41.75, "learning_rate": 2.063667458854363e-08, "loss": 3.7463, "step": 3759000 }, { "epoch": 41.75, "learning_rate": 2.0622792794793774e-08, "loss": 3.768, "step": 3759500 }, { "epoch": 41.76, "learning_rate": 2.060891100104391e-08, "loss": 3.752, "step": 3760000 }, { "epoch": 41.76, "learning_rate": 2.059502920729405e-08, "loss": 3.7528, "step": 3760500 }, { "epoch": 41.77, "learning_rate": 2.0581147413544187e-08, "loss": 3.7624, "step": 3761000 }, { "epoch": 41.77, "learning_rate": 2.0567265619794326e-08, "loss": 3.7313, "step": 3761500 }, { "epoch": 41.78, "learning_rate": 2.0553383826044465e-08, "loss": 3.7451, "step": 3762000 }, { "epoch": 41.78, "learning_rate": 2.0539502032294604e-08, "loss": 3.7545, "step": 3762500 }, { "epoch": 41.79, "learning_rate": 2.0525620238544743e-08, "loss": 3.7478, "step": 3763000 }, { "epoch": 41.8, "learning_rate": 2.0511738444794882e-08, "loss": 3.7376, "step": 3763500 }, { "epoch": 41.8, "learning_rate": 2.049785665104502e-08, "loss": 3.7487, "step": 3764000 }, { "epoch": 41.81, "learning_rate": 2.0483974857295157e-08, "loss": 3.7407, "step": 3764500 }, { "epoch": 41.81, "learning_rate": 2.04700930635453e-08, "loss": 3.758, "step": 3765000 }, { "epoch": 41.82, "learning_rate": 2.0456211269795438e-08, "loss": 3.7576, "step": 3765500 }, { "epoch": 41.82, "learning_rate": 2.0442329476045574e-08, "loss": 3.7412, "step": 3766000 }, { "epoch": 41.83, "learning_rate": 2.0428447682295716e-08, "loss": 3.7597, "step": 3766500 }, { "epoch": 41.83, "learning_rate": 2.0414565888545855e-08, "loss": 3.7439, "step": 3767000 }, { "epoch": 41.84, "learning_rate": 2.040068409479599e-08, "loss": 3.7394, "step": 3767500 }, { "epoch": 41.85, "learning_rate": 2.038680230104613e-08, "loss": 3.7137, "step": 3768000 }, { "epoch": 41.85, "learning_rate": 2.037292050729627e-08, "loss": 3.7631, "step": 3768500 }, { "epoch": 41.86, "learning_rate": 2.0359038713546407e-08, "loss": 3.7368, "step": 3769000 }, { "epoch": 41.86, "learning_rate": 2.0345156919796546e-08, "loss": 3.7518, "step": 3769500 }, { "epoch": 41.87, "learning_rate": 2.033127512604669e-08, "loss": 3.7503, "step": 3770000 }, { "epoch": 41.87, "learning_rate": 2.0317393332296824e-08, "loss": 3.7532, "step": 3770500 }, { "epoch": 41.88, "learning_rate": 2.0303511538546963e-08, "loss": 3.7434, "step": 3771000 }, { "epoch": 41.88, "learning_rate": 2.0289629744797102e-08, "loss": 3.7422, "step": 3771500 }, { "epoch": 41.89, "learning_rate": 2.027574795104724e-08, "loss": 3.7478, "step": 3772000 }, { "epoch": 41.9, "learning_rate": 2.026186615729738e-08, "loss": 3.7551, "step": 3772500 }, { "epoch": 41.9, "learning_rate": 2.024798436354752e-08, "loss": 3.7531, "step": 3773000 }, { "epoch": 41.91, "learning_rate": 2.0234102569797658e-08, "loss": 3.7409, "step": 3773500 }, { "epoch": 41.91, "learning_rate": 2.0220220776047797e-08, "loss": 3.757, "step": 3774000 }, { "epoch": 41.92, "learning_rate": 2.0206338982297936e-08, "loss": 3.7367, "step": 3774500 }, { "epoch": 41.92, "learning_rate": 2.0192457188548075e-08, "loss": 3.7459, "step": 3775000 }, { "epoch": 41.93, "learning_rate": 2.0178575394798214e-08, "loss": 3.7514, "step": 3775500 }, { "epoch": 41.93, "learning_rate": 2.0164693601048353e-08, "loss": 3.7525, "step": 3776000 }, { "epoch": 41.94, "learning_rate": 2.015081180729849e-08, "loss": 3.7296, "step": 3776500 }, { "epoch": 41.95, "learning_rate": 2.013693001354863e-08, "loss": 3.7335, "step": 3777000 }, { "epoch": 41.95, "learning_rate": 2.012304821979877e-08, "loss": 3.7467, "step": 3777500 }, { "epoch": 41.96, "learning_rate": 2.010916642604891e-08, "loss": 3.7427, "step": 3778000 }, { "epoch": 41.96, "learning_rate": 2.0095284632299044e-08, "loss": 3.7496, "step": 3778500 }, { "epoch": 41.97, "learning_rate": 2.0081402838549186e-08, "loss": 3.7426, "step": 3779000 }, { "epoch": 41.97, "learning_rate": 2.0067521044799325e-08, "loss": 3.7636, "step": 3779500 }, { "epoch": 41.98, "learning_rate": 2.005363925104946e-08, "loss": 3.7458, "step": 3780000 }, { "epoch": 41.98, "learning_rate": 2.0039757457299603e-08, "loss": 3.7414, "step": 3780500 }, { "epoch": 41.99, "learning_rate": 2.002587566354974e-08, "loss": 3.7337, "step": 3781000 }, { "epoch": 42.0, "learning_rate": 2.0011993869799878e-08, "loss": 3.7582, "step": 3781500 }, { "epoch": 42.0, "eval_loss": 3.825657844543457, "eval_runtime": 6.298, "eval_samples_per_second": 246.746, "step": 3781932 }, { "epoch": 42.0, "learning_rate": 1.9998112076050017e-08, "loss": 3.7414, "step": 3782000 }, { "epoch": 42.01, "learning_rate": 1.9984230282300156e-08, "loss": 3.757, "step": 3782500 }, { "epoch": 42.01, "learning_rate": 1.9970348488550295e-08, "loss": 3.7462, "step": 3783000 }, { "epoch": 42.02, "learning_rate": 1.9956466694800434e-08, "loss": 3.7467, "step": 3783500 }, { "epoch": 42.02, "learning_rate": 1.9942584901050573e-08, "loss": 3.7842, "step": 3784000 }, { "epoch": 42.03, "learning_rate": 1.992870310730071e-08, "loss": 3.7375, "step": 3784500 }, { "epoch": 42.03, "learning_rate": 1.991482131355085e-08, "loss": 3.7367, "step": 3785000 }, { "epoch": 42.04, "learning_rate": 1.990093951980099e-08, "loss": 3.7641, "step": 3785500 }, { "epoch": 42.05, "learning_rate": 1.988705772605113e-08, "loss": 3.7621, "step": 3786000 }, { "epoch": 42.05, "learning_rate": 1.9873175932301267e-08, "loss": 3.7412, "step": 3786500 }, { "epoch": 42.06, "learning_rate": 1.9859294138551406e-08, "loss": 3.7382, "step": 3787000 }, { "epoch": 42.06, "learning_rate": 1.9845412344801545e-08, "loss": 3.7589, "step": 3787500 }, { "epoch": 42.07, "learning_rate": 1.9831530551051684e-08, "loss": 3.7544, "step": 3788000 }, { "epoch": 42.07, "learning_rate": 1.9817648757301823e-08, "loss": 3.7463, "step": 3788500 }, { "epoch": 42.08, "learning_rate": 1.9803766963551962e-08, "loss": 3.7774, "step": 3789000 }, { "epoch": 42.08, "learning_rate": 1.97898851698021e-08, "loss": 3.7423, "step": 3789500 }, { "epoch": 42.09, "learning_rate": 1.977600337605224e-08, "loss": 3.7436, "step": 3790000 }, { "epoch": 42.1, "learning_rate": 1.9762121582302376e-08, "loss": 3.7413, "step": 3790500 }, { "epoch": 42.1, "learning_rate": 1.9748239788552518e-08, "loss": 3.7564, "step": 3791000 }, { "epoch": 42.11, "learning_rate": 1.9734357994802657e-08, "loss": 3.7535, "step": 3791500 }, { "epoch": 42.11, "learning_rate": 1.9720476201052793e-08, "loss": 3.737, "step": 3792000 }, { "epoch": 42.12, "learning_rate": 1.9706594407302935e-08, "loss": 3.7388, "step": 3792500 }, { "epoch": 42.12, "learning_rate": 1.9692712613553074e-08, "loss": 3.7215, "step": 3793000 }, { "epoch": 42.13, "learning_rate": 1.967883081980321e-08, "loss": 3.7415, "step": 3793500 }, { "epoch": 42.13, "learning_rate": 1.966494902605335e-08, "loss": 3.7418, "step": 3794000 }, { "epoch": 42.14, "learning_rate": 1.965106723230349e-08, "loss": 3.7559, "step": 3794500 }, { "epoch": 42.15, "learning_rate": 1.9637185438553626e-08, "loss": 3.7462, "step": 3795000 }, { "epoch": 42.15, "learning_rate": 1.9623303644803765e-08, "loss": 3.754, "step": 3795500 }, { "epoch": 42.16, "learning_rate": 1.9609421851053908e-08, "loss": 3.7488, "step": 3796000 }, { "epoch": 42.16, "learning_rate": 1.9595540057304043e-08, "loss": 3.7534, "step": 3796500 }, { "epoch": 42.17, "learning_rate": 1.9581658263554182e-08, "loss": 3.7392, "step": 3797000 }, { "epoch": 42.17, "learning_rate": 1.956777646980432e-08, "loss": 3.744, "step": 3797500 }, { "epoch": 42.18, "learning_rate": 1.955389467605446e-08, "loss": 3.7621, "step": 3798000 }, { "epoch": 42.18, "learning_rate": 1.95400128823046e-08, "loss": 3.7517, "step": 3798500 }, { "epoch": 42.19, "learning_rate": 1.9526131088554738e-08, "loss": 3.7521, "step": 3799000 }, { "epoch": 42.2, "learning_rate": 1.9512249294804877e-08, "loss": 3.7508, "step": 3799500 }, { "epoch": 42.2, "learning_rate": 1.9498367501055016e-08, "loss": 3.7584, "step": 3800000 }, { "epoch": 42.21, "learning_rate": 1.9484485707305155e-08, "loss": 3.7598, "step": 3800500 }, { "epoch": 42.21, "learning_rate": 1.947060391355529e-08, "loss": 3.7573, "step": 3801000 }, { "epoch": 42.22, "learning_rate": 1.9456722119805433e-08, "loss": 3.7455, "step": 3801500 }, { "epoch": 42.22, "learning_rate": 1.9442840326055572e-08, "loss": 3.7331, "step": 3802000 }, { "epoch": 42.23, "learning_rate": 1.9428958532305707e-08, "loss": 3.7581, "step": 3802500 }, { "epoch": 42.23, "learning_rate": 1.941507673855585e-08, "loss": 3.7487, "step": 3803000 }, { "epoch": 42.24, "learning_rate": 1.940119494480599e-08, "loss": 3.7387, "step": 3803500 }, { "epoch": 42.25, "learning_rate": 1.9387313151056124e-08, "loss": 3.7291, "step": 3804000 }, { "epoch": 42.25, "learning_rate": 1.9373431357306263e-08, "loss": 3.7514, "step": 3804500 }, { "epoch": 42.26, "learning_rate": 1.9359549563556405e-08, "loss": 3.7562, "step": 3805000 }, { "epoch": 42.26, "learning_rate": 1.934566776980654e-08, "loss": 3.752, "step": 3805500 }, { "epoch": 42.27, "learning_rate": 1.933178597605668e-08, "loss": 3.7583, "step": 3806000 }, { "epoch": 42.27, "learning_rate": 1.9317904182306822e-08, "loss": 3.7646, "step": 3806500 }, { "epoch": 42.28, "learning_rate": 1.9304022388556958e-08, "loss": 3.7467, "step": 3807000 }, { "epoch": 42.28, "learning_rate": 1.9290140594807097e-08, "loss": 3.7651, "step": 3807500 }, { "epoch": 42.29, "learning_rate": 1.9276258801057236e-08, "loss": 3.7563, "step": 3808000 }, { "epoch": 42.3, "learning_rate": 1.9262377007307375e-08, "loss": 3.7503, "step": 3808500 }, { "epoch": 42.3, "learning_rate": 1.9248495213557514e-08, "loss": 3.7393, "step": 3809000 }, { "epoch": 42.31, "learning_rate": 1.9234613419807653e-08, "loss": 3.74, "step": 3809500 }, { "epoch": 42.31, "learning_rate": 1.9220731626057792e-08, "loss": 3.73, "step": 3810000 }, { "epoch": 42.32, "learning_rate": 1.920684983230793e-08, "loss": 3.7407, "step": 3810500 }, { "epoch": 42.32, "learning_rate": 1.919296803855807e-08, "loss": 3.719, "step": 3811000 }, { "epoch": 42.33, "learning_rate": 1.9179086244808205e-08, "loss": 3.751, "step": 3811500 }, { "epoch": 42.33, "learning_rate": 1.9165204451058348e-08, "loss": 3.7445, "step": 3812000 }, { "epoch": 42.34, "learning_rate": 1.9151322657308486e-08, "loss": 3.7381, "step": 3812500 }, { "epoch": 42.35, "learning_rate": 1.9137440863558622e-08, "loss": 3.7323, "step": 3813000 }, { "epoch": 42.35, "learning_rate": 1.9123559069808764e-08, "loss": 3.7512, "step": 3813500 }, { "epoch": 42.36, "learning_rate": 1.9109677276058903e-08, "loss": 3.7411, "step": 3814000 }, { "epoch": 42.36, "learning_rate": 1.909579548230904e-08, "loss": 3.7421, "step": 3814500 }, { "epoch": 42.37, "learning_rate": 1.908191368855918e-08, "loss": 3.7571, "step": 3815000 }, { "epoch": 42.37, "learning_rate": 1.906803189480932e-08, "loss": 3.7286, "step": 3815500 }, { "epoch": 42.38, "learning_rate": 1.9054150101059456e-08, "loss": 3.7598, "step": 3816000 }, { "epoch": 42.38, "learning_rate": 1.9040268307309595e-08, "loss": 3.7496, "step": 3816500 }, { "epoch": 42.39, "learning_rate": 1.9026386513559737e-08, "loss": 3.7444, "step": 3817000 }, { "epoch": 42.39, "learning_rate": 1.9012504719809873e-08, "loss": 3.7566, "step": 3817500 }, { "epoch": 42.4, "learning_rate": 1.8998622926060012e-08, "loss": 3.7505, "step": 3818000 }, { "epoch": 42.41, "learning_rate": 1.8984741132310154e-08, "loss": 3.7615, "step": 3818500 }, { "epoch": 42.41, "learning_rate": 1.897085933856029e-08, "loss": 3.7447, "step": 3819000 }, { "epoch": 42.42, "learning_rate": 1.895697754481043e-08, "loss": 3.7308, "step": 3819500 }, { "epoch": 42.42, "learning_rate": 1.8943095751060568e-08, "loss": 3.7757, "step": 3820000 }, { "epoch": 42.43, "learning_rate": 1.8929213957310706e-08, "loss": 3.757, "step": 3820500 }, { "epoch": 42.43, "learning_rate": 1.8915332163560845e-08, "loss": 3.7345, "step": 3821000 }, { "epoch": 42.44, "learning_rate": 1.8901450369810984e-08, "loss": 3.7321, "step": 3821500 }, { "epoch": 42.44, "learning_rate": 1.8887568576061123e-08, "loss": 3.7498, "step": 3822000 }, { "epoch": 42.45, "learning_rate": 1.8873686782311262e-08, "loss": 3.7938, "step": 3822500 }, { "epoch": 42.46, "learning_rate": 1.88598049885614e-08, "loss": 3.7506, "step": 3823000 }, { "epoch": 42.46, "learning_rate": 1.884592319481154e-08, "loss": 3.7895, "step": 3823500 }, { "epoch": 42.47, "learning_rate": 1.883204140106168e-08, "loss": 3.7406, "step": 3824000 }, { "epoch": 42.47, "learning_rate": 1.8818159607311818e-08, "loss": 3.7373, "step": 3824500 }, { "epoch": 42.48, "learning_rate": 1.8804277813561957e-08, "loss": 3.736, "step": 3825000 }, { "epoch": 42.48, "learning_rate": 1.8790396019812096e-08, "loss": 3.758, "step": 3825500 }, { "epoch": 42.49, "learning_rate": 1.8776514226062235e-08, "loss": 3.7689, "step": 3826000 }, { "epoch": 42.49, "learning_rate": 1.8762632432312374e-08, "loss": 3.7511, "step": 3826500 }, { "epoch": 42.5, "learning_rate": 1.874875063856251e-08, "loss": 3.7557, "step": 3827000 }, { "epoch": 42.51, "learning_rate": 1.8734868844812652e-08, "loss": 3.7505, "step": 3827500 }, { "epoch": 42.51, "learning_rate": 1.872098705106279e-08, "loss": 3.7355, "step": 3828000 }, { "epoch": 42.52, "learning_rate": 1.8707105257312926e-08, "loss": 3.7454, "step": 3828500 }, { "epoch": 42.52, "learning_rate": 1.869322346356307e-08, "loss": 3.7542, "step": 3829000 }, { "epoch": 42.53, "learning_rate": 1.8679341669813208e-08, "loss": 3.7695, "step": 3829500 }, { "epoch": 42.53, "learning_rate": 1.8665459876063343e-08, "loss": 3.7371, "step": 3830000 }, { "epoch": 42.54, "learning_rate": 1.8651578082313482e-08, "loss": 3.725, "step": 3830500 }, { "epoch": 42.54, "learning_rate": 1.8637696288563625e-08, "loss": 3.7504, "step": 3831000 }, { "epoch": 42.55, "learning_rate": 1.862381449481376e-08, "loss": 3.7388, "step": 3831500 }, { "epoch": 42.56, "learning_rate": 1.86099327010639e-08, "loss": 3.7387, "step": 3832000 }, { "epoch": 42.56, "learning_rate": 1.859605090731404e-08, "loss": 3.7515, "step": 3832500 }, { "epoch": 42.57, "learning_rate": 1.8582169113564177e-08, "loss": 3.7474, "step": 3833000 }, { "epoch": 42.57, "learning_rate": 1.8568287319814316e-08, "loss": 3.7727, "step": 3833500 }, { "epoch": 42.58, "learning_rate": 1.8554405526064455e-08, "loss": 3.7489, "step": 3834000 }, { "epoch": 42.58, "learning_rate": 1.8540523732314594e-08, "loss": 3.7557, "step": 3834500 }, { "epoch": 42.59, "learning_rate": 1.8526641938564733e-08, "loss": 3.7484, "step": 3835000 }, { "epoch": 42.59, "learning_rate": 1.8512760144814872e-08, "loss": 3.7547, "step": 3835500 }, { "epoch": 42.6, "learning_rate": 1.849887835106501e-08, "loss": 3.7546, "step": 3836000 }, { "epoch": 42.61, "learning_rate": 1.848499655731515e-08, "loss": 3.7361, "step": 3836500 }, { "epoch": 42.61, "learning_rate": 1.847111476356529e-08, "loss": 3.7453, "step": 3837000 }, { "epoch": 42.62, "learning_rate": 1.8457232969815424e-08, "loss": 3.7468, "step": 3837500 }, { "epoch": 42.62, "learning_rate": 1.8443351176065567e-08, "loss": 3.7656, "step": 3838000 }, { "epoch": 42.63, "learning_rate": 1.8429469382315706e-08, "loss": 3.7545, "step": 3838500 }, { "epoch": 42.63, "learning_rate": 1.841558758856584e-08, "loss": 3.7415, "step": 3839000 }, { "epoch": 42.64, "learning_rate": 1.8401705794815983e-08, "loss": 3.7599, "step": 3839500 }, { "epoch": 42.64, "learning_rate": 1.8387824001066122e-08, "loss": 3.754, "step": 3840000 }, { "epoch": 42.65, "learning_rate": 1.8373942207316258e-08, "loss": 3.7493, "step": 3840500 }, { "epoch": 42.66, "learning_rate": 1.8360060413566397e-08, "loss": 3.7377, "step": 3841000 }, { "epoch": 42.66, "learning_rate": 1.834617861981654e-08, "loss": 3.7589, "step": 3841500 }, { "epoch": 42.67, "learning_rate": 1.8332296826066675e-08, "loss": 3.7618, "step": 3842000 }, { "epoch": 42.67, "learning_rate": 1.8318415032316814e-08, "loss": 3.7473, "step": 3842500 }, { "epoch": 42.68, "learning_rate": 1.8304533238566956e-08, "loss": 3.7514, "step": 3843000 }, { "epoch": 42.68, "learning_rate": 1.8290651444817092e-08, "loss": 3.7404, "step": 3843500 }, { "epoch": 42.69, "learning_rate": 1.827676965106723e-08, "loss": 3.743, "step": 3844000 }, { "epoch": 42.69, "learning_rate": 1.826288785731737e-08, "loss": 3.7384, "step": 3844500 }, { "epoch": 42.7, "learning_rate": 1.824900606356751e-08, "loss": 3.7588, "step": 3845000 }, { "epoch": 42.71, "learning_rate": 1.8235124269817648e-08, "loss": 3.7412, "step": 3845500 }, { "epoch": 42.71, "learning_rate": 1.8221242476067787e-08, "loss": 3.7569, "step": 3846000 }, { "epoch": 42.72, "learning_rate": 1.8207360682317926e-08, "loss": 3.743, "step": 3846500 }, { "epoch": 42.72, "learning_rate": 1.8193478888568064e-08, "loss": 3.7522, "step": 3847000 }, { "epoch": 42.73, "learning_rate": 1.8179597094818203e-08, "loss": 3.7464, "step": 3847500 }, { "epoch": 42.73, "learning_rate": 1.8165715301068342e-08, "loss": 3.7456, "step": 3848000 }, { "epoch": 42.74, "learning_rate": 1.815183350731848e-08, "loss": 3.7551, "step": 3848500 }, { "epoch": 42.74, "learning_rate": 1.813795171356862e-08, "loss": 3.7489, "step": 3849000 }, { "epoch": 42.75, "learning_rate": 1.8124069919818756e-08, "loss": 3.7553, "step": 3849500 }, { "epoch": 42.76, "learning_rate": 1.8110188126068898e-08, "loss": 3.7448, "step": 3850000 }, { "epoch": 42.76, "learning_rate": 1.8096306332319037e-08, "loss": 3.7474, "step": 3850500 }, { "epoch": 42.77, "learning_rate": 1.8082424538569173e-08, "loss": 3.759, "step": 3851000 }, { "epoch": 42.77, "learning_rate": 1.8068542744819315e-08, "loss": 3.723, "step": 3851500 }, { "epoch": 42.78, "learning_rate": 1.8054660951069454e-08, "loss": 3.7386, "step": 3852000 }, { "epoch": 42.78, "learning_rate": 1.804077915731959e-08, "loss": 3.7521, "step": 3852500 }, { "epoch": 42.79, "learning_rate": 1.802689736356973e-08, "loss": 3.7472, "step": 3853000 }, { "epoch": 42.79, "learning_rate": 1.801301556981987e-08, "loss": 3.7389, "step": 3853500 }, { "epoch": 42.8, "learning_rate": 1.7999133776070007e-08, "loss": 3.736, "step": 3854000 }, { "epoch": 42.81, "learning_rate": 1.7985251982320145e-08, "loss": 3.7141, "step": 3854500 }, { "epoch": 42.81, "learning_rate": 1.7971370188570288e-08, "loss": 3.7617, "step": 3855000 }, { "epoch": 42.82, "learning_rate": 1.7957488394820423e-08, "loss": 3.7578, "step": 3855500 }, { "epoch": 42.82, "learning_rate": 1.7943606601070562e-08, "loss": 3.7573, "step": 3856000 }, { "epoch": 42.83, "learning_rate": 1.79297248073207e-08, "loss": 3.7686, "step": 3856500 }, { "epoch": 42.83, "learning_rate": 1.791584301357084e-08, "loss": 3.7626, "step": 3857000 }, { "epoch": 42.84, "learning_rate": 1.790196121982098e-08, "loss": 3.7464, "step": 3857500 }, { "epoch": 42.84, "learning_rate": 1.7888079426071118e-08, "loss": 3.7486, "step": 3858000 }, { "epoch": 42.85, "learning_rate": 1.7874197632321257e-08, "loss": 3.7533, "step": 3858500 }, { "epoch": 42.86, "learning_rate": 1.7860315838571396e-08, "loss": 3.7628, "step": 3859000 }, { "epoch": 42.86, "learning_rate": 1.7846434044821535e-08, "loss": 3.7502, "step": 3859500 }, { "epoch": 42.87, "learning_rate": 1.7832552251071674e-08, "loss": 3.7589, "step": 3860000 }, { "epoch": 42.87, "learning_rate": 1.7818670457321813e-08, "loss": 3.7314, "step": 3860500 }, { "epoch": 42.88, "learning_rate": 1.7804788663571952e-08, "loss": 3.7613, "step": 3861000 }, { "epoch": 42.88, "learning_rate": 1.779090686982209e-08, "loss": 3.7562, "step": 3861500 }, { "epoch": 42.89, "learning_rate": 1.777702507607223e-08, "loss": 3.7591, "step": 3862000 }, { "epoch": 42.89, "learning_rate": 1.776314328232237e-08, "loss": 3.7408, "step": 3862500 }, { "epoch": 42.9, "learning_rate": 1.7749261488572508e-08, "loss": 3.7382, "step": 3863000 }, { "epoch": 42.91, "learning_rate": 1.7735379694822643e-08, "loss": 3.7479, "step": 3863500 }, { "epoch": 42.91, "learning_rate": 1.7721497901072786e-08, "loss": 3.7333, "step": 3864000 }, { "epoch": 42.92, "learning_rate": 1.7707616107322925e-08, "loss": 3.7654, "step": 3864500 }, { "epoch": 42.92, "learning_rate": 1.769373431357306e-08, "loss": 3.75, "step": 3865000 }, { "epoch": 42.93, "learning_rate": 1.7679852519823203e-08, "loss": 3.7363, "step": 3865500 }, { "epoch": 42.93, "learning_rate": 1.766597072607334e-08, "loss": 3.7382, "step": 3866000 }, { "epoch": 42.94, "learning_rate": 1.7652088932323477e-08, "loss": 3.743, "step": 3866500 }, { "epoch": 42.94, "learning_rate": 1.7638207138573616e-08, "loss": 3.7381, "step": 3867000 }, { "epoch": 42.95, "learning_rate": 1.7624325344823755e-08, "loss": 3.7507, "step": 3867500 }, { "epoch": 42.96, "learning_rate": 1.7610443551073894e-08, "loss": 3.7458, "step": 3868000 }, { "epoch": 42.96, "learning_rate": 1.7596561757324033e-08, "loss": 3.7623, "step": 3868500 }, { "epoch": 42.97, "learning_rate": 1.7582679963574172e-08, "loss": 3.7407, "step": 3869000 }, { "epoch": 42.97, "learning_rate": 1.756879816982431e-08, "loss": 3.7582, "step": 3869500 }, { "epoch": 42.98, "learning_rate": 1.755491637607445e-08, "loss": 3.7495, "step": 3870000 }, { "epoch": 42.98, "learning_rate": 1.754103458232459e-08, "loss": 3.7433, "step": 3870500 }, { "epoch": 42.99, "learning_rate": 1.7527152788574728e-08, "loss": 3.751, "step": 3871000 }, { "epoch": 42.99, "learning_rate": 1.7513270994824867e-08, "loss": 3.7533, "step": 3871500 }, { "epoch": 43.0, "eval_loss": 3.8255672454833984, "eval_runtime": 6.3042, "eval_samples_per_second": 246.502, "step": 3871978 }, { "epoch": 43.0, "learning_rate": 1.7499389201075006e-08, "loss": 3.7528, "step": 3872000 }, { "epoch": 43.01, "learning_rate": 1.7485507407325145e-08, "loss": 3.7406, "step": 3872500 }, { "epoch": 43.01, "learning_rate": 1.7471625613575284e-08, "loss": 3.7506, "step": 3873000 }, { "epoch": 43.02, "learning_rate": 1.7457743819825422e-08, "loss": 3.7445, "step": 3873500 }, { "epoch": 43.02, "learning_rate": 1.7443862026075558e-08, "loss": 3.7486, "step": 3874000 }, { "epoch": 43.03, "learning_rate": 1.74299802323257e-08, "loss": 3.7601, "step": 3874500 }, { "epoch": 43.03, "learning_rate": 1.741609843857584e-08, "loss": 3.7414, "step": 3875000 }, { "epoch": 43.04, "learning_rate": 1.7402216644825975e-08, "loss": 3.7485, "step": 3875500 }, { "epoch": 43.04, "learning_rate": 1.7388334851076117e-08, "loss": 3.7608, "step": 3876000 }, { "epoch": 43.05, "learning_rate": 1.7374453057326256e-08, "loss": 3.7693, "step": 3876500 }, { "epoch": 43.06, "learning_rate": 1.7360571263576392e-08, "loss": 3.7726, "step": 3877000 }, { "epoch": 43.06, "learning_rate": 1.7346689469826534e-08, "loss": 3.7592, "step": 3877500 }, { "epoch": 43.07, "learning_rate": 1.7332807676076673e-08, "loss": 3.7737, "step": 3878000 }, { "epoch": 43.07, "learning_rate": 1.731892588232681e-08, "loss": 3.7501, "step": 3878500 }, { "epoch": 43.08, "learning_rate": 1.7305044088576948e-08, "loss": 3.7322, "step": 3879000 }, { "epoch": 43.08, "learning_rate": 1.729116229482709e-08, "loss": 3.7318, "step": 3879500 }, { "epoch": 43.09, "learning_rate": 1.7277280501077226e-08, "loss": 3.7381, "step": 3880000 }, { "epoch": 43.09, "learning_rate": 1.7263398707327365e-08, "loss": 3.7397, "step": 3880500 }, { "epoch": 43.1, "learning_rate": 1.7249516913577507e-08, "loss": 3.7475, "step": 3881000 }, { "epoch": 43.11, "learning_rate": 1.7235635119827642e-08, "loss": 3.7414, "step": 3881500 }, { "epoch": 43.11, "learning_rate": 1.722175332607778e-08, "loss": 3.7565, "step": 3882000 }, { "epoch": 43.12, "learning_rate": 1.720787153232792e-08, "loss": 3.7487, "step": 3882500 }, { "epoch": 43.12, "learning_rate": 1.719398973857806e-08, "loss": 3.7308, "step": 3883000 }, { "epoch": 43.13, "learning_rate": 1.7180107944828198e-08, "loss": 3.7622, "step": 3883500 }, { "epoch": 43.13, "learning_rate": 1.7166226151078337e-08, "loss": 3.7475, "step": 3884000 }, { "epoch": 43.14, "learning_rate": 1.7152344357328476e-08, "loss": 3.7635, "step": 3884500 }, { "epoch": 43.14, "learning_rate": 1.7138462563578615e-08, "loss": 3.7495, "step": 3885000 }, { "epoch": 43.15, "learning_rate": 1.7124580769828754e-08, "loss": 3.7445, "step": 3885500 }, { "epoch": 43.16, "learning_rate": 1.711069897607889e-08, "loss": 3.7559, "step": 3886000 }, { "epoch": 43.16, "learning_rate": 1.7096817182329032e-08, "loss": 3.7689, "step": 3886500 }, { "epoch": 43.17, "learning_rate": 1.708293538857917e-08, "loss": 3.7366, "step": 3887000 }, { "epoch": 43.17, "learning_rate": 1.7069053594829307e-08, "loss": 3.7438, "step": 3887500 }, { "epoch": 43.18, "learning_rate": 1.705517180107945e-08, "loss": 3.7305, "step": 3888000 }, { "epoch": 43.18, "learning_rate": 1.7041290007329588e-08, "loss": 3.7471, "step": 3888500 }, { "epoch": 43.19, "learning_rate": 1.7027408213579723e-08, "loss": 3.7479, "step": 3889000 }, { "epoch": 43.19, "learning_rate": 1.7013526419829862e-08, "loss": 3.7439, "step": 3889500 }, { "epoch": 43.2, "learning_rate": 1.6999644626080005e-08, "loss": 3.7548, "step": 3890000 }, { "epoch": 43.21, "learning_rate": 1.698576283233014e-08, "loss": 3.7437, "step": 3890500 }, { "epoch": 43.21, "learning_rate": 1.697188103858028e-08, "loss": 3.7404, "step": 3891000 }, { "epoch": 43.22, "learning_rate": 1.695799924483042e-08, "loss": 3.7559, "step": 3891500 }, { "epoch": 43.22, "learning_rate": 1.6944117451080557e-08, "loss": 3.7526, "step": 3892000 }, { "epoch": 43.23, "learning_rate": 1.6930235657330696e-08, "loss": 3.7628, "step": 3892500 }, { "epoch": 43.23, "learning_rate": 1.6916353863580835e-08, "loss": 3.755, "step": 3893000 }, { "epoch": 43.24, "learning_rate": 1.6902472069830974e-08, "loss": 3.7483, "step": 3893500 }, { "epoch": 43.24, "learning_rate": 1.6888590276081113e-08, "loss": 3.732, "step": 3894000 }, { "epoch": 43.25, "learning_rate": 1.6874708482331252e-08, "loss": 3.7459, "step": 3894500 }, { "epoch": 43.26, "learning_rate": 1.686082668858139e-08, "loss": 3.7528, "step": 3895000 }, { "epoch": 43.26, "learning_rate": 1.684694489483153e-08, "loss": 3.7382, "step": 3895500 }, { "epoch": 43.27, "learning_rate": 1.683306310108167e-08, "loss": 3.7536, "step": 3896000 }, { "epoch": 43.27, "learning_rate": 1.6819181307331808e-08, "loss": 3.7462, "step": 3896500 }, { "epoch": 43.28, "learning_rate": 1.6805299513581947e-08, "loss": 3.7549, "step": 3897000 }, { "epoch": 43.28, "learning_rate": 1.6791417719832086e-08, "loss": 3.7586, "step": 3897500 }, { "epoch": 43.29, "learning_rate": 1.677753592608222e-08, "loss": 3.7454, "step": 3898000 }, { "epoch": 43.29, "learning_rate": 1.6763654132332364e-08, "loss": 3.7264, "step": 3898500 }, { "epoch": 43.3, "learning_rate": 1.6749772338582503e-08, "loss": 3.7493, "step": 3899000 }, { "epoch": 43.31, "learning_rate": 1.6735890544832638e-08, "loss": 3.7701, "step": 3899500 }, { "epoch": 43.31, "learning_rate": 1.6722008751082777e-08, "loss": 3.7509, "step": 3900000 }, { "epoch": 43.32, "learning_rate": 1.670812695733292e-08, "loss": 3.7465, "step": 3900500 }, { "epoch": 43.32, "learning_rate": 1.6694245163583055e-08, "loss": 3.7447, "step": 3901000 }, { "epoch": 43.33, "learning_rate": 1.6680363369833194e-08, "loss": 3.7437, "step": 3901500 }, { "epoch": 43.33, "learning_rate": 1.6666481576083336e-08, "loss": 3.7702, "step": 3902000 }, { "epoch": 43.34, "learning_rate": 1.6652599782333472e-08, "loss": 3.7513, "step": 3902500 }, { "epoch": 43.34, "learning_rate": 1.663871798858361e-08, "loss": 3.7491, "step": 3903000 }, { "epoch": 43.35, "learning_rate": 1.662483619483375e-08, "loss": 3.7572, "step": 3903500 }, { "epoch": 43.36, "learning_rate": 1.661095440108389e-08, "loss": 3.7428, "step": 3904000 }, { "epoch": 43.36, "learning_rate": 1.6597072607334028e-08, "loss": 3.735, "step": 3904500 }, { "epoch": 43.37, "learning_rate": 1.6583190813584167e-08, "loss": 3.7267, "step": 3905000 }, { "epoch": 43.37, "learning_rate": 1.6569309019834306e-08, "loss": 3.7736, "step": 3905500 }, { "epoch": 43.38, "learning_rate": 1.6555427226084445e-08, "loss": 3.749, "step": 3906000 }, { "epoch": 43.38, "learning_rate": 1.6541545432334584e-08, "loss": 3.7508, "step": 3906500 }, { "epoch": 43.39, "learning_rate": 1.6527663638584723e-08, "loss": 3.7404, "step": 3907000 }, { "epoch": 43.39, "learning_rate": 1.651378184483486e-08, "loss": 3.7419, "step": 3907500 }, { "epoch": 43.4, "learning_rate": 1.6499900051085e-08, "loss": 3.7409, "step": 3908000 }, { "epoch": 43.41, "learning_rate": 1.648601825733514e-08, "loss": 3.7618, "step": 3908500 }, { "epoch": 43.41, "learning_rate": 1.647213646358528e-08, "loss": 3.7564, "step": 3909000 }, { "epoch": 43.42, "learning_rate": 1.6458254669835417e-08, "loss": 3.755, "step": 3909500 }, { "epoch": 43.42, "learning_rate": 1.6444372876085556e-08, "loss": 3.744, "step": 3910000 }, { "epoch": 43.43, "learning_rate": 1.6430491082335695e-08, "loss": 3.7483, "step": 3910500 }, { "epoch": 43.43, "learning_rate": 1.6416609288585834e-08, "loss": 3.7437, "step": 3911000 }, { "epoch": 43.44, "learning_rate": 1.6402727494835973e-08, "loss": 3.7425, "step": 3911500 }, { "epoch": 43.44, "learning_rate": 1.638884570108611e-08, "loss": 3.7548, "step": 3912000 }, { "epoch": 43.45, "learning_rate": 1.637496390733625e-08, "loss": 3.7564, "step": 3912500 }, { "epoch": 43.46, "learning_rate": 1.636108211358639e-08, "loss": 3.7471, "step": 3913000 }, { "epoch": 43.46, "learning_rate": 1.6347200319836526e-08, "loss": 3.7405, "step": 3913500 }, { "epoch": 43.47, "learning_rate": 1.6333318526086668e-08, "loss": 3.7463, "step": 3914000 }, { "epoch": 43.47, "learning_rate": 1.6319436732336807e-08, "loss": 3.7577, "step": 3914500 }, { "epoch": 43.48, "learning_rate": 1.6305554938586943e-08, "loss": 3.7576, "step": 3915000 }, { "epoch": 43.48, "learning_rate": 1.629167314483708e-08, "loss": 3.7591, "step": 3915500 }, { "epoch": 43.49, "learning_rate": 1.6277791351087224e-08, "loss": 3.7517, "step": 3916000 }, { "epoch": 43.49, "learning_rate": 1.626390955733736e-08, "loss": 3.7541, "step": 3916500 }, { "epoch": 43.5, "learning_rate": 1.62500277635875e-08, "loss": 3.7497, "step": 3917000 }, { "epoch": 43.51, "learning_rate": 1.623614596983764e-08, "loss": 3.7563, "step": 3917500 }, { "epoch": 43.51, "learning_rate": 1.6222264176087776e-08, "loss": 3.7538, "step": 3918000 }, { "epoch": 43.52, "learning_rate": 1.6208382382337915e-08, "loss": 3.75, "step": 3918500 }, { "epoch": 43.52, "learning_rate": 1.6194500588588054e-08, "loss": 3.7583, "step": 3919000 }, { "epoch": 43.53, "learning_rate": 1.6180618794838193e-08, "loss": 3.7424, "step": 3919500 }, { "epoch": 43.53, "learning_rate": 1.6166737001088332e-08, "loss": 3.7493, "step": 3920000 }, { "epoch": 43.54, "learning_rate": 1.615285520733847e-08, "loss": 3.7348, "step": 3920500 }, { "epoch": 43.54, "learning_rate": 1.613897341358861e-08, "loss": 3.7469, "step": 3921000 }, { "epoch": 43.55, "learning_rate": 1.612509161983875e-08, "loss": 3.732, "step": 3921500 }, { "epoch": 43.56, "learning_rate": 1.6111209826088888e-08, "loss": 3.7543, "step": 3922000 }, { "epoch": 43.56, "learning_rate": 1.6097328032339024e-08, "loss": 3.7402, "step": 3922500 }, { "epoch": 43.57, "learning_rate": 1.6083446238589166e-08, "loss": 3.7561, "step": 3923000 }, { "epoch": 43.57, "learning_rate": 1.6069564444839305e-08, "loss": 3.7522, "step": 3923500 }, { "epoch": 43.58, "learning_rate": 1.605568265108944e-08, "loss": 3.7392, "step": 3924000 }, { "epoch": 43.58, "learning_rate": 1.6041800857339583e-08, "loss": 3.755, "step": 3924500 }, { "epoch": 43.59, "learning_rate": 1.6027919063589722e-08, "loss": 3.7657, "step": 3925000 }, { "epoch": 43.59, "learning_rate": 1.6014037269839857e-08, "loss": 3.7469, "step": 3925500 }, { "epoch": 43.6, "learning_rate": 1.6000155476089996e-08, "loss": 3.7536, "step": 3926000 }, { "epoch": 43.61, "learning_rate": 1.598627368234014e-08, "loss": 3.7645, "step": 3926500 }, { "epoch": 43.61, "learning_rate": 1.5972391888590274e-08, "loss": 3.761, "step": 3927000 }, { "epoch": 43.62, "learning_rate": 1.5958510094840413e-08, "loss": 3.7535, "step": 3927500 }, { "epoch": 43.62, "learning_rate": 1.5944628301090555e-08, "loss": 3.7536, "step": 3928000 }, { "epoch": 43.63, "learning_rate": 1.593074650734069e-08, "loss": 3.7379, "step": 3928500 }, { "epoch": 43.63, "learning_rate": 1.591686471359083e-08, "loss": 3.7404, "step": 3929000 }, { "epoch": 43.64, "learning_rate": 1.590298291984097e-08, "loss": 3.7342, "step": 3929500 }, { "epoch": 43.64, "learning_rate": 1.5889101126091108e-08, "loss": 3.7661, "step": 3930000 }, { "epoch": 43.65, "learning_rate": 1.5875219332341247e-08, "loss": 3.7399, "step": 3930500 }, { "epoch": 43.66, "learning_rate": 1.5861337538591386e-08, "loss": 3.7416, "step": 3931000 }, { "epoch": 43.66, "learning_rate": 1.5847455744841525e-08, "loss": 3.7442, "step": 3931500 }, { "epoch": 43.67, "learning_rate": 1.5833573951091664e-08, "loss": 3.7418, "step": 3932000 }, { "epoch": 43.67, "learning_rate": 1.5819692157341803e-08, "loss": 3.7529, "step": 3932500 }, { "epoch": 43.68, "learning_rate": 1.5805810363591938e-08, "loss": 3.7489, "step": 3933000 }, { "epoch": 43.68, "learning_rate": 1.579192856984208e-08, "loss": 3.7534, "step": 3933500 }, { "epoch": 43.69, "learning_rate": 1.577804677609222e-08, "loss": 3.7475, "step": 3934000 }, { "epoch": 43.69, "learning_rate": 1.5764164982342355e-08, "loss": 3.754, "step": 3934500 }, { "epoch": 43.7, "learning_rate": 1.5750283188592497e-08, "loss": 3.7412, "step": 3935000 }, { "epoch": 43.71, "learning_rate": 1.5736401394842636e-08, "loss": 3.7617, "step": 3935500 }, { "epoch": 43.71, "learning_rate": 1.5722519601092772e-08, "loss": 3.7397, "step": 3936000 }, { "epoch": 43.72, "learning_rate": 1.570863780734291e-08, "loss": 3.7407, "step": 3936500 }, { "epoch": 43.72, "learning_rate": 1.5694756013593053e-08, "loss": 3.7395, "step": 3937000 }, { "epoch": 43.73, "learning_rate": 1.568087421984319e-08, "loss": 3.7607, "step": 3937500 }, { "epoch": 43.73, "learning_rate": 1.5666992426093328e-08, "loss": 3.7512, "step": 3938000 }, { "epoch": 43.74, "learning_rate": 1.565311063234347e-08, "loss": 3.758, "step": 3938500 }, { "epoch": 43.74, "learning_rate": 1.5639228838593606e-08, "loss": 3.7398, "step": 3939000 }, { "epoch": 43.75, "learning_rate": 1.5625347044843745e-08, "loss": 3.7476, "step": 3939500 }, { "epoch": 43.76, "learning_rate": 1.5611465251093887e-08, "loss": 3.7398, "step": 3940000 }, { "epoch": 43.76, "learning_rate": 1.5597583457344023e-08, "loss": 3.7547, "step": 3940500 }, { "epoch": 43.77, "learning_rate": 1.558370166359416e-08, "loss": 3.7469, "step": 3941000 }, { "epoch": 43.77, "learning_rate": 1.55698198698443e-08, "loss": 3.746, "step": 3941500 }, { "epoch": 43.78, "learning_rate": 1.555593807609444e-08, "loss": 3.7542, "step": 3942000 }, { "epoch": 43.78, "learning_rate": 1.554205628234458e-08, "loss": 3.743, "step": 3942500 }, { "epoch": 43.79, "learning_rate": 1.5528174488594717e-08, "loss": 3.7359, "step": 3943000 }, { "epoch": 43.79, "learning_rate": 1.5514292694844856e-08, "loss": 3.7464, "step": 3943500 }, { "epoch": 43.8, "learning_rate": 1.5500410901094995e-08, "loss": 3.7674, "step": 3944000 }, { "epoch": 43.81, "learning_rate": 1.5486529107345134e-08, "loss": 3.7369, "step": 3944500 }, { "epoch": 43.81, "learning_rate": 1.5472647313595273e-08, "loss": 3.7428, "step": 3945000 }, { "epoch": 43.82, "learning_rate": 1.5458765519845412e-08, "loss": 3.7261, "step": 3945500 }, { "epoch": 43.82, "learning_rate": 1.544488372609555e-08, "loss": 3.7375, "step": 3946000 }, { "epoch": 43.83, "learning_rate": 1.543100193234569e-08, "loss": 3.7342, "step": 3946500 }, { "epoch": 43.83, "learning_rate": 1.541712013859583e-08, "loss": 3.7523, "step": 3947000 }, { "epoch": 43.84, "learning_rate": 1.5403238344845968e-08, "loss": 3.7373, "step": 3947500 }, { "epoch": 43.84, "learning_rate": 1.5389356551096107e-08, "loss": 3.7577, "step": 3948000 }, { "epoch": 43.85, "learning_rate": 1.5375474757346243e-08, "loss": 3.7551, "step": 3948500 }, { "epoch": 43.86, "learning_rate": 1.5361592963596385e-08, "loss": 3.7512, "step": 3949000 }, { "epoch": 43.86, "learning_rate": 1.5347711169846524e-08, "loss": 3.7569, "step": 3949500 }, { "epoch": 43.87, "learning_rate": 1.533382937609666e-08, "loss": 3.7532, "step": 3950000 }, { "epoch": 43.87, "learning_rate": 1.5319947582346802e-08, "loss": 3.7384, "step": 3950500 }, { "epoch": 43.88, "learning_rate": 1.530606578859694e-08, "loss": 3.7415, "step": 3951000 }, { "epoch": 43.88, "learning_rate": 1.5292183994847076e-08, "loss": 3.742, "step": 3951500 }, { "epoch": 43.89, "learning_rate": 1.5278302201097215e-08, "loss": 3.7574, "step": 3952000 }, { "epoch": 43.89, "learning_rate": 1.5264420407347354e-08, "loss": 3.7507, "step": 3952500 }, { "epoch": 43.9, "learning_rate": 1.5250538613597493e-08, "loss": 3.7542, "step": 3953000 }, { "epoch": 43.91, "learning_rate": 1.5236656819847632e-08, "loss": 3.7392, "step": 3953500 }, { "epoch": 43.91, "learning_rate": 1.522277502609777e-08, "loss": 3.7515, "step": 3954000 }, { "epoch": 43.92, "learning_rate": 1.520889323234791e-08, "loss": 3.7611, "step": 3954500 }, { "epoch": 43.92, "learning_rate": 1.519501143859805e-08, "loss": 3.77, "step": 3955000 }, { "epoch": 43.93, "learning_rate": 1.5181129644848188e-08, "loss": 3.743, "step": 3955500 }, { "epoch": 43.93, "learning_rate": 1.5167247851098327e-08, "loss": 3.7243, "step": 3956000 }, { "epoch": 43.94, "learning_rate": 1.5153366057348466e-08, "loss": 3.7485, "step": 3956500 }, { "epoch": 43.94, "learning_rate": 1.5139484263598605e-08, "loss": 3.7357, "step": 3957000 }, { "epoch": 43.95, "learning_rate": 1.5125602469848744e-08, "loss": 3.7501, "step": 3957500 }, { "epoch": 43.96, "learning_rate": 1.5111720676098883e-08, "loss": 3.7529, "step": 3958000 }, { "epoch": 43.96, "learning_rate": 1.5097838882349022e-08, "loss": 3.7492, "step": 3958500 }, { "epoch": 43.97, "learning_rate": 1.5083957088599157e-08, "loss": 3.7333, "step": 3959000 }, { "epoch": 43.97, "learning_rate": 1.50700752948493e-08, "loss": 3.7454, "step": 3959500 }, { "epoch": 43.98, "learning_rate": 1.505619350109944e-08, "loss": 3.7475, "step": 3960000 }, { "epoch": 43.98, "learning_rate": 1.5042311707349574e-08, "loss": 3.7323, "step": 3960500 }, { "epoch": 43.99, "learning_rate": 1.5028429913599717e-08, "loss": 3.7384, "step": 3961000 }, { "epoch": 43.99, "learning_rate": 1.5014548119849855e-08, "loss": 3.7494, "step": 3961500 }, { "epoch": 44.0, "learning_rate": 1.500066632609999e-08, "loss": 3.7449, "step": 3962000 }, { "epoch": 44.0, "eval_loss": 3.8248701095581055, "eval_runtime": 6.3081, "eval_samples_per_second": 246.351, "step": 3962024 }, { "epoch": 44.01, "learning_rate": 1.498678453235013e-08, "loss": 3.7435, "step": 3962500 }, { "epoch": 44.01, "learning_rate": 1.4972902738600272e-08, "loss": 3.7607, "step": 3963000 }, { "epoch": 44.02, "learning_rate": 1.4959020944850408e-08, "loss": 3.7453, "step": 3963500 }, { "epoch": 44.02, "learning_rate": 1.4945139151100547e-08, "loss": 3.7395, "step": 3964000 }, { "epoch": 44.03, "learning_rate": 1.493125735735069e-08, "loss": 3.73, "step": 3964500 }, { "epoch": 44.03, "learning_rate": 1.4917375563600825e-08, "loss": 3.7628, "step": 3965000 }, { "epoch": 44.04, "learning_rate": 1.4903493769850964e-08, "loss": 3.7502, "step": 3965500 }, { "epoch": 44.04, "learning_rate": 1.4889611976101101e-08, "loss": 3.7414, "step": 3966000 }, { "epoch": 44.05, "learning_rate": 1.4875730182351243e-08, "loss": 3.7374, "step": 3966500 }, { "epoch": 44.06, "learning_rate": 1.486184838860138e-08, "loss": 3.7627, "step": 3967000 }, { "epoch": 44.06, "learning_rate": 1.4847966594851518e-08, "loss": 3.7364, "step": 3967500 }, { "epoch": 44.07, "learning_rate": 1.483408480110166e-08, "loss": 3.7583, "step": 3968000 }, { "epoch": 44.07, "learning_rate": 1.4820203007351798e-08, "loss": 3.7499, "step": 3968500 }, { "epoch": 44.08, "learning_rate": 1.4806321213601935e-08, "loss": 3.7442, "step": 3969000 }, { "epoch": 44.08, "learning_rate": 1.4792439419852074e-08, "loss": 3.7424, "step": 3969500 }, { "epoch": 44.09, "learning_rate": 1.4778557626102214e-08, "loss": 3.7549, "step": 3970000 }, { "epoch": 44.09, "learning_rate": 1.4764675832352352e-08, "loss": 3.7386, "step": 3970500 }, { "epoch": 44.1, "learning_rate": 1.475079403860249e-08, "loss": 3.7489, "step": 3971000 }, { "epoch": 44.11, "learning_rate": 1.4736912244852631e-08, "loss": 3.7499, "step": 3971500 }, { "epoch": 44.11, "learning_rate": 1.4723030451102769e-08, "loss": 3.7492, "step": 3972000 }, { "epoch": 44.12, "learning_rate": 1.4709148657352908e-08, "loss": 3.7406, "step": 3972500 }, { "epoch": 44.12, "learning_rate": 1.4695266863603048e-08, "loss": 3.7284, "step": 3973000 }, { "epoch": 44.13, "learning_rate": 1.4681385069853185e-08, "loss": 3.7371, "step": 3973500 }, { "epoch": 44.13, "learning_rate": 1.4667503276103324e-08, "loss": 3.7484, "step": 3974000 }, { "epoch": 44.14, "learning_rate": 1.4653621482353462e-08, "loss": 3.762, "step": 3974500 }, { "epoch": 44.14, "learning_rate": 1.4639739688603602e-08, "loss": 3.7443, "step": 3975000 }, { "epoch": 44.15, "learning_rate": 1.4625857894853741e-08, "loss": 3.7604, "step": 3975500 }, { "epoch": 44.16, "learning_rate": 1.4611976101103879e-08, "loss": 3.7367, "step": 3976000 }, { "epoch": 44.16, "learning_rate": 1.4598094307354019e-08, "loss": 3.7637, "step": 3976500 }, { "epoch": 44.17, "learning_rate": 1.4584212513604158e-08, "loss": 3.7452, "step": 3977000 }, { "epoch": 44.17, "learning_rate": 1.4570330719854295e-08, "loss": 3.7246, "step": 3977500 }, { "epoch": 44.18, "learning_rate": 1.4556448926104434e-08, "loss": 3.7642, "step": 3978000 }, { "epoch": 44.18, "learning_rate": 1.4542567132354575e-08, "loss": 3.7721, "step": 3978500 }, { "epoch": 44.19, "learning_rate": 1.4528685338604712e-08, "loss": 3.7495, "step": 3979000 }, { "epoch": 44.19, "learning_rate": 1.4514803544854851e-08, "loss": 3.7444, "step": 3979500 }, { "epoch": 44.2, "learning_rate": 1.4500921751104992e-08, "loss": 3.7519, "step": 3980000 }, { "epoch": 44.21, "learning_rate": 1.4487039957355129e-08, "loss": 3.7442, "step": 3980500 }, { "epoch": 44.21, "learning_rate": 1.4473158163605268e-08, "loss": 3.768, "step": 3981000 }, { "epoch": 44.22, "learning_rate": 1.4459276369855405e-08, "loss": 3.7554, "step": 3981500 }, { "epoch": 44.22, "learning_rate": 1.4445394576105546e-08, "loss": 3.7301, "step": 3982000 }, { "epoch": 44.23, "learning_rate": 1.4431512782355685e-08, "loss": 3.7521, "step": 3982500 }, { "epoch": 44.23, "learning_rate": 1.4417630988605822e-08, "loss": 3.7522, "step": 3983000 }, { "epoch": 44.24, "learning_rate": 1.4403749194855963e-08, "loss": 3.7554, "step": 3983500 }, { "epoch": 44.24, "learning_rate": 1.4389867401106102e-08, "loss": 3.7347, "step": 3984000 }, { "epoch": 44.25, "learning_rate": 1.4375985607356239e-08, "loss": 3.7266, "step": 3984500 }, { "epoch": 44.26, "learning_rate": 1.4362103813606376e-08, "loss": 3.741, "step": 3985000 }, { "epoch": 44.26, "learning_rate": 1.4348222019856519e-08, "loss": 3.7482, "step": 3985500 }, { "epoch": 44.27, "learning_rate": 1.4334340226106656e-08, "loss": 3.7459, "step": 3986000 }, { "epoch": 44.27, "learning_rate": 1.4320458432356793e-08, "loss": 3.7621, "step": 3986500 }, { "epoch": 44.28, "learning_rate": 1.4306576638606936e-08, "loss": 3.7382, "step": 3987000 }, { "epoch": 44.28, "learning_rate": 1.4292694844857073e-08, "loss": 3.748, "step": 3987500 }, { "epoch": 44.29, "learning_rate": 1.427881305110721e-08, "loss": 3.7637, "step": 3988000 }, { "epoch": 44.29, "learning_rate": 1.4264931257357349e-08, "loss": 3.7428, "step": 3988500 }, { "epoch": 44.3, "learning_rate": 1.425104946360749e-08, "loss": 3.7289, "step": 3989000 }, { "epoch": 44.31, "learning_rate": 1.4237167669857627e-08, "loss": 3.7704, "step": 3989500 }, { "epoch": 44.31, "learning_rate": 1.4223285876107766e-08, "loss": 3.7569, "step": 3990000 }, { "epoch": 44.32, "learning_rate": 1.4209404082357907e-08, "loss": 3.771, "step": 3990500 }, { "epoch": 44.32, "learning_rate": 1.4195522288608044e-08, "loss": 3.7529, "step": 3991000 }, { "epoch": 44.33, "learning_rate": 1.4181640494858183e-08, "loss": 3.7669, "step": 3991500 }, { "epoch": 44.33, "learning_rate": 1.416775870110832e-08, "loss": 3.7479, "step": 3992000 }, { "epoch": 44.34, "learning_rate": 1.415387690735846e-08, "loss": 3.7628, "step": 3992500 }, { "epoch": 44.34, "learning_rate": 1.41399951136086e-08, "loss": 3.7516, "step": 3993000 }, { "epoch": 44.35, "learning_rate": 1.4126113319858737e-08, "loss": 3.7432, "step": 3993500 }, { "epoch": 44.36, "learning_rate": 1.4112231526108878e-08, "loss": 3.7659, "step": 3994000 }, { "epoch": 44.36, "learning_rate": 1.4098349732359017e-08, "loss": 3.7552, "step": 3994500 }, { "epoch": 44.37, "learning_rate": 1.4084467938609154e-08, "loss": 3.7322, "step": 3995000 }, { "epoch": 44.37, "learning_rate": 1.4070586144859293e-08, "loss": 3.7449, "step": 3995500 }, { "epoch": 44.38, "learning_rate": 1.4056704351109433e-08, "loss": 3.7483, "step": 3996000 }, { "epoch": 44.38, "learning_rate": 1.404282255735957e-08, "loss": 3.7239, "step": 3996500 }, { "epoch": 44.39, "learning_rate": 1.402894076360971e-08, "loss": 3.7373, "step": 3997000 }, { "epoch": 44.39, "learning_rate": 1.401505896985985e-08, "loss": 3.7341, "step": 3997500 }, { "epoch": 44.4, "learning_rate": 1.4001177176109988e-08, "loss": 3.7541, "step": 3998000 }, { "epoch": 44.41, "learning_rate": 1.3987295382360127e-08, "loss": 3.7219, "step": 3998500 }, { "epoch": 44.41, "learning_rate": 1.3973413588610264e-08, "loss": 3.7479, "step": 3999000 }, { "epoch": 44.42, "learning_rate": 1.3959531794860404e-08, "loss": 3.7457, "step": 3999500 }, { "epoch": 44.42, "learning_rate": 1.3945650001110543e-08, "loss": 3.7491, "step": 4000000 }, { "epoch": 44.43, "learning_rate": 1.393176820736068e-08, "loss": 3.7566, "step": 4000500 }, { "epoch": 44.43, "learning_rate": 1.3917886413610821e-08, "loss": 3.7669, "step": 4001000 }, { "epoch": 44.44, "learning_rate": 1.390400461986096e-08, "loss": 3.7531, "step": 4001500 }, { "epoch": 44.44, "learning_rate": 1.3890122826111098e-08, "loss": 3.7343, "step": 4002000 }, { "epoch": 44.45, "learning_rate": 1.3876241032361238e-08, "loss": 3.7539, "step": 4002500 }, { "epoch": 44.46, "learning_rate": 1.3862359238611376e-08, "loss": 3.7508, "step": 4003000 }, { "epoch": 44.46, "learning_rate": 1.3848477444861514e-08, "loss": 3.7296, "step": 4003500 }, { "epoch": 44.47, "learning_rate": 1.3834595651111652e-08, "loss": 3.7681, "step": 4004000 }, { "epoch": 44.47, "learning_rate": 1.3820713857361792e-08, "loss": 3.7451, "step": 4004500 }, { "epoch": 44.48, "learning_rate": 1.3806832063611931e-08, "loss": 3.7555, "step": 4005000 }, { "epoch": 44.48, "learning_rate": 1.3792950269862069e-08, "loss": 3.745, "step": 4005500 }, { "epoch": 44.49, "learning_rate": 1.377906847611221e-08, "loss": 3.7486, "step": 4006000 }, { "epoch": 44.49, "learning_rate": 1.3765186682362348e-08, "loss": 3.7372, "step": 4006500 }, { "epoch": 44.5, "learning_rate": 1.3751304888612486e-08, "loss": 3.7507, "step": 4007000 }, { "epoch": 44.51, "learning_rate": 1.3737423094862624e-08, "loss": 3.7391, "step": 4007500 }, { "epoch": 44.51, "learning_rate": 1.3723541301112765e-08, "loss": 3.7495, "step": 4008000 }, { "epoch": 44.52, "learning_rate": 1.3709659507362902e-08, "loss": 3.7727, "step": 4008500 }, { "epoch": 44.52, "learning_rate": 1.3695777713613041e-08, "loss": 3.756, "step": 4009000 }, { "epoch": 44.53, "learning_rate": 1.3681895919863182e-08, "loss": 3.7414, "step": 4009500 }, { "epoch": 44.53, "learning_rate": 1.366801412611332e-08, "loss": 3.7522, "step": 4010000 }, { "epoch": 44.54, "learning_rate": 1.3654132332363458e-08, "loss": 3.7396, "step": 4010500 }, { "epoch": 44.54, "learning_rate": 1.3640250538613596e-08, "loss": 3.7509, "step": 4011000 }, { "epoch": 44.55, "learning_rate": 1.3626368744863736e-08, "loss": 3.7472, "step": 4011500 }, { "epoch": 44.56, "learning_rate": 1.3612486951113875e-08, "loss": 3.759, "step": 4012000 }, { "epoch": 44.56, "learning_rate": 1.3598605157364012e-08, "loss": 3.7635, "step": 4012500 }, { "epoch": 44.57, "learning_rate": 1.3584723363614153e-08, "loss": 3.7449, "step": 4013000 }, { "epoch": 44.57, "learning_rate": 1.3570841569864292e-08, "loss": 3.7508, "step": 4013500 }, { "epoch": 44.58, "learning_rate": 1.355695977611443e-08, "loss": 3.7459, "step": 4014000 }, { "epoch": 44.58, "learning_rate": 1.3543077982364568e-08, "loss": 3.7684, "step": 4014500 }, { "epoch": 44.59, "learning_rate": 1.3529196188614709e-08, "loss": 3.7569, "step": 4015000 }, { "epoch": 44.59, "learning_rate": 1.3515314394864846e-08, "loss": 3.7226, "step": 4015500 }, { "epoch": 44.6, "learning_rate": 1.3501432601114985e-08, "loss": 3.758, "step": 4016000 }, { "epoch": 44.6, "learning_rate": 1.3487550807365126e-08, "loss": 3.758, "step": 4016500 }, { "epoch": 44.61, "learning_rate": 1.3473669013615263e-08, "loss": 3.7382, "step": 4017000 }, { "epoch": 44.62, "learning_rate": 1.3459787219865402e-08, "loss": 3.7436, "step": 4017500 }, { "epoch": 44.62, "learning_rate": 1.344590542611554e-08, "loss": 3.7335, "step": 4018000 }, { "epoch": 44.63, "learning_rate": 1.343202363236568e-08, "loss": 3.7361, "step": 4018500 }, { "epoch": 44.63, "learning_rate": 1.3418141838615817e-08, "loss": 3.7572, "step": 4019000 }, { "epoch": 44.64, "learning_rate": 1.3404260044865956e-08, "loss": 3.7661, "step": 4019500 }, { "epoch": 44.64, "learning_rate": 1.3390378251116097e-08, "loss": 3.7308, "step": 4020000 }, { "epoch": 44.65, "learning_rate": 1.3376496457366234e-08, "loss": 3.7391, "step": 4020500 }, { "epoch": 44.65, "learning_rate": 1.3362614663616373e-08, "loss": 3.7574, "step": 4021000 }, { "epoch": 44.66, "learning_rate": 1.334873286986651e-08, "loss": 3.7558, "step": 4021500 }, { "epoch": 44.67, "learning_rate": 1.3334851076116651e-08, "loss": 3.7232, "step": 4022000 }, { "epoch": 44.67, "learning_rate": 1.332096928236679e-08, "loss": 3.7447, "step": 4022500 }, { "epoch": 44.68, "learning_rate": 1.3307087488616927e-08, "loss": 3.7503, "step": 4023000 }, { "epoch": 44.68, "learning_rate": 1.3293205694867068e-08, "loss": 3.756, "step": 4023500 }, { "epoch": 44.69, "learning_rate": 1.3279323901117207e-08, "loss": 3.7406, "step": 4024000 }, { "epoch": 44.69, "learning_rate": 1.3265442107367344e-08, "loss": 3.7481, "step": 4024500 }, { "epoch": 44.7, "learning_rate": 1.3251560313617483e-08, "loss": 3.7262, "step": 4025000 }, { "epoch": 44.7, "learning_rate": 1.3237678519867624e-08, "loss": 3.7111, "step": 4025500 }, { "epoch": 44.71, "learning_rate": 1.3223796726117761e-08, "loss": 3.7257, "step": 4026000 }, { "epoch": 44.72, "learning_rate": 1.32099149323679e-08, "loss": 3.7578, "step": 4026500 }, { "epoch": 44.72, "learning_rate": 1.319603313861804e-08, "loss": 3.7541, "step": 4027000 }, { "epoch": 44.73, "learning_rate": 1.3182151344868178e-08, "loss": 3.7401, "step": 4027500 }, { "epoch": 44.73, "learning_rate": 1.3168269551118317e-08, "loss": 3.7298, "step": 4028000 }, { "epoch": 44.74, "learning_rate": 1.3154387757368454e-08, "loss": 3.7379, "step": 4028500 }, { "epoch": 44.74, "learning_rate": 1.3140505963618595e-08, "loss": 3.7431, "step": 4029000 }, { "epoch": 44.75, "learning_rate": 1.3126624169868734e-08, "loss": 3.7572, "step": 4029500 }, { "epoch": 44.75, "learning_rate": 1.3112742376118871e-08, "loss": 3.744, "step": 4030000 }, { "epoch": 44.76, "learning_rate": 1.3098860582369011e-08, "loss": 3.7559, "step": 4030500 }, { "epoch": 44.77, "learning_rate": 1.308497878861915e-08, "loss": 3.7692, "step": 4031000 }, { "epoch": 44.77, "learning_rate": 1.3071096994869288e-08, "loss": 3.7629, "step": 4031500 }, { "epoch": 44.78, "learning_rate": 1.3057215201119427e-08, "loss": 3.7448, "step": 4032000 }, { "epoch": 44.78, "learning_rate": 1.3043333407369567e-08, "loss": 3.7355, "step": 4032500 }, { "epoch": 44.79, "learning_rate": 1.3029451613619705e-08, "loss": 3.754, "step": 4033000 }, { "epoch": 44.79, "learning_rate": 1.3015569819869844e-08, "loss": 3.7549, "step": 4033500 }, { "epoch": 44.8, "learning_rate": 1.3001688026119984e-08, "loss": 3.7674, "step": 4034000 }, { "epoch": 44.8, "learning_rate": 1.2987806232370121e-08, "loss": 3.7338, "step": 4034500 }, { "epoch": 44.81, "learning_rate": 1.2973924438620259e-08, "loss": 3.7445, "step": 4035000 }, { "epoch": 44.82, "learning_rate": 1.2960042644870401e-08, "loss": 3.7565, "step": 4035500 }, { "epoch": 44.82, "learning_rate": 1.2946160851120538e-08, "loss": 3.7377, "step": 4036000 }, { "epoch": 44.83, "learning_rate": 1.2932279057370676e-08, "loss": 3.7462, "step": 4036500 }, { "epoch": 44.83, "learning_rate": 1.2918397263620815e-08, "loss": 3.7426, "step": 4037000 }, { "epoch": 44.84, "learning_rate": 1.2904515469870955e-08, "loss": 3.7397, "step": 4037500 }, { "epoch": 44.84, "learning_rate": 1.2890633676121092e-08, "loss": 3.7572, "step": 4038000 }, { "epoch": 44.85, "learning_rate": 1.2876751882371231e-08, "loss": 3.7421, "step": 4038500 }, { "epoch": 44.85, "learning_rate": 1.2862870088621372e-08, "loss": 3.7649, "step": 4039000 }, { "epoch": 44.86, "learning_rate": 1.284898829487151e-08, "loss": 3.7551, "step": 4039500 }, { "epoch": 44.87, "learning_rate": 1.2835106501121648e-08, "loss": 3.7483, "step": 4040000 }, { "epoch": 44.87, "learning_rate": 1.2821224707371786e-08, "loss": 3.7292, "step": 4040500 }, { "epoch": 44.88, "learning_rate": 1.2807342913621926e-08, "loss": 3.7425, "step": 4041000 }, { "epoch": 44.88, "learning_rate": 1.2793461119872065e-08, "loss": 3.7643, "step": 4041500 }, { "epoch": 44.89, "learning_rate": 1.2779579326122202e-08, "loss": 3.751, "step": 4042000 }, { "epoch": 44.89, "learning_rate": 1.2765697532372343e-08, "loss": 3.7423, "step": 4042500 }, { "epoch": 44.9, "learning_rate": 1.2751815738622482e-08, "loss": 3.7656, "step": 4043000 }, { "epoch": 44.9, "learning_rate": 1.273793394487262e-08, "loss": 3.7598, "step": 4043500 }, { "epoch": 44.91, "learning_rate": 1.2724052151122758e-08, "loss": 3.7552, "step": 4044000 }, { "epoch": 44.92, "learning_rate": 1.2710170357372899e-08, "loss": 3.7349, "step": 4044500 }, { "epoch": 44.92, "learning_rate": 1.2696288563623036e-08, "loss": 3.743, "step": 4045000 }, { "epoch": 44.93, "learning_rate": 1.2682406769873175e-08, "loss": 3.7793, "step": 4045500 }, { "epoch": 44.93, "learning_rate": 1.2668524976123316e-08, "loss": 3.7613, "step": 4046000 }, { "epoch": 44.94, "learning_rate": 1.2654643182373453e-08, "loss": 3.7471, "step": 4046500 }, { "epoch": 44.94, "learning_rate": 1.2640761388623592e-08, "loss": 3.7377, "step": 4047000 }, { "epoch": 44.95, "learning_rate": 1.262687959487373e-08, "loss": 3.747, "step": 4047500 }, { "epoch": 44.95, "learning_rate": 1.261299780112387e-08, "loss": 3.7668, "step": 4048000 }, { "epoch": 44.96, "learning_rate": 1.2599116007374009e-08, "loss": 3.7604, "step": 4048500 }, { "epoch": 44.97, "learning_rate": 1.2585234213624146e-08, "loss": 3.7462, "step": 4049000 }, { "epoch": 44.97, "learning_rate": 1.2571352419874287e-08, "loss": 3.7346, "step": 4049500 }, { "epoch": 44.98, "learning_rate": 1.2557470626124426e-08, "loss": 3.7358, "step": 4050000 }, { "epoch": 44.98, "learning_rate": 1.2543588832374563e-08, "loss": 3.7457, "step": 4050500 }, { "epoch": 44.99, "learning_rate": 1.25297070386247e-08, "loss": 3.738, "step": 4051000 }, { "epoch": 44.99, "learning_rate": 1.2515825244874843e-08, "loss": 3.7374, "step": 4051500 }, { "epoch": 45.0, "learning_rate": 1.250194345112498e-08, "loss": 3.7573, "step": 4052000 }, { "epoch": 45.0, "eval_loss": 3.8247101306915283, "eval_runtime": 6.3052, "eval_samples_per_second": 246.462, "step": 4052070 }, { "epoch": 45.0, "learning_rate": 1.2488061657375119e-08, "loss": 3.7472, "step": 4052500 }, { "epoch": 45.01, "learning_rate": 1.2474179863625258e-08, "loss": 3.7597, "step": 4053000 }, { "epoch": 45.02, "learning_rate": 1.2460298069875397e-08, "loss": 3.7478, "step": 4053500 }, { "epoch": 45.02, "learning_rate": 1.2446416276125534e-08, "loss": 3.7377, "step": 4054000 }, { "epoch": 45.03, "learning_rate": 1.2432534482375675e-08, "loss": 3.75, "step": 4054500 }, { "epoch": 45.03, "learning_rate": 1.2418652688625812e-08, "loss": 3.7509, "step": 4055000 }, { "epoch": 45.04, "learning_rate": 1.2404770894875951e-08, "loss": 3.7481, "step": 4055500 }, { "epoch": 45.04, "learning_rate": 1.2390889101126092e-08, "loss": 3.7611, "step": 4056000 }, { "epoch": 45.05, "learning_rate": 1.2377007307376229e-08, "loss": 3.7565, "step": 4056500 }, { "epoch": 45.05, "learning_rate": 1.2363125513626368e-08, "loss": 3.7633, "step": 4057000 }, { "epoch": 45.06, "learning_rate": 1.2349243719876507e-08, "loss": 3.7424, "step": 4057500 }, { "epoch": 45.07, "learning_rate": 1.2335361926126646e-08, "loss": 3.7399, "step": 4058000 }, { "epoch": 45.07, "learning_rate": 1.2321480132376785e-08, "loss": 3.7541, "step": 4058500 }, { "epoch": 45.08, "learning_rate": 1.2307598338626924e-08, "loss": 3.7407, "step": 4059000 }, { "epoch": 45.08, "learning_rate": 1.2293716544877063e-08, "loss": 3.7414, "step": 4059500 }, { "epoch": 45.09, "learning_rate": 1.2279834751127202e-08, "loss": 3.7469, "step": 4060000 }, { "epoch": 45.09, "learning_rate": 1.226595295737734e-08, "loss": 3.7551, "step": 4060500 }, { "epoch": 45.1, "learning_rate": 1.2252071163627478e-08, "loss": 3.739, "step": 4061000 }, { "epoch": 45.1, "learning_rate": 1.2238189369877617e-08, "loss": 3.7541, "step": 4061500 }, { "epoch": 45.11, "learning_rate": 1.2224307576127757e-08, "loss": 3.7308, "step": 4062000 }, { "epoch": 45.12, "learning_rate": 1.2210425782377895e-08, "loss": 3.7442, "step": 4062500 }, { "epoch": 45.12, "learning_rate": 1.2196543988628034e-08, "loss": 3.7178, "step": 4063000 }, { "epoch": 45.13, "learning_rate": 1.2182662194878173e-08, "loss": 3.752, "step": 4063500 }, { "epoch": 45.13, "learning_rate": 1.2168780401128312e-08, "loss": 3.7237, "step": 4064000 }, { "epoch": 45.14, "learning_rate": 1.215489860737845e-08, "loss": 3.7518, "step": 4064500 }, { "epoch": 45.14, "learning_rate": 1.214101681362859e-08, "loss": 3.7263, "step": 4065000 }, { "epoch": 45.15, "learning_rate": 1.2127135019878728e-08, "loss": 3.7328, "step": 4065500 }, { "epoch": 45.15, "learning_rate": 1.2113253226128867e-08, "loss": 3.7408, "step": 4066000 }, { "epoch": 45.16, "learning_rate": 1.2099371432379006e-08, "loss": 3.7489, "step": 4066500 }, { "epoch": 45.17, "learning_rate": 1.2085489638629144e-08, "loss": 3.7408, "step": 4067000 }, { "epoch": 45.17, "learning_rate": 1.2071607844879284e-08, "loss": 3.753, "step": 4067500 }, { "epoch": 45.18, "learning_rate": 1.2057726051129422e-08, "loss": 3.7521, "step": 4068000 }, { "epoch": 45.18, "learning_rate": 1.204384425737956e-08, "loss": 3.74, "step": 4068500 }, { "epoch": 45.19, "learning_rate": 1.2029962463629701e-08, "loss": 3.7246, "step": 4069000 }, { "epoch": 45.19, "learning_rate": 1.2016080669879838e-08, "loss": 3.7426, "step": 4069500 }, { "epoch": 45.2, "learning_rate": 1.2002198876129977e-08, "loss": 3.7464, "step": 4070000 }, { "epoch": 45.2, "learning_rate": 1.1988317082380116e-08, "loss": 3.7602, "step": 4070500 }, { "epoch": 45.21, "learning_rate": 1.1974435288630255e-08, "loss": 3.7413, "step": 4071000 }, { "epoch": 45.22, "learning_rate": 1.1960553494880394e-08, "loss": 3.7405, "step": 4071500 }, { "epoch": 45.22, "learning_rate": 1.1946671701130533e-08, "loss": 3.7508, "step": 4072000 }, { "epoch": 45.23, "learning_rate": 1.1932789907380672e-08, "loss": 3.7437, "step": 4072500 }, { "epoch": 45.23, "learning_rate": 1.191890811363081e-08, "loss": 3.7679, "step": 4073000 }, { "epoch": 45.24, "learning_rate": 1.190502631988095e-08, "loss": 3.7467, "step": 4073500 }, { "epoch": 45.24, "learning_rate": 1.1891144526131087e-08, "loss": 3.7599, "step": 4074000 }, { "epoch": 45.25, "learning_rate": 1.1877262732381226e-08, "loss": 3.7611, "step": 4074500 }, { "epoch": 45.25, "learning_rate": 1.1863380938631367e-08, "loss": 3.7522, "step": 4075000 }, { "epoch": 45.26, "learning_rate": 1.1849499144881504e-08, "loss": 3.756, "step": 4075500 }, { "epoch": 45.27, "learning_rate": 1.1835617351131643e-08, "loss": 3.7582, "step": 4076000 }, { "epoch": 45.27, "learning_rate": 1.1821735557381782e-08, "loss": 3.7371, "step": 4076500 }, { "epoch": 45.28, "learning_rate": 1.1807853763631921e-08, "loss": 3.7583, "step": 4077000 }, { "epoch": 45.28, "learning_rate": 1.1793971969882058e-08, "loss": 3.7402, "step": 4077500 }, { "epoch": 45.29, "learning_rate": 1.1780090176132199e-08, "loss": 3.7629, "step": 4078000 }, { "epoch": 45.29, "learning_rate": 1.1766208382382338e-08, "loss": 3.7404, "step": 4078500 }, { "epoch": 45.3, "learning_rate": 1.1752326588632475e-08, "loss": 3.7602, "step": 4079000 }, { "epoch": 45.3, "learning_rate": 1.1738444794882616e-08, "loss": 3.74, "step": 4079500 }, { "epoch": 45.31, "learning_rate": 1.1724563001132753e-08, "loss": 3.7437, "step": 4080000 }, { "epoch": 45.32, "learning_rate": 1.1710681207382892e-08, "loss": 3.7595, "step": 4080500 }, { "epoch": 45.32, "learning_rate": 1.1696799413633031e-08, "loss": 3.7593, "step": 4081000 }, { "epoch": 45.33, "learning_rate": 1.168291761988317e-08, "loss": 3.7379, "step": 4081500 }, { "epoch": 45.33, "learning_rate": 1.1669035826133309e-08, "loss": 3.7428, "step": 4082000 }, { "epoch": 45.34, "learning_rate": 1.1655154032383448e-08, "loss": 3.7346, "step": 4082500 }, { "epoch": 45.34, "learning_rate": 1.1641272238633587e-08, "loss": 3.7299, "step": 4083000 }, { "epoch": 45.35, "learning_rate": 1.1627390444883726e-08, "loss": 3.744, "step": 4083500 }, { "epoch": 45.35, "learning_rate": 1.1613508651133865e-08, "loss": 3.7515, "step": 4084000 }, { "epoch": 45.36, "learning_rate": 1.1599626857384002e-08, "loss": 3.7353, "step": 4084500 }, { "epoch": 45.37, "learning_rate": 1.1585745063634143e-08, "loss": 3.7435, "step": 4085000 }, { "epoch": 45.37, "learning_rate": 1.1571863269884282e-08, "loss": 3.7552, "step": 4085500 }, { "epoch": 45.38, "learning_rate": 1.1557981476134419e-08, "loss": 3.749, "step": 4086000 }, { "epoch": 45.38, "learning_rate": 1.154409968238456e-08, "loss": 3.7577, "step": 4086500 }, { "epoch": 45.39, "learning_rate": 1.1530217888634697e-08, "loss": 3.7472, "step": 4087000 }, { "epoch": 45.39, "learning_rate": 1.1516336094884836e-08, "loss": 3.7482, "step": 4087500 }, { "epoch": 45.4, "learning_rate": 1.1502454301134976e-08, "loss": 3.7297, "step": 4088000 }, { "epoch": 45.4, "learning_rate": 1.1488572507385114e-08, "loss": 3.7501, "step": 4088500 }, { "epoch": 45.41, "learning_rate": 1.1474690713635253e-08, "loss": 3.7697, "step": 4089000 }, { "epoch": 45.42, "learning_rate": 1.1460808919885392e-08, "loss": 3.7517, "step": 4089500 }, { "epoch": 45.42, "learning_rate": 1.144692712613553e-08, "loss": 3.744, "step": 4090000 }, { "epoch": 45.43, "learning_rate": 1.1433045332385668e-08, "loss": 3.7476, "step": 4090500 }, { "epoch": 45.43, "learning_rate": 1.1419163538635809e-08, "loss": 3.7611, "step": 4091000 }, { "epoch": 45.44, "learning_rate": 1.1405281744885947e-08, "loss": 3.7593, "step": 4091500 }, { "epoch": 45.44, "learning_rate": 1.1391399951136085e-08, "loss": 3.7525, "step": 4092000 }, { "epoch": 45.45, "learning_rate": 1.1377518157386225e-08, "loss": 3.7613, "step": 4092500 }, { "epoch": 45.45, "learning_rate": 1.1363636363636363e-08, "loss": 3.757, "step": 4093000 }, { "epoch": 45.46, "learning_rate": 1.1349754569886502e-08, "loss": 3.7462, "step": 4093500 }, { "epoch": 45.47, "learning_rate": 1.133587277613664e-08, "loss": 3.7684, "step": 4094000 }, { "epoch": 45.47, "learning_rate": 1.132199098238678e-08, "loss": 3.7631, "step": 4094500 }, { "epoch": 45.48, "learning_rate": 1.1308109188636918e-08, "loss": 3.7457, "step": 4095000 }, { "epoch": 45.48, "learning_rate": 1.1294227394887057e-08, "loss": 3.7521, "step": 4095500 }, { "epoch": 45.49, "learning_rate": 1.1280345601137196e-08, "loss": 3.7518, "step": 4096000 }, { "epoch": 45.49, "learning_rate": 1.1266463807387334e-08, "loss": 3.7475, "step": 4096500 }, { "epoch": 45.5, "learning_rate": 1.1252582013637474e-08, "loss": 3.7414, "step": 4097000 }, { "epoch": 45.5, "learning_rate": 1.1238700219887612e-08, "loss": 3.7509, "step": 4097500 }, { "epoch": 45.51, "learning_rate": 1.122481842613775e-08, "loss": 3.7135, "step": 4098000 }, { "epoch": 45.52, "learning_rate": 1.1210936632387891e-08, "loss": 3.7353, "step": 4098500 }, { "epoch": 45.52, "learning_rate": 1.1197054838638028e-08, "loss": 3.7351, "step": 4099000 }, { "epoch": 45.53, "learning_rate": 1.1183173044888167e-08, "loss": 3.7446, "step": 4099500 }, { "epoch": 45.53, "learning_rate": 1.1169291251138306e-08, "loss": 3.7458, "step": 4100000 }, { "epoch": 45.54, "learning_rate": 1.1155409457388445e-08, "loss": 3.7647, "step": 4100500 }, { "epoch": 45.54, "learning_rate": 1.1141527663638584e-08, "loss": 3.7459, "step": 4101000 }, { "epoch": 45.55, "learning_rate": 1.1127645869888723e-08, "loss": 3.7482, "step": 4101500 }, { "epoch": 45.55, "learning_rate": 1.1113764076138862e-08, "loss": 3.7484, "step": 4102000 }, { "epoch": 45.56, "learning_rate": 1.1099882282389001e-08, "loss": 3.7419, "step": 4102500 }, { "epoch": 45.57, "learning_rate": 1.108600048863914e-08, "loss": 3.7481, "step": 4103000 }, { "epoch": 45.57, "learning_rate": 1.1072118694889277e-08, "loss": 3.7291, "step": 4103500 }, { "epoch": 45.58, "learning_rate": 1.1058236901139418e-08, "loss": 3.7579, "step": 4104000 }, { "epoch": 45.58, "learning_rate": 1.1044355107389557e-08, "loss": 3.754, "step": 4104500 }, { "epoch": 45.59, "learning_rate": 1.1030473313639694e-08, "loss": 3.7544, "step": 4105000 }, { "epoch": 45.59, "learning_rate": 1.1016591519889833e-08, "loss": 3.7459, "step": 4105500 }, { "epoch": 45.6, "learning_rate": 1.1002709726139972e-08, "loss": 3.7469, "step": 4106000 }, { "epoch": 45.6, "learning_rate": 1.0988827932390111e-08, "loss": 3.7461, "step": 4106500 }, { "epoch": 45.61, "learning_rate": 1.097494613864025e-08, "loss": 3.7401, "step": 4107000 }, { "epoch": 45.62, "learning_rate": 1.0961064344890389e-08, "loss": 3.756, "step": 4107500 }, { "epoch": 45.62, "learning_rate": 1.0947182551140528e-08, "loss": 3.7323, "step": 4108000 }, { "epoch": 45.63, "learning_rate": 1.0933300757390667e-08, "loss": 3.7526, "step": 4108500 }, { "epoch": 45.63, "learning_rate": 1.0919418963640806e-08, "loss": 3.7568, "step": 4109000 }, { "epoch": 45.64, "learning_rate": 1.0905537169890943e-08, "loss": 3.7228, "step": 4109500 }, { "epoch": 45.64, "learning_rate": 1.0891655376141084e-08, "loss": 3.7769, "step": 4110000 }, { "epoch": 45.65, "learning_rate": 1.0877773582391221e-08, "loss": 3.7644, "step": 4110500 }, { "epoch": 45.65, "learning_rate": 1.086389178864136e-08, "loss": 3.7292, "step": 4111000 }, { "epoch": 45.66, "learning_rate": 1.08500099948915e-08, "loss": 3.7508, "step": 4111500 }, { "epoch": 45.67, "learning_rate": 1.0836128201141638e-08, "loss": 3.7341, "step": 4112000 }, { "epoch": 45.67, "learning_rate": 1.0822246407391777e-08, "loss": 3.7353, "step": 4112500 }, { "epoch": 45.68, "learning_rate": 1.0808364613641916e-08, "loss": 3.7593, "step": 4113000 }, { "epoch": 45.68, "learning_rate": 1.0794482819892055e-08, "loss": 3.7551, "step": 4113500 }, { "epoch": 45.69, "learning_rate": 1.0780601026142192e-08, "loss": 3.7555, "step": 4114000 }, { "epoch": 45.69, "learning_rate": 1.0766719232392333e-08, "loss": 3.7646, "step": 4114500 }, { "epoch": 45.7, "learning_rate": 1.0752837438642472e-08, "loss": 3.7526, "step": 4115000 }, { "epoch": 45.7, "learning_rate": 1.0738955644892609e-08, "loss": 3.7397, "step": 4115500 }, { "epoch": 45.71, "learning_rate": 1.072507385114275e-08, "loss": 3.7402, "step": 4116000 }, { "epoch": 45.72, "learning_rate": 1.0711192057392887e-08, "loss": 3.7589, "step": 4116500 }, { "epoch": 45.72, "learning_rate": 1.0697310263643026e-08, "loss": 3.7486, "step": 4117000 }, { "epoch": 45.73, "learning_rate": 1.0683428469893165e-08, "loss": 3.7497, "step": 4117500 }, { "epoch": 45.73, "learning_rate": 1.0669546676143304e-08, "loss": 3.7544, "step": 4118000 }, { "epoch": 45.74, "learning_rate": 1.0655664882393443e-08, "loss": 3.7395, "step": 4118500 }, { "epoch": 45.74, "learning_rate": 1.0641783088643582e-08, "loss": 3.7352, "step": 4119000 }, { "epoch": 45.75, "learning_rate": 1.062790129489372e-08, "loss": 3.7522, "step": 4119500 }, { "epoch": 45.75, "learning_rate": 1.0614019501143858e-08, "loss": 3.7602, "step": 4120000 }, { "epoch": 45.76, "learning_rate": 1.0600137707393999e-08, "loss": 3.7724, "step": 4120500 }, { "epoch": 45.77, "learning_rate": 1.0586255913644138e-08, "loss": 3.7349, "step": 4121000 }, { "epoch": 45.77, "learning_rate": 1.0572374119894275e-08, "loss": 3.7577, "step": 4121500 }, { "epoch": 45.78, "learning_rate": 1.0558492326144415e-08, "loss": 3.7606, "step": 4122000 }, { "epoch": 45.78, "learning_rate": 1.0544610532394553e-08, "loss": 3.7307, "step": 4122500 }, { "epoch": 45.79, "learning_rate": 1.0530728738644692e-08, "loss": 3.7421, "step": 4123000 }, { "epoch": 45.79, "learning_rate": 1.051684694489483e-08, "loss": 3.7552, "step": 4123500 }, { "epoch": 45.8, "learning_rate": 1.050296515114497e-08, "loss": 3.7399, "step": 4124000 }, { "epoch": 45.8, "learning_rate": 1.0489083357395109e-08, "loss": 3.7693, "step": 4124500 }, { "epoch": 45.81, "learning_rate": 1.0475201563645248e-08, "loss": 3.7475, "step": 4125000 }, { "epoch": 45.82, "learning_rate": 1.0461319769895386e-08, "loss": 3.7391, "step": 4125500 }, { "epoch": 45.82, "learning_rate": 1.0447437976145525e-08, "loss": 3.7574, "step": 4126000 }, { "epoch": 45.83, "learning_rate": 1.0433556182395664e-08, "loss": 3.7356, "step": 4126500 }, { "epoch": 45.83, "learning_rate": 1.0419674388645802e-08, "loss": 3.7367, "step": 4127000 }, { "epoch": 45.84, "learning_rate": 1.0405792594895942e-08, "loss": 3.7585, "step": 4127500 }, { "epoch": 45.84, "learning_rate": 1.0391910801146081e-08, "loss": 3.7518, "step": 4128000 }, { "epoch": 45.85, "learning_rate": 1.0378029007396219e-08, "loss": 3.7324, "step": 4128500 }, { "epoch": 45.85, "learning_rate": 1.036414721364636e-08, "loss": 3.7376, "step": 4129000 }, { "epoch": 45.86, "learning_rate": 1.0350265419896496e-08, "loss": 3.7528, "step": 4129500 }, { "epoch": 45.87, "learning_rate": 1.0336383626146635e-08, "loss": 3.7487, "step": 4130000 }, { "epoch": 45.87, "learning_rate": 1.0322501832396774e-08, "loss": 3.7465, "step": 4130500 }, { "epoch": 45.88, "learning_rate": 1.0308620038646913e-08, "loss": 3.7546, "step": 4131000 }, { "epoch": 45.88, "learning_rate": 1.0294738244897052e-08, "loss": 3.7473, "step": 4131500 }, { "epoch": 45.89, "learning_rate": 1.0280856451147191e-08, "loss": 3.7433, "step": 4132000 }, { "epoch": 45.89, "learning_rate": 1.026697465739733e-08, "loss": 3.7562, "step": 4132500 }, { "epoch": 45.9, "learning_rate": 1.0253092863647468e-08, "loss": 3.7568, "step": 4133000 }, { "epoch": 45.9, "learning_rate": 1.0239211069897608e-08, "loss": 3.7314, "step": 4133500 }, { "epoch": 45.91, "learning_rate": 1.0225329276147747e-08, "loss": 3.7356, "step": 4134000 }, { "epoch": 45.92, "learning_rate": 1.0211447482397884e-08, "loss": 3.7454, "step": 4134500 }, { "epoch": 45.92, "learning_rate": 1.0197565688648025e-08, "loss": 3.7406, "step": 4135000 }, { "epoch": 45.93, "learning_rate": 1.0183683894898162e-08, "loss": 3.7525, "step": 4135500 }, { "epoch": 45.93, "learning_rate": 1.0169802101148301e-08, "loss": 3.7258, "step": 4136000 }, { "epoch": 45.94, "learning_rate": 1.015592030739844e-08, "loss": 3.7477, "step": 4136500 }, { "epoch": 45.94, "learning_rate": 1.0142038513648579e-08, "loss": 3.7516, "step": 4137000 }, { "epoch": 45.95, "learning_rate": 1.0128156719898718e-08, "loss": 3.7278, "step": 4137500 }, { "epoch": 45.95, "learning_rate": 1.0114274926148857e-08, "loss": 3.7652, "step": 4138000 }, { "epoch": 45.96, "learning_rate": 1.0100393132398996e-08, "loss": 3.719, "step": 4138500 }, { "epoch": 45.97, "learning_rate": 1.0086511338649133e-08, "loss": 3.7351, "step": 4139000 }, { "epoch": 45.97, "learning_rate": 1.0072629544899274e-08, "loss": 3.7496, "step": 4139500 }, { "epoch": 45.98, "learning_rate": 1.0058747751149411e-08, "loss": 3.7471, "step": 4140000 }, { "epoch": 45.98, "learning_rate": 1.004486595739955e-08, "loss": 3.7638, "step": 4140500 }, { "epoch": 45.99, "learning_rate": 1.003098416364969e-08, "loss": 3.7404, "step": 4141000 }, { "epoch": 45.99, "learning_rate": 1.0017102369899828e-08, "loss": 3.7646, "step": 4141500 }, { "epoch": 46.0, "learning_rate": 1.0003220576149967e-08, "loss": 3.7462, "step": 4142000 }, { "epoch": 46.0, "eval_loss": 3.824589490890503, "eval_runtime": 6.307, "eval_samples_per_second": 246.393, "step": 4142116 }, { "epoch": 46.0, "learning_rate": 9.989338782400106e-09, "loss": 3.7482, "step": 4142500 }, { "epoch": 46.01, "learning_rate": 9.975456988650245e-09, "loss": 3.7465, "step": 4143000 }, { "epoch": 46.02, "learning_rate": 9.961575194900384e-09, "loss": 3.7503, "step": 4143500 }, { "epoch": 46.02, "learning_rate": 9.947693401150523e-09, "loss": 3.7474, "step": 4144000 }, { "epoch": 46.03, "learning_rate": 9.933811607400662e-09, "loss": 3.7468, "step": 4144500 }, { "epoch": 46.03, "learning_rate": 9.9199298136508e-09, "loss": 3.7563, "step": 4145000 }, { "epoch": 46.04, "learning_rate": 9.90604801990094e-09, "loss": 3.7407, "step": 4145500 }, { "epoch": 46.04, "learning_rate": 9.892166226151077e-09, "loss": 3.7697, "step": 4146000 }, { "epoch": 46.05, "learning_rate": 9.878284432401218e-09, "loss": 3.7523, "step": 4146500 }, { "epoch": 46.05, "learning_rate": 9.864402638651355e-09, "loss": 3.7531, "step": 4147000 }, { "epoch": 46.06, "learning_rate": 9.850520844901494e-09, "loss": 3.7775, "step": 4147500 }, { "epoch": 46.07, "learning_rate": 9.836639051151633e-09, "loss": 3.7518, "step": 4148000 }, { "epoch": 46.07, "learning_rate": 9.822757257401772e-09, "loss": 3.7412, "step": 4148500 }, { "epoch": 46.08, "learning_rate": 9.80887546365191e-09, "loss": 3.7648, "step": 4149000 }, { "epoch": 46.08, "learning_rate": 9.79499366990205e-09, "loss": 3.7634, "step": 4149500 }, { "epoch": 46.09, "learning_rate": 9.781111876152189e-09, "loss": 3.755, "step": 4150000 }, { "epoch": 46.09, "learning_rate": 9.767230082402328e-09, "loss": 3.7599, "step": 4150500 }, { "epoch": 46.1, "learning_rate": 9.753348288652467e-09, "loss": 3.7446, "step": 4151000 }, { "epoch": 46.1, "learning_rate": 9.739466494902606e-09, "loss": 3.7538, "step": 4151500 }, { "epoch": 46.11, "learning_rate": 9.725584701152743e-09, "loss": 3.7337, "step": 4152000 }, { "epoch": 46.12, "learning_rate": 9.711702907402883e-09, "loss": 3.7634, "step": 4152500 }, { "epoch": 46.12, "learning_rate": 9.69782111365302e-09, "loss": 3.7337, "step": 4153000 }, { "epoch": 46.13, "learning_rate": 9.68393931990316e-09, "loss": 3.7546, "step": 4153500 }, { "epoch": 46.13, "learning_rate": 9.6700575261533e-09, "loss": 3.7528, "step": 4154000 }, { "epoch": 46.14, "learning_rate": 9.656175732403438e-09, "loss": 3.7457, "step": 4154500 }, { "epoch": 46.14, "learning_rate": 9.642293938653577e-09, "loss": 3.7325, "step": 4155000 }, { "epoch": 46.15, "learning_rate": 9.628412144903716e-09, "loss": 3.7416, "step": 4155500 }, { "epoch": 46.15, "learning_rate": 9.614530351153854e-09, "loss": 3.7404, "step": 4156000 }, { "epoch": 46.16, "learning_rate": 9.600648557403992e-09, "loss": 3.7493, "step": 4156500 }, { "epoch": 46.17, "learning_rate": 9.586766763654132e-09, "loss": 3.7475, "step": 4157000 }, { "epoch": 46.17, "learning_rate": 9.572884969904271e-09, "loss": 3.7655, "step": 4157500 }, { "epoch": 46.18, "learning_rate": 9.559003176154409e-09, "loss": 3.7527, "step": 4158000 }, { "epoch": 46.18, "learning_rate": 9.54512138240455e-09, "loss": 3.7587, "step": 4158500 }, { "epoch": 46.19, "learning_rate": 9.531239588654687e-09, "loss": 3.7539, "step": 4159000 }, { "epoch": 46.19, "learning_rate": 9.517357794904826e-09, "loss": 3.7548, "step": 4159500 }, { "epoch": 46.2, "learning_rate": 9.503476001154964e-09, "loss": 3.7661, "step": 4160000 }, { "epoch": 46.2, "learning_rate": 9.489594207405103e-09, "loss": 3.7509, "step": 4160500 }, { "epoch": 46.21, "learning_rate": 9.475712413655242e-09, "loss": 3.7366, "step": 4161000 }, { "epoch": 46.22, "learning_rate": 9.461830619905381e-09, "loss": 3.7553, "step": 4161500 }, { "epoch": 46.22, "learning_rate": 9.44794882615552e-09, "loss": 3.7445, "step": 4162000 }, { "epoch": 46.23, "learning_rate": 9.434067032405658e-09, "loss": 3.7339, "step": 4162500 }, { "epoch": 46.23, "learning_rate": 9.420185238655798e-09, "loss": 3.765, "step": 4163000 }, { "epoch": 46.24, "learning_rate": 9.406303444905936e-09, "loss": 3.7396, "step": 4163500 }, { "epoch": 46.24, "learning_rate": 9.392421651156074e-09, "loss": 3.7764, "step": 4164000 }, { "epoch": 46.25, "learning_rate": 9.378539857406215e-09, "loss": 3.7443, "step": 4164500 }, { "epoch": 46.25, "learning_rate": 9.364658063656352e-09, "loss": 3.7498, "step": 4165000 }, { "epoch": 46.26, "learning_rate": 9.350776269906491e-09, "loss": 3.7494, "step": 4165500 }, { "epoch": 46.27, "learning_rate": 9.33689447615663e-09, "loss": 3.749, "step": 4166000 }, { "epoch": 46.27, "learning_rate": 9.32301268240677e-09, "loss": 3.7586, "step": 4166500 }, { "epoch": 46.28, "learning_rate": 9.309130888656908e-09, "loss": 3.7361, "step": 4167000 }, { "epoch": 46.28, "learning_rate": 9.295249094907047e-09, "loss": 3.7269, "step": 4167500 }, { "epoch": 46.29, "learning_rate": 9.281367301157186e-09, "loss": 3.749, "step": 4168000 }, { "epoch": 46.29, "learning_rate": 9.267485507407325e-09, "loss": 3.7509, "step": 4168500 }, { "epoch": 46.3, "learning_rate": 9.253603713657464e-09, "loss": 3.7378, "step": 4169000 }, { "epoch": 46.3, "learning_rate": 9.239721919907601e-09, "loss": 3.7373, "step": 4169500 }, { "epoch": 46.31, "learning_rate": 9.225840126157742e-09, "loss": 3.748, "step": 4170000 }, { "epoch": 46.32, "learning_rate": 9.211958332407881e-09, "loss": 3.7466, "step": 4170500 }, { "epoch": 46.32, "learning_rate": 9.198076538658018e-09, "loss": 3.7457, "step": 4171000 }, { "epoch": 46.33, "learning_rate": 9.184194744908159e-09, "loss": 3.7494, "step": 4171500 }, { "epoch": 46.33, "learning_rate": 9.170312951158296e-09, "loss": 3.7453, "step": 4172000 }, { "epoch": 46.34, "learning_rate": 9.156431157408435e-09, "loss": 3.7418, "step": 4172500 }, { "epoch": 46.34, "learning_rate": 9.142549363658574e-09, "loss": 3.7366, "step": 4173000 }, { "epoch": 46.35, "learning_rate": 9.128667569908713e-09, "loss": 3.7323, "step": 4173500 }, { "epoch": 46.35, "learning_rate": 9.114785776158852e-09, "loss": 3.7559, "step": 4174000 }, { "epoch": 46.36, "learning_rate": 9.100903982408991e-09, "loss": 3.7543, "step": 4174500 }, { "epoch": 46.37, "learning_rate": 9.08702218865913e-09, "loss": 3.7335, "step": 4175000 }, { "epoch": 46.37, "learning_rate": 9.073140394909267e-09, "loss": 3.7483, "step": 4175500 }, { "epoch": 46.38, "learning_rate": 9.059258601159408e-09, "loss": 3.7488, "step": 4176000 }, { "epoch": 46.38, "learning_rate": 9.045376807409545e-09, "loss": 3.7475, "step": 4176500 }, { "epoch": 46.39, "learning_rate": 9.031495013659684e-09, "loss": 3.7447, "step": 4177000 }, { "epoch": 46.39, "learning_rate": 9.017613219909825e-09, "loss": 3.7255, "step": 4177500 }, { "epoch": 46.4, "learning_rate": 9.003731426159962e-09, "loss": 3.7488, "step": 4178000 }, { "epoch": 46.4, "learning_rate": 8.989849632410101e-09, "loss": 3.7595, "step": 4178500 }, { "epoch": 46.41, "learning_rate": 8.97596783866024e-09, "loss": 3.7672, "step": 4179000 }, { "epoch": 46.42, "learning_rate": 8.962086044910379e-09, "loss": 3.7499, "step": 4179500 }, { "epoch": 46.42, "learning_rate": 8.948204251160516e-09, "loss": 3.7323, "step": 4180000 }, { "epoch": 46.43, "learning_rate": 8.934322457410657e-09, "loss": 3.7302, "step": 4180500 }, { "epoch": 46.43, "learning_rate": 8.920440663660796e-09, "loss": 3.7605, "step": 4181000 }, { "epoch": 46.44, "learning_rate": 8.906558869910933e-09, "loss": 3.7395, "step": 4181500 }, { "epoch": 46.44, "learning_rate": 8.892677076161074e-09, "loss": 3.7641, "step": 4182000 }, { "epoch": 46.45, "learning_rate": 8.878795282411211e-09, "loss": 3.7529, "step": 4182500 }, { "epoch": 46.45, "learning_rate": 8.86491348866135e-09, "loss": 3.7306, "step": 4183000 }, { "epoch": 46.46, "learning_rate": 8.85103169491149e-09, "loss": 3.7585, "step": 4183500 }, { "epoch": 46.47, "learning_rate": 8.837149901161628e-09, "loss": 3.7317, "step": 4184000 }, { "epoch": 46.47, "learning_rate": 8.823268107411767e-09, "loss": 3.7636, "step": 4184500 }, { "epoch": 46.48, "learning_rate": 8.809386313661906e-09, "loss": 3.7619, "step": 4185000 }, { "epoch": 46.48, "learning_rate": 8.795504519912045e-09, "loss": 3.7452, "step": 4185500 }, { "epoch": 46.49, "learning_rate": 8.781622726162184e-09, "loss": 3.7616, "step": 4186000 }, { "epoch": 46.49, "learning_rate": 8.767740932412323e-09, "loss": 3.7291, "step": 4186500 }, { "epoch": 46.5, "learning_rate": 8.753859138662461e-09, "loss": 3.74, "step": 4187000 }, { "epoch": 46.5, "learning_rate": 8.7399773449126e-09, "loss": 3.7648, "step": 4187500 }, { "epoch": 46.51, "learning_rate": 8.72609555116274e-09, "loss": 3.7428, "step": 4188000 }, { "epoch": 46.52, "learning_rate": 8.712213757412877e-09, "loss": 3.7448, "step": 4188500 }, { "epoch": 46.52, "learning_rate": 8.698331963663017e-09, "loss": 3.7228, "step": 4189000 }, { "epoch": 46.53, "learning_rate": 8.684450169913155e-09, "loss": 3.7527, "step": 4189500 }, { "epoch": 46.53, "learning_rate": 8.670568376163294e-09, "loss": 3.7418, "step": 4190000 }, { "epoch": 46.54, "learning_rate": 8.656686582413432e-09, "loss": 3.745, "step": 4190500 }, { "epoch": 46.54, "learning_rate": 8.642804788663571e-09, "loss": 3.7309, "step": 4191000 }, { "epoch": 46.55, "learning_rate": 8.62892299491371e-09, "loss": 3.7464, "step": 4191500 }, { "epoch": 46.55, "learning_rate": 8.61504120116385e-09, "loss": 3.7476, "step": 4192000 }, { "epoch": 46.56, "learning_rate": 8.601159407413988e-09, "loss": 3.737, "step": 4192500 }, { "epoch": 46.57, "learning_rate": 8.587277613664126e-09, "loss": 3.7437, "step": 4193000 }, { "epoch": 46.57, "learning_rate": 8.573395819914266e-09, "loss": 3.7578, "step": 4193500 }, { "epoch": 46.58, "learning_rate": 8.559514026164405e-09, "loss": 3.7389, "step": 4194000 }, { "epoch": 46.58, "learning_rate": 8.545632232414542e-09, "loss": 3.7572, "step": 4194500 }, { "epoch": 46.59, "learning_rate": 8.531750438664683e-09, "loss": 3.7411, "step": 4195000 }, { "epoch": 46.59, "learning_rate": 8.51786864491482e-09, "loss": 3.754, "step": 4195500 }, { "epoch": 46.6, "learning_rate": 8.50398685116496e-09, "loss": 3.746, "step": 4196000 }, { "epoch": 46.6, "learning_rate": 8.4901050574151e-09, "loss": 3.7338, "step": 4196500 }, { "epoch": 46.61, "learning_rate": 8.476223263665237e-09, "loss": 3.776, "step": 4197000 }, { "epoch": 46.62, "learning_rate": 8.462341469915376e-09, "loss": 3.7661, "step": 4197500 }, { "epoch": 46.62, "learning_rate": 8.448459676165515e-09, "loss": 3.7635, "step": 4198000 }, { "epoch": 46.63, "learning_rate": 8.434577882415654e-09, "loss": 3.7478, "step": 4198500 }, { "epoch": 46.63, "learning_rate": 8.420696088665791e-09, "loss": 3.7508, "step": 4199000 }, { "epoch": 46.64, "learning_rate": 8.406814294915932e-09, "loss": 3.7457, "step": 4199500 }, { "epoch": 46.64, "learning_rate": 8.392932501166071e-09, "loss": 3.7412, "step": 4200000 }, { "epoch": 46.65, "learning_rate": 8.379050707416208e-09, "loss": 3.7442, "step": 4200500 }, { "epoch": 46.65, "learning_rate": 8.365168913666349e-09, "loss": 3.7372, "step": 4201000 }, { "epoch": 46.66, "learning_rate": 8.351287119916486e-09, "loss": 3.7292, "step": 4201500 }, { "epoch": 46.67, "learning_rate": 8.337405326166625e-09, "loss": 3.751, "step": 4202000 }, { "epoch": 46.67, "learning_rate": 8.323523532416764e-09, "loss": 3.7346, "step": 4202500 }, { "epoch": 46.68, "learning_rate": 8.309641738666903e-09, "loss": 3.7454, "step": 4203000 }, { "epoch": 46.68, "learning_rate": 8.295759944917042e-09, "loss": 3.745, "step": 4203500 }, { "epoch": 46.69, "learning_rate": 8.281878151167181e-09, "loss": 3.7355, "step": 4204000 }, { "epoch": 46.69, "learning_rate": 8.26799635741732e-09, "loss": 3.738, "step": 4204500 }, { "epoch": 46.7, "learning_rate": 8.254114563667459e-09, "loss": 3.7401, "step": 4205000 }, { "epoch": 46.7, "learning_rate": 8.240232769917598e-09, "loss": 3.7503, "step": 4205500 }, { "epoch": 46.71, "learning_rate": 8.226350976167735e-09, "loss": 3.7402, "step": 4206000 }, { "epoch": 46.72, "learning_rate": 8.212469182417874e-09, "loss": 3.7453, "step": 4206500 }, { "epoch": 46.72, "learning_rate": 8.198587388668015e-09, "loss": 3.7628, "step": 4207000 }, { "epoch": 46.73, "learning_rate": 8.184705594918152e-09, "loss": 3.7436, "step": 4207500 }, { "epoch": 46.73, "learning_rate": 8.170823801168291e-09, "loss": 3.7496, "step": 4208000 }, { "epoch": 46.74, "learning_rate": 8.15694200741843e-09, "loss": 3.7505, "step": 4208500 }, { "epoch": 46.74, "learning_rate": 8.143060213668569e-09, "loss": 3.7419, "step": 4209000 }, { "epoch": 46.75, "learning_rate": 8.129178419918708e-09, "loss": 3.7672, "step": 4209500 }, { "epoch": 46.75, "learning_rate": 8.115296626168847e-09, "loss": 3.7119, "step": 4210000 }, { "epoch": 46.76, "learning_rate": 8.101414832418986e-09, "loss": 3.7372, "step": 4210500 }, { "epoch": 46.76, "learning_rate": 8.087533038669125e-09, "loss": 3.7472, "step": 4211000 }, { "epoch": 46.77, "learning_rate": 8.073651244919264e-09, "loss": 3.7616, "step": 4211500 }, { "epoch": 46.78, "learning_rate": 8.059769451169401e-09, "loss": 3.7497, "step": 4212000 }, { "epoch": 46.78, "learning_rate": 8.045887657419542e-09, "loss": 3.7366, "step": 4212500 }, { "epoch": 46.79, "learning_rate": 8.03200586366968e-09, "loss": 3.766, "step": 4213000 }, { "epoch": 46.79, "learning_rate": 8.018124069919818e-09, "loss": 3.7244, "step": 4213500 }, { "epoch": 46.8, "learning_rate": 8.004242276169958e-09, "loss": 3.7508, "step": 4214000 }, { "epoch": 46.8, "learning_rate": 7.990360482420096e-09, "loss": 3.74, "step": 4214500 }, { "epoch": 46.81, "learning_rate": 7.976478688670235e-09, "loss": 3.7578, "step": 4215000 }, { "epoch": 46.81, "learning_rate": 7.962596894920374e-09, "loss": 3.7258, "step": 4215500 }, { "epoch": 46.82, "learning_rate": 7.948715101170513e-09, "loss": 3.7509, "step": 4216000 }, { "epoch": 46.83, "learning_rate": 7.934833307420652e-09, "loss": 3.7406, "step": 4216500 }, { "epoch": 46.83, "learning_rate": 7.92095151367079e-09, "loss": 3.7494, "step": 4217000 }, { "epoch": 46.84, "learning_rate": 7.90706971992093e-09, "loss": 3.7761, "step": 4217500 }, { "epoch": 46.84, "learning_rate": 7.893187926171067e-09, "loss": 3.7286, "step": 4218000 }, { "epoch": 46.85, "learning_rate": 7.879306132421207e-09, "loss": 3.7617, "step": 4218500 }, { "epoch": 46.85, "learning_rate": 7.865424338671345e-09, "loss": 3.754, "step": 4219000 }, { "epoch": 46.86, "learning_rate": 7.851542544921484e-09, "loss": 3.7426, "step": 4219500 }, { "epoch": 46.86, "learning_rate": 7.837660751171624e-09, "loss": 3.7342, "step": 4220000 }, { "epoch": 46.87, "learning_rate": 7.823778957421762e-09, "loss": 3.7505, "step": 4220500 }, { "epoch": 46.88, "learning_rate": 7.8098971636719e-09, "loss": 3.7531, "step": 4221000 }, { "epoch": 46.88, "learning_rate": 7.79601536992204e-09, "loss": 3.7449, "step": 4221500 }, { "epoch": 46.89, "learning_rate": 7.782133576172178e-09, "loss": 3.7328, "step": 4222000 }, { "epoch": 46.89, "learning_rate": 7.768251782422316e-09, "loss": 3.7622, "step": 4222500 }, { "epoch": 46.9, "learning_rate": 7.754369988672456e-09, "loss": 3.7446, "step": 4223000 }, { "epoch": 46.9, "learning_rate": 7.740488194922595e-09, "loss": 3.7417, "step": 4223500 }, { "epoch": 46.91, "learning_rate": 7.726606401172733e-09, "loss": 3.741, "step": 4224000 }, { "epoch": 46.91, "learning_rate": 7.712724607422873e-09, "loss": 3.7444, "step": 4224500 }, { "epoch": 46.92, "learning_rate": 7.69884281367301e-09, "loss": 3.7523, "step": 4225000 }, { "epoch": 46.93, "learning_rate": 7.68496101992315e-09, "loss": 3.7361, "step": 4225500 }, { "epoch": 46.93, "learning_rate": 7.671079226173288e-09, "loss": 3.7277, "step": 4226000 }, { "epoch": 46.94, "learning_rate": 7.657197432423427e-09, "loss": 3.7439, "step": 4226500 }, { "epoch": 46.94, "learning_rate": 7.643315638673566e-09, "loss": 3.7499, "step": 4227000 }, { "epoch": 46.95, "learning_rate": 7.629433844923705e-09, "loss": 3.7438, "step": 4227500 }, { "epoch": 46.95, "learning_rate": 7.615552051173844e-09, "loss": 3.7331, "step": 4228000 }, { "epoch": 46.96, "learning_rate": 7.601670257423983e-09, "loss": 3.7447, "step": 4228500 }, { "epoch": 46.96, "learning_rate": 7.587788463674122e-09, "loss": 3.7508, "step": 4229000 }, { "epoch": 46.97, "learning_rate": 7.573906669924261e-09, "loss": 3.7655, "step": 4229500 }, { "epoch": 46.98, "learning_rate": 7.5600248761744e-09, "loss": 3.7447, "step": 4230000 }, { "epoch": 46.98, "learning_rate": 7.546143082424539e-09, "loss": 3.7397, "step": 4230500 }, { "epoch": 46.99, "learning_rate": 7.532261288674676e-09, "loss": 3.7532, "step": 4231000 }, { "epoch": 46.99, "learning_rate": 7.518379494924817e-09, "loss": 3.7338, "step": 4231500 }, { "epoch": 47.0, "learning_rate": 7.504497701174954e-09, "loss": 3.7446, "step": 4232000 }, { "epoch": 47.0, "eval_loss": 3.8242743015289307, "eval_runtime": 6.3073, "eval_samples_per_second": 246.381, "step": 4232162 }, { "epoch": 47.0, "learning_rate": 7.490615907425093e-09, "loss": 3.7579, "step": 4232500 }, { "epoch": 47.01, "learning_rate": 7.476734113675232e-09, "loss": 3.7574, "step": 4233000 }, { "epoch": 47.01, "learning_rate": 7.462852319925371e-09, "loss": 3.7213, "step": 4233500 }, { "epoch": 47.02, "learning_rate": 7.44897052617551e-09, "loss": 3.7384, "step": 4234000 }, { "epoch": 47.03, "learning_rate": 7.435088732425648e-09, "loss": 3.7482, "step": 4234500 }, { "epoch": 47.03, "learning_rate": 7.421206938675788e-09, "loss": 3.7463, "step": 4235000 }, { "epoch": 47.04, "learning_rate": 7.407325144925926e-09, "loss": 3.7536, "step": 4235500 }, { "epoch": 47.04, "learning_rate": 7.393443351176065e-09, "loss": 3.7523, "step": 4236000 }, { "epoch": 47.05, "learning_rate": 7.379561557426205e-09, "loss": 3.752, "step": 4236500 }, { "epoch": 47.05, "learning_rate": 7.365679763676343e-09, "loss": 3.7335, "step": 4237000 }, { "epoch": 47.06, "learning_rate": 7.351797969926482e-09, "loss": 3.7644, "step": 4237500 }, { "epoch": 47.06, "learning_rate": 7.33791617617662e-09, "loss": 3.7377, "step": 4238000 }, { "epoch": 47.07, "learning_rate": 7.32403438242676e-09, "loss": 3.7583, "step": 4238500 }, { "epoch": 47.08, "learning_rate": 7.310152588676897e-09, "loss": 3.748, "step": 4239000 }, { "epoch": 47.08, "learning_rate": 7.296270794927037e-09, "loss": 3.7442, "step": 4239500 }, { "epoch": 47.09, "learning_rate": 7.282389001177177e-09, "loss": 3.7548, "step": 4240000 }, { "epoch": 47.09, "learning_rate": 7.268507207427314e-09, "loss": 3.7585, "step": 4240500 }, { "epoch": 47.1, "learning_rate": 7.254625413677454e-09, "loss": 3.7421, "step": 4241000 }, { "epoch": 47.1, "learning_rate": 7.240743619927592e-09, "loss": 3.753, "step": 4241500 }, { "epoch": 47.11, "learning_rate": 7.226861826177731e-09, "loss": 3.7334, "step": 4242000 }, { "epoch": 47.11, "learning_rate": 7.212980032427871e-09, "loss": 3.7381, "step": 4242500 }, { "epoch": 47.12, "learning_rate": 7.199098238678009e-09, "loss": 3.7534, "step": 4243000 }, { "epoch": 47.13, "learning_rate": 7.185216444928148e-09, "loss": 3.7541, "step": 4243500 }, { "epoch": 47.13, "learning_rate": 7.171334651178286e-09, "loss": 3.7599, "step": 4244000 }, { "epoch": 47.14, "learning_rate": 7.157452857428426e-09, "loss": 3.757, "step": 4244500 }, { "epoch": 47.14, "learning_rate": 7.143571063678564e-09, "loss": 3.7498, "step": 4245000 }, { "epoch": 47.15, "learning_rate": 7.129689269928703e-09, "loss": 3.7329, "step": 4245500 }, { "epoch": 47.15, "learning_rate": 7.1158074761788425e-09, "loss": 3.7626, "step": 4246000 }, { "epoch": 47.16, "learning_rate": 7.101925682428981e-09, "loss": 3.7494, "step": 4246500 }, { "epoch": 47.16, "learning_rate": 7.0880438886791196e-09, "loss": 3.7384, "step": 4247000 }, { "epoch": 47.17, "learning_rate": 7.074162094929258e-09, "loss": 3.7444, "step": 4247500 }, { "epoch": 47.18, "learning_rate": 7.0602803011793975e-09, "loss": 3.7411, "step": 4248000 }, { "epoch": 47.18, "learning_rate": 7.046398507429535e-09, "loss": 3.754, "step": 4248500 }, { "epoch": 47.19, "learning_rate": 7.0325167136796745e-09, "loss": 3.7453, "step": 4249000 }, { "epoch": 47.19, "learning_rate": 7.018634919929814e-09, "loss": 3.7349, "step": 4249500 }, { "epoch": 47.2, "learning_rate": 7.004753126179952e-09, "loss": 3.759, "step": 4250000 }, { "epoch": 47.2, "learning_rate": 6.9908713324300914e-09, "loss": 3.7364, "step": 4250500 }, { "epoch": 47.21, "learning_rate": 6.9769895386802295e-09, "loss": 3.7578, "step": 4251000 }, { "epoch": 47.21, "learning_rate": 6.9631077449303685e-09, "loss": 3.7372, "step": 4251500 }, { "epoch": 47.22, "learning_rate": 6.949225951180507e-09, "loss": 3.7757, "step": 4252000 }, { "epoch": 47.23, "learning_rate": 6.935344157430646e-09, "loss": 3.7667, "step": 4252500 }, { "epoch": 47.23, "learning_rate": 6.921462363680785e-09, "loss": 3.7323, "step": 4253000 }, { "epoch": 47.24, "learning_rate": 6.9075805699309235e-09, "loss": 3.7586, "step": 4253500 }, { "epoch": 47.24, "learning_rate": 6.893698776181063e-09, "loss": 3.7294, "step": 4254000 }, { "epoch": 47.25, "learning_rate": 6.879816982431201e-09, "loss": 3.7665, "step": 4254500 }, { "epoch": 47.25, "learning_rate": 6.86593518868134e-09, "loss": 3.7453, "step": 4255000 }, { "epoch": 47.26, "learning_rate": 6.8520533949314785e-09, "loss": 3.7365, "step": 4255500 }, { "epoch": 47.26, "learning_rate": 6.838171601181618e-09, "loss": 3.7523, "step": 4256000 }, { "epoch": 47.27, "learning_rate": 6.824289807431757e-09, "loss": 3.74, "step": 4256500 }, { "epoch": 47.28, "learning_rate": 6.810408013681895e-09, "loss": 3.7408, "step": 4257000 }, { "epoch": 47.28, "learning_rate": 6.796526219932035e-09, "loss": 3.7457, "step": 4257500 }, { "epoch": 47.29, "learning_rate": 6.7826444261821724e-09, "loss": 3.7513, "step": 4258000 }, { "epoch": 47.29, "learning_rate": 6.768762632432312e-09, "loss": 3.7356, "step": 4258500 }, { "epoch": 47.3, "learning_rate": 6.754880838682451e-09, "loss": 3.7465, "step": 4259000 }, { "epoch": 47.3, "learning_rate": 6.740999044932589e-09, "loss": 3.7641, "step": 4259500 }, { "epoch": 47.31, "learning_rate": 6.727117251182729e-09, "loss": 3.7496, "step": 4260000 }, { "epoch": 47.31, "learning_rate": 6.713235457432867e-09, "loss": 3.7393, "step": 4260500 }, { "epoch": 47.32, "learning_rate": 6.699353663683006e-09, "loss": 3.7531, "step": 4261000 }, { "epoch": 47.33, "learning_rate": 6.685471869933144e-09, "loss": 3.7458, "step": 4261500 }, { "epoch": 47.33, "learning_rate": 6.671590076183284e-09, "loss": 3.7333, "step": 4262000 }, { "epoch": 47.34, "learning_rate": 6.657708282433423e-09, "loss": 3.7629, "step": 4262500 }, { "epoch": 47.34, "learning_rate": 6.643826488683561e-09, "loss": 3.7322, "step": 4263000 }, { "epoch": 47.35, "learning_rate": 6.629944694933701e-09, "loss": 3.7509, "step": 4263500 }, { "epoch": 47.35, "learning_rate": 6.616062901183839e-09, "loss": 3.7388, "step": 4264000 }, { "epoch": 47.36, "learning_rate": 6.602181107433978e-09, "loss": 3.7486, "step": 4264500 }, { "epoch": 47.36, "learning_rate": 6.588299313684116e-09, "loss": 3.7601, "step": 4265000 }, { "epoch": 47.37, "learning_rate": 6.574417519934256e-09, "loss": 3.7358, "step": 4265500 }, { "epoch": 47.38, "learning_rate": 6.560535726184395e-09, "loss": 3.7471, "step": 4266000 }, { "epoch": 47.38, "learning_rate": 6.546653932434533e-09, "loss": 3.762, "step": 4266500 }, { "epoch": 47.39, "learning_rate": 6.532772138684672e-09, "loss": 3.7427, "step": 4267000 }, { "epoch": 47.39, "learning_rate": 6.51889034493481e-09, "loss": 3.748, "step": 4267500 }, { "epoch": 47.4, "learning_rate": 6.50500855118495e-09, "loss": 3.7439, "step": 4268000 }, { "epoch": 47.4, "learning_rate": 6.491126757435088e-09, "loss": 3.7493, "step": 4268500 }, { "epoch": 47.41, "learning_rate": 6.477244963685227e-09, "loss": 3.7257, "step": 4269000 }, { "epoch": 47.41, "learning_rate": 6.463363169935367e-09, "loss": 3.7338, "step": 4269500 }, { "epoch": 47.42, "learning_rate": 6.449481376185505e-09, "loss": 3.7524, "step": 4270000 }, { "epoch": 47.43, "learning_rate": 6.435599582435644e-09, "loss": 3.7315, "step": 4270500 }, { "epoch": 47.43, "learning_rate": 6.421717788685782e-09, "loss": 3.7389, "step": 4271000 }, { "epoch": 47.44, "learning_rate": 6.407835994935922e-09, "loss": 3.7326, "step": 4271500 }, { "epoch": 47.44, "learning_rate": 6.39395420118606e-09, "loss": 3.7458, "step": 4272000 }, { "epoch": 47.45, "learning_rate": 6.380072407436199e-09, "loss": 3.7548, "step": 4272500 }, { "epoch": 47.45, "learning_rate": 6.366190613686339e-09, "loss": 3.7648, "step": 4273000 }, { "epoch": 47.46, "learning_rate": 6.352308819936476e-09, "loss": 3.7574, "step": 4273500 }, { "epoch": 47.46, "learning_rate": 6.338427026186616e-09, "loss": 3.7652, "step": 4274000 }, { "epoch": 47.47, "learning_rate": 6.324545232436754e-09, "loss": 3.7503, "step": 4274500 }, { "epoch": 47.48, "learning_rate": 6.310663438686893e-09, "loss": 3.748, "step": 4275000 }, { "epoch": 47.48, "learning_rate": 6.2967816449370326e-09, "loss": 3.7334, "step": 4275500 }, { "epoch": 47.49, "learning_rate": 6.282899851187171e-09, "loss": 3.7136, "step": 4276000 }, { "epoch": 47.49, "learning_rate": 6.26901805743731e-09, "loss": 3.7517, "step": 4276500 }, { "epoch": 47.5, "learning_rate": 6.255136263687448e-09, "loss": 3.7631, "step": 4277000 }, { "epoch": 47.5, "learning_rate": 6.2412544699375876e-09, "loss": 3.7392, "step": 4277500 }, { "epoch": 47.51, "learning_rate": 6.2273726761877265e-09, "loss": 3.7512, "step": 4278000 }, { "epoch": 47.51, "learning_rate": 6.213490882437865e-09, "loss": 3.7409, "step": 4278500 }, { "epoch": 47.52, "learning_rate": 6.199609088688004e-09, "loss": 3.7504, "step": 4279000 }, { "epoch": 47.53, "learning_rate": 6.1857272949381426e-09, "loss": 3.7566, "step": 4279500 }, { "epoch": 47.53, "learning_rate": 6.171845501188281e-09, "loss": 3.7324, "step": 4280000 }, { "epoch": 47.54, "learning_rate": 6.1579637074384205e-09, "loss": 3.7633, "step": 4280500 }, { "epoch": 47.54, "learning_rate": 6.1440819136885594e-09, "loss": 3.7647, "step": 4281000 }, { "epoch": 47.55, "learning_rate": 6.1302001199386975e-09, "loss": 3.7463, "step": 4281500 }, { "epoch": 47.55, "learning_rate": 6.1163183261888365e-09, "loss": 3.7659, "step": 4282000 }, { "epoch": 47.56, "learning_rate": 6.1024365324389755e-09, "loss": 3.7252, "step": 4282500 }, { "epoch": 47.56, "learning_rate": 6.0885547386891136e-09, "loss": 3.7308, "step": 4283000 }, { "epoch": 47.57, "learning_rate": 6.0746729449392525e-09, "loss": 3.7476, "step": 4283500 }, { "epoch": 47.58, "learning_rate": 6.060791151189392e-09, "loss": 3.7416, "step": 4284000 }, { "epoch": 47.58, "learning_rate": 6.0469093574395304e-09, "loss": 3.7505, "step": 4284500 }, { "epoch": 47.59, "learning_rate": 6.033027563689669e-09, "loss": 3.7479, "step": 4285000 }, { "epoch": 47.59, "learning_rate": 6.019145769939808e-09, "loss": 3.7348, "step": 4285500 }, { "epoch": 47.6, "learning_rate": 6.005263976189947e-09, "loss": 3.7442, "step": 4286000 }, { "epoch": 47.6, "learning_rate": 5.9913821824400854e-09, "loss": 3.749, "step": 4286500 }, { "epoch": 47.61, "learning_rate": 5.977500388690225e-09, "loss": 3.7436, "step": 4287000 }, { "epoch": 47.61, "learning_rate": 5.963618594940363e-09, "loss": 3.7438, "step": 4287500 }, { "epoch": 47.62, "learning_rate": 5.949736801190502e-09, "loss": 3.7576, "step": 4288000 }, { "epoch": 47.63, "learning_rate": 5.935855007440641e-09, "loss": 3.7379, "step": 4288500 }, { "epoch": 47.63, "learning_rate": 5.92197321369078e-09, "loss": 3.7452, "step": 4289000 }, { "epoch": 47.64, "learning_rate": 5.908091419940918e-09, "loss": 3.7453, "step": 4289500 }, { "epoch": 47.64, "learning_rate": 5.894209626191057e-09, "loss": 3.731, "step": 4290000 }, { "epoch": 47.65, "learning_rate": 5.880327832441197e-09, "loss": 3.7422, "step": 4290500 }, { "epoch": 47.65, "learning_rate": 5.866446038691335e-09, "loss": 3.7458, "step": 4291000 }, { "epoch": 47.66, "learning_rate": 5.852564244941474e-09, "loss": 3.7311, "step": 4291500 }, { "epoch": 47.66, "learning_rate": 5.838682451191613e-09, "loss": 3.7443, "step": 4292000 }, { "epoch": 47.67, "learning_rate": 5.824800657441751e-09, "loss": 3.751, "step": 4292500 }, { "epoch": 47.68, "learning_rate": 5.81091886369189e-09, "loss": 3.7512, "step": 4293000 }, { "epoch": 47.68, "learning_rate": 5.79703706994203e-09, "loss": 3.7437, "step": 4293500 }, { "epoch": 47.69, "learning_rate": 5.783155276192168e-09, "loss": 3.7528, "step": 4294000 }, { "epoch": 47.69, "learning_rate": 5.769273482442307e-09, "loss": 3.7476, "step": 4294500 }, { "epoch": 47.7, "learning_rate": 5.755391688692446e-09, "loss": 3.7665, "step": 4295000 }, { "epoch": 47.7, "learning_rate": 5.741509894942584e-09, "loss": 3.7433, "step": 4295500 }, { "epoch": 47.71, "learning_rate": 5.727628101192723e-09, "loss": 3.7335, "step": 4296000 }, { "epoch": 47.71, "learning_rate": 5.713746307442862e-09, "loss": 3.7512, "step": 4296500 }, { "epoch": 47.72, "learning_rate": 5.699864513693001e-09, "loss": 3.7378, "step": 4297000 }, { "epoch": 47.73, "learning_rate": 5.68598271994314e-09, "loss": 3.7372, "step": 4297500 }, { "epoch": 47.73, "learning_rate": 5.672100926193279e-09, "loss": 3.759, "step": 4298000 }, { "epoch": 47.74, "learning_rate": 5.658219132443418e-09, "loss": 3.7443, "step": 4298500 }, { "epoch": 47.74, "learning_rate": 5.644337338693556e-09, "loss": 3.7466, "step": 4299000 }, { "epoch": 47.75, "learning_rate": 5.630455544943695e-09, "loss": 3.7543, "step": 4299500 }, { "epoch": 47.75, "learning_rate": 5.616573751193834e-09, "loss": 3.7359, "step": 4300000 }, { "epoch": 47.76, "learning_rate": 5.602691957443973e-09, "loss": 3.7403, "step": 4300500 }, { "epoch": 47.76, "learning_rate": 5.588810163694112e-09, "loss": 3.7444, "step": 4301000 }, { "epoch": 47.77, "learning_rate": 5.574928369944251e-09, "loss": 3.7319, "step": 4301500 }, { "epoch": 47.78, "learning_rate": 5.561046576194389e-09, "loss": 3.7551, "step": 4302000 }, { "epoch": 47.78, "learning_rate": 5.547164782444528e-09, "loss": 3.7523, "step": 4302500 }, { "epoch": 47.79, "learning_rate": 5.533282988694667e-09, "loss": 3.7417, "step": 4303000 }, { "epoch": 47.79, "learning_rate": 5.519401194944806e-09, "loss": 3.7365, "step": 4303500 }, { "epoch": 47.8, "learning_rate": 5.505519401194945e-09, "loss": 3.7219, "step": 4304000 }, { "epoch": 47.8, "learning_rate": 5.491637607445084e-09, "loss": 3.7592, "step": 4304500 }, { "epoch": 47.81, "learning_rate": 5.477755813695222e-09, "loss": 3.7389, "step": 4305000 }, { "epoch": 47.81, "learning_rate": 5.463874019945361e-09, "loss": 3.7461, "step": 4305500 }, { "epoch": 47.82, "learning_rate": 5.4499922261955e-09, "loss": 3.758, "step": 4306000 }, { "epoch": 47.83, "learning_rate": 5.436110432445639e-09, "loss": 3.7574, "step": 4306500 }, { "epoch": 47.83, "learning_rate": 5.422228638695778e-09, "loss": 3.7653, "step": 4307000 }, { "epoch": 47.84, "learning_rate": 5.408346844945917e-09, "loss": 3.7657, "step": 4307500 }, { "epoch": 47.84, "learning_rate": 5.3944650511960556e-09, "loss": 3.7303, "step": 4308000 }, { "epoch": 47.85, "learning_rate": 5.380583257446194e-09, "loss": 3.751, "step": 4308500 }, { "epoch": 47.85, "learning_rate": 5.366701463696333e-09, "loss": 3.742, "step": 4309000 }, { "epoch": 47.86, "learning_rate": 5.352819669946472e-09, "loss": 3.748, "step": 4309500 }, { "epoch": 47.86, "learning_rate": 5.3389378761966106e-09, "loss": 3.7503, "step": 4310000 }, { "epoch": 47.87, "learning_rate": 5.3250560824467495e-09, "loss": 3.7446, "step": 4310500 }, { "epoch": 47.88, "learning_rate": 5.3111742886968885e-09, "loss": 3.7476, "step": 4311000 }, { "epoch": 47.88, "learning_rate": 5.297292494947027e-09, "loss": 3.7416, "step": 4311500 }, { "epoch": 47.89, "learning_rate": 5.2834107011971655e-09, "loss": 3.7469, "step": 4312000 }, { "epoch": 47.89, "learning_rate": 5.2695289074473045e-09, "loss": 3.7595, "step": 4312500 }, { "epoch": 47.9, "learning_rate": 5.255647113697443e-09, "loss": 3.741, "step": 4313000 }, { "epoch": 47.9, "learning_rate": 5.2417653199475824e-09, "loss": 3.7428, "step": 4313500 }, { "epoch": 47.91, "learning_rate": 5.227883526197721e-09, "loss": 3.7363, "step": 4314000 }, { "epoch": 47.91, "learning_rate": 5.2140017324478595e-09, "loss": 3.7287, "step": 4314500 }, { "epoch": 47.92, "learning_rate": 5.2001199386979985e-09, "loss": 3.7548, "step": 4315000 }, { "epoch": 47.93, "learning_rate": 5.186238144948137e-09, "loss": 3.7676, "step": 4315500 }, { "epoch": 47.93, "learning_rate": 5.172356351198276e-09, "loss": 3.7503, "step": 4316000 }, { "epoch": 47.94, "learning_rate": 5.158474557448415e-09, "loss": 3.7547, "step": 4316500 }, { "epoch": 47.94, "learning_rate": 5.144592763698554e-09, "loss": 3.7436, "step": 4317000 }, { "epoch": 47.95, "learning_rate": 5.130710969948692e-09, "loss": 3.7506, "step": 4317500 }, { "epoch": 47.95, "learning_rate": 5.116829176198831e-09, "loss": 3.7512, "step": 4318000 }, { "epoch": 47.96, "learning_rate": 5.10294738244897e-09, "loss": 3.7576, "step": 4318500 }, { "epoch": 47.96, "learning_rate": 5.089065588699109e-09, "loss": 3.752, "step": 4319000 }, { "epoch": 47.97, "learning_rate": 5.075183794949247e-09, "loss": 3.7454, "step": 4319500 }, { "epoch": 47.98, "learning_rate": 5.061302001199387e-09, "loss": 3.7301, "step": 4320000 }, { "epoch": 47.98, "learning_rate": 5.047420207449526e-09, "loss": 3.7606, "step": 4320500 }, { "epoch": 47.99, "learning_rate": 5.033538413699664e-09, "loss": 3.7354, "step": 4321000 }, { "epoch": 47.99, "learning_rate": 5.019656619949803e-09, "loss": 3.7535, "step": 4321500 }, { "epoch": 48.0, "learning_rate": 5.005774826199942e-09, "loss": 3.7427, "step": 4322000 }, { "epoch": 48.0, "eval_loss": 3.824249267578125, "eval_runtime": 6.2995, "eval_samples_per_second": 246.684, "step": 4322208 }, { "epoch": 48.0, "learning_rate": 4.99189303245008e-09, "loss": 3.7413, "step": 4322500 }, { "epoch": 48.01, "learning_rate": 4.978011238700219e-09, "loss": 3.761, "step": 4323000 }, { "epoch": 48.01, "learning_rate": 4.964129444950359e-09, "loss": 3.7313, "step": 4323500 }, { "epoch": 48.02, "learning_rate": 4.950247651200497e-09, "loss": 3.7562, "step": 4324000 }, { "epoch": 48.03, "learning_rate": 4.936365857450636e-09, "loss": 3.7567, "step": 4324500 }, { "epoch": 48.03, "learning_rate": 4.922484063700775e-09, "loss": 3.7307, "step": 4325000 }, { "epoch": 48.04, "learning_rate": 4.908602269950913e-09, "loss": 3.7446, "step": 4325500 }, { "epoch": 48.04, "learning_rate": 4.894720476201052e-09, "loss": 3.7502, "step": 4326000 }, { "epoch": 48.05, "learning_rate": 4.880838682451192e-09, "loss": 3.7549, "step": 4326500 }, { "epoch": 48.05, "learning_rate": 4.86695688870133e-09, "loss": 3.7369, "step": 4327000 }, { "epoch": 48.06, "learning_rate": 4.853075094951469e-09, "loss": 3.7435, "step": 4327500 }, { "epoch": 48.06, "learning_rate": 4.839193301201608e-09, "loss": 3.7563, "step": 4328000 }, { "epoch": 48.07, "learning_rate": 4.825311507451747e-09, "loss": 3.7402, "step": 4328500 }, { "epoch": 48.08, "learning_rate": 4.811429713701885e-09, "loss": 3.736, "step": 4329000 }, { "epoch": 48.08, "learning_rate": 4.797547919952024e-09, "loss": 3.734, "step": 4329500 }, { "epoch": 48.09, "learning_rate": 4.783666126202164e-09, "loss": 3.7503, "step": 4330000 }, { "epoch": 48.09, "learning_rate": 4.769784332452302e-09, "loss": 3.762, "step": 4330500 }, { "epoch": 48.1, "learning_rate": 4.755902538702441e-09, "loss": 3.76, "step": 4331000 }, { "epoch": 48.1, "learning_rate": 4.74202074495258e-09, "loss": 3.7506, "step": 4331500 }, { "epoch": 48.11, "learning_rate": 4.728138951202718e-09, "loss": 3.7372, "step": 4332000 }, { "epoch": 48.11, "learning_rate": 4.714257157452857e-09, "loss": 3.7512, "step": 4332500 }, { "epoch": 48.12, "learning_rate": 4.700375363702997e-09, "loss": 3.7584, "step": 4333000 }, { "epoch": 48.13, "learning_rate": 4.686493569953135e-09, "loss": 3.7337, "step": 4333500 }, { "epoch": 48.13, "learning_rate": 4.672611776203274e-09, "loss": 3.7273, "step": 4334000 }, { "epoch": 48.14, "learning_rate": 4.658729982453413e-09, "loss": 3.7547, "step": 4334500 }, { "epoch": 48.14, "learning_rate": 4.644848188703551e-09, "loss": 3.7307, "step": 4335000 }, { "epoch": 48.15, "learning_rate": 4.63096639495369e-09, "loss": 3.7276, "step": 4335500 }, { "epoch": 48.15, "learning_rate": 4.617084601203829e-09, "loss": 3.7379, "step": 4336000 }, { "epoch": 48.16, "learning_rate": 4.603202807453968e-09, "loss": 3.7674, "step": 4336500 }, { "epoch": 48.16, "learning_rate": 4.589321013704107e-09, "loss": 3.76, "step": 4337000 }, { "epoch": 48.17, "learning_rate": 4.575439219954246e-09, "loss": 3.7587, "step": 4337500 }, { "epoch": 48.18, "learning_rate": 4.561557426204384e-09, "loss": 3.7482, "step": 4338000 }, { "epoch": 48.18, "learning_rate": 4.547675632454523e-09, "loss": 3.7387, "step": 4338500 }, { "epoch": 48.19, "learning_rate": 4.533793838704662e-09, "loss": 3.7455, "step": 4339000 }, { "epoch": 48.19, "learning_rate": 4.519912044954801e-09, "loss": 3.7585, "step": 4339500 }, { "epoch": 48.2, "learning_rate": 4.50603025120494e-09, "loss": 3.7477, "step": 4340000 }, { "epoch": 48.2, "learning_rate": 4.4921484574550786e-09, "loss": 3.7481, "step": 4340500 }, { "epoch": 48.21, "learning_rate": 4.4782666637052175e-09, "loss": 3.7569, "step": 4341000 }, { "epoch": 48.21, "learning_rate": 4.464384869955356e-09, "loss": 3.7658, "step": 4341500 }, { "epoch": 48.22, "learning_rate": 4.450503076205495e-09, "loss": 3.7601, "step": 4342000 }, { "epoch": 48.23, "learning_rate": 4.4366212824556336e-09, "loss": 3.7424, "step": 4342500 }, { "epoch": 48.23, "learning_rate": 4.4227394887057725e-09, "loss": 3.7402, "step": 4343000 }, { "epoch": 48.24, "learning_rate": 4.4088576949559115e-09, "loss": 3.7434, "step": 4343500 }, { "epoch": 48.24, "learning_rate": 4.3949759012060504e-09, "loss": 3.7456, "step": 4344000 }, { "epoch": 48.25, "learning_rate": 4.3810941074561885e-09, "loss": 3.7526, "step": 4344500 }, { "epoch": 48.25, "learning_rate": 4.3672123137063275e-09, "loss": 3.7507, "step": 4345000 }, { "epoch": 48.26, "learning_rate": 4.3533305199564665e-09, "loss": 3.7498, "step": 4345500 }, { "epoch": 48.26, "learning_rate": 4.3394487262066046e-09, "loss": 3.7383, "step": 4346000 }, { "epoch": 48.27, "learning_rate": 4.325566932456744e-09, "loss": 3.774, "step": 4346500 }, { "epoch": 48.28, "learning_rate": 4.311685138706883e-09, "loss": 3.7397, "step": 4347000 }, { "epoch": 48.28, "learning_rate": 4.2978033449570215e-09, "loss": 3.7565, "step": 4347500 }, { "epoch": 48.29, "learning_rate": 4.28392155120716e-09, "loss": 3.7412, "step": 4348000 }, { "epoch": 48.29, "learning_rate": 4.270039757457299e-09, "loss": 3.7558, "step": 4348500 }, { "epoch": 48.3, "learning_rate": 4.256157963707438e-09, "loss": 3.739, "step": 4349000 }, { "epoch": 48.3, "learning_rate": 4.242276169957577e-09, "loss": 3.7347, "step": 4349500 }, { "epoch": 48.31, "learning_rate": 4.228394376207716e-09, "loss": 3.7446, "step": 4350000 }, { "epoch": 48.31, "learning_rate": 4.214512582457855e-09, "loss": 3.7396, "step": 4350500 }, { "epoch": 48.32, "learning_rate": 4.200630788707993e-09, "loss": 3.7384, "step": 4351000 }, { "epoch": 48.33, "learning_rate": 4.186748994958132e-09, "loss": 3.7331, "step": 4351500 }, { "epoch": 48.33, "learning_rate": 4.172867201208271e-09, "loss": 3.7452, "step": 4352000 }, { "epoch": 48.34, "learning_rate": 4.158985407458409e-09, "loss": 3.7511, "step": 4352500 }, { "epoch": 48.34, "learning_rate": 4.145103613708549e-09, "loss": 3.7357, "step": 4353000 }, { "epoch": 48.35, "learning_rate": 4.131221819958688e-09, "loss": 3.7459, "step": 4353500 }, { "epoch": 48.35, "learning_rate": 4.117340026208826e-09, "loss": 3.7424, "step": 4354000 }, { "epoch": 48.36, "learning_rate": 4.103458232458965e-09, "loss": 3.7428, "step": 4354500 }, { "epoch": 48.36, "learning_rate": 4.089576438709104e-09, "loss": 3.7326, "step": 4355000 }, { "epoch": 48.37, "learning_rate": 4.075694644959242e-09, "loss": 3.7446, "step": 4355500 }, { "epoch": 48.38, "learning_rate": 4.061812851209382e-09, "loss": 3.7469, "step": 4356000 }, { "epoch": 48.38, "learning_rate": 4.047931057459521e-09, "loss": 3.7358, "step": 4356500 }, { "epoch": 48.39, "learning_rate": 4.034049263709659e-09, "loss": 3.724, "step": 4357000 }, { "epoch": 48.39, "learning_rate": 4.020167469959798e-09, "loss": 3.7405, "step": 4357500 }, { "epoch": 48.4, "learning_rate": 4.006285676209937e-09, "loss": 3.7326, "step": 4358000 }, { "epoch": 48.4, "learning_rate": 3.992403882460076e-09, "loss": 3.76, "step": 4358500 }, { "epoch": 48.41, "learning_rate": 3.978522088710214e-09, "loss": 3.7301, "step": 4359000 }, { "epoch": 48.41, "learning_rate": 3.964640294960354e-09, "loss": 3.774, "step": 4359500 }, { "epoch": 48.42, "learning_rate": 3.950758501210492e-09, "loss": 3.7366, "step": 4360000 }, { "epoch": 48.43, "learning_rate": 3.936876707460631e-09, "loss": 3.7594, "step": 4360500 }, { "epoch": 48.43, "learning_rate": 3.92299491371077e-09, "loss": 3.7262, "step": 4361000 }, { "epoch": 48.44, "learning_rate": 3.909113119960909e-09, "loss": 3.7613, "step": 4361500 }, { "epoch": 48.44, "learning_rate": 3.895231326211047e-09, "loss": 3.7586, "step": 4362000 }, { "epoch": 48.45, "learning_rate": 3.881349532461186e-09, "loss": 3.7425, "step": 4362500 }, { "epoch": 48.45, "learning_rate": 3.867467738711326e-09, "loss": 3.7375, "step": 4363000 }, { "epoch": 48.46, "learning_rate": 3.853585944961464e-09, "loss": 3.7531, "step": 4363500 }, { "epoch": 48.46, "learning_rate": 3.839704151211603e-09, "loss": 3.7472, "step": 4364000 }, { "epoch": 48.47, "learning_rate": 3.825822357461742e-09, "loss": 3.736, "step": 4364500 }, { "epoch": 48.48, "learning_rate": 3.81194056371188e-09, "loss": 3.7336, "step": 4365000 }, { "epoch": 48.48, "learning_rate": 3.798058769962019e-09, "loss": 3.7498, "step": 4365500 }, { "epoch": 48.49, "learning_rate": 3.784176976212159e-09, "loss": 3.7462, "step": 4366000 }, { "epoch": 48.49, "learning_rate": 3.770295182462297e-09, "loss": 3.7505, "step": 4366500 }, { "epoch": 48.5, "learning_rate": 3.756413388712436e-09, "loss": 3.7319, "step": 4367000 }, { "epoch": 48.5, "learning_rate": 3.742531594962575e-09, "loss": 3.7197, "step": 4367500 }, { "epoch": 48.51, "learning_rate": 3.728649801212713e-09, "loss": 3.7403, "step": 4368000 }, { "epoch": 48.51, "learning_rate": 3.7147680074628518e-09, "loss": 3.7683, "step": 4368500 }, { "epoch": 48.52, "learning_rate": 3.7008862137129907e-09, "loss": 3.7513, "step": 4369000 }, { "epoch": 48.53, "learning_rate": 3.68700441996313e-09, "loss": 3.7484, "step": 4369500 }, { "epoch": 48.53, "learning_rate": 3.6731226262132687e-09, "loss": 3.7541, "step": 4370000 }, { "epoch": 48.54, "learning_rate": 3.6592408324634076e-09, "loss": 3.7547, "step": 4370500 }, { "epoch": 48.54, "learning_rate": 3.645359038713546e-09, "loss": 3.7576, "step": 4371000 }, { "epoch": 48.55, "learning_rate": 3.631477244963685e-09, "loss": 3.7592, "step": 4371500 }, { "epoch": 48.55, "learning_rate": 3.6175954512138236e-09, "loss": 3.7575, "step": 4372000 }, { "epoch": 48.56, "learning_rate": 3.603713657463963e-09, "loss": 3.7601, "step": 4372500 }, { "epoch": 48.56, "learning_rate": 3.5898318637141016e-09, "loss": 3.7476, "step": 4373000 }, { "epoch": 48.57, "learning_rate": 3.5759500699642405e-09, "loss": 3.7387, "step": 4373500 }, { "epoch": 48.58, "learning_rate": 3.562068276214379e-09, "loss": 3.7594, "step": 4374000 }, { "epoch": 48.58, "learning_rate": 3.548186482464518e-09, "loss": 3.7584, "step": 4374500 }, { "epoch": 48.59, "learning_rate": 3.5343046887146565e-09, "loss": 3.7279, "step": 4375000 }, { "epoch": 48.59, "learning_rate": 3.5204228949647955e-09, "loss": 3.7369, "step": 4375500 }, { "epoch": 48.6, "learning_rate": 3.506541101214935e-09, "loss": 3.7355, "step": 4376000 }, { "epoch": 48.6, "learning_rate": 3.4926593074650734e-09, "loss": 3.7466, "step": 4376500 }, { "epoch": 48.61, "learning_rate": 3.478777513715212e-09, "loss": 3.7471, "step": 4377000 }, { "epoch": 48.61, "learning_rate": 3.464895719965351e-09, "loss": 3.7259, "step": 4377500 }, { "epoch": 48.62, "learning_rate": 3.4510139262154895e-09, "loss": 3.737, "step": 4378000 }, { "epoch": 48.63, "learning_rate": 3.4371321324656284e-09, "loss": 3.74, "step": 4378500 }, { "epoch": 48.63, "learning_rate": 3.4232503387157678e-09, "loss": 3.7592, "step": 4379000 }, { "epoch": 48.64, "learning_rate": 3.4093685449659063e-09, "loss": 3.7585, "step": 4379500 }, { "epoch": 48.64, "learning_rate": 3.3954867512160453e-09, "loss": 3.7596, "step": 4380000 }, { "epoch": 48.65, "learning_rate": 3.381604957466184e-09, "loss": 3.7442, "step": 4380500 }, { "epoch": 48.65, "learning_rate": 3.3677231637163224e-09, "loss": 3.7309, "step": 4381000 }, { "epoch": 48.66, "learning_rate": 3.3538413699664613e-09, "loss": 3.751, "step": 4381500 }, { "epoch": 48.66, "learning_rate": 3.3399595762166e-09, "loss": 3.7478, "step": 4382000 }, { "epoch": 48.67, "learning_rate": 3.3260777824667392e-09, "loss": 3.7532, "step": 4382500 }, { "epoch": 48.68, "learning_rate": 3.312195988716878e-09, "loss": 3.7336, "step": 4383000 }, { "epoch": 48.68, "learning_rate": 3.2983141949670167e-09, "loss": 3.7752, "step": 4383500 }, { "epoch": 48.69, "learning_rate": 3.2844324012171557e-09, "loss": 3.7333, "step": 4384000 }, { "epoch": 48.69, "learning_rate": 3.2705506074672942e-09, "loss": 3.7346, "step": 4384500 }, { "epoch": 48.7, "learning_rate": 3.2566688137174328e-09, "loss": 3.7482, "step": 4385000 }, { "epoch": 48.7, "learning_rate": 3.2427870199675717e-09, "loss": 3.7437, "step": 4385500 }, { "epoch": 48.71, "learning_rate": 3.228905226217711e-09, "loss": 3.7538, "step": 4386000 }, { "epoch": 48.71, "learning_rate": 3.2150234324678496e-09, "loss": 3.7316, "step": 4386500 }, { "epoch": 48.72, "learning_rate": 3.2011416387179886e-09, "loss": 3.7489, "step": 4387000 }, { "epoch": 48.73, "learning_rate": 3.187259844968127e-09, "loss": 3.7448, "step": 4387500 }, { "epoch": 48.73, "learning_rate": 3.173378051218266e-09, "loss": 3.7478, "step": 4388000 }, { "epoch": 48.74, "learning_rate": 3.1594962574684046e-09, "loss": 3.7706, "step": 4388500 }, { "epoch": 48.74, "learning_rate": 3.145614463718544e-09, "loss": 3.7455, "step": 4389000 }, { "epoch": 48.75, "learning_rate": 3.131732669968683e-09, "loss": 3.7359, "step": 4389500 }, { "epoch": 48.75, "learning_rate": 3.1178508762188215e-09, "loss": 3.7488, "step": 4390000 }, { "epoch": 48.76, "learning_rate": 3.10396908246896e-09, "loss": 3.7531, "step": 4390500 }, { "epoch": 48.76, "learning_rate": 3.090087288719099e-09, "loss": 3.7573, "step": 4391000 }, { "epoch": 48.77, "learning_rate": 3.076205494969238e-09, "loss": 3.7429, "step": 4391500 }, { "epoch": 48.78, "learning_rate": 3.0623237012193765e-09, "loss": 3.7476, "step": 4392000 }, { "epoch": 48.78, "learning_rate": 3.0484419074695154e-09, "loss": 3.7535, "step": 4392500 }, { "epoch": 48.79, "learning_rate": 3.0345601137196544e-09, "loss": 3.7491, "step": 4393000 }, { "epoch": 48.79, "learning_rate": 3.020678319969793e-09, "loss": 3.7546, "step": 4393500 }, { "epoch": 48.8, "learning_rate": 3.006796526219932e-09, "loss": 3.7614, "step": 4394000 }, { "epoch": 48.8, "learning_rate": 2.9929147324700704e-09, "loss": 3.743, "step": 4394500 }, { "epoch": 48.81, "learning_rate": 2.97903293872021e-09, "loss": 3.7431, "step": 4395000 }, { "epoch": 48.81, "learning_rate": 2.9651511449703483e-09, "loss": 3.7512, "step": 4395500 }, { "epoch": 48.82, "learning_rate": 2.951269351220487e-09, "loss": 3.7339, "step": 4396000 }, { "epoch": 48.83, "learning_rate": 2.9373875574706263e-09, "loss": 3.7485, "step": 4396500 }, { "epoch": 48.83, "learning_rate": 2.923505763720765e-09, "loss": 3.7315, "step": 4397000 }, { "epoch": 48.84, "learning_rate": 2.9096239699709033e-09, "loss": 3.7569, "step": 4397500 }, { "epoch": 48.84, "learning_rate": 2.8957421762210427e-09, "loss": 3.7529, "step": 4398000 }, { "epoch": 48.85, "learning_rate": 2.8818603824711812e-09, "loss": 3.7447, "step": 4398500 }, { "epoch": 48.85, "learning_rate": 2.86797858872132e-09, "loss": 3.7557, "step": 4399000 }, { "epoch": 48.86, "learning_rate": 2.8540967949714587e-09, "loss": 3.7354, "step": 4399500 }, { "epoch": 48.86, "learning_rate": 2.8402150012215977e-09, "loss": 3.7433, "step": 4400000 }, { "epoch": 48.87, "learning_rate": 2.8263332074717367e-09, "loss": 3.745, "step": 4400500 }, { "epoch": 48.88, "learning_rate": 2.812451413721875e-09, "loss": 3.7475, "step": 4401000 }, { "epoch": 48.88, "learning_rate": 2.798569619972014e-09, "loss": 3.7395, "step": 4401500 }, { "epoch": 48.89, "learning_rate": 2.784687826222153e-09, "loss": 3.746, "step": 4402000 }, { "epoch": 48.89, "learning_rate": 2.7708060324722916e-09, "loss": 3.7468, "step": 4402500 }, { "epoch": 48.9, "learning_rate": 2.7569242387224306e-09, "loss": 3.7423, "step": 4403000 }, { "epoch": 48.9, "learning_rate": 2.7430424449725696e-09, "loss": 3.7408, "step": 4403500 }, { "epoch": 48.91, "learning_rate": 2.729160651222708e-09, "loss": 3.7488, "step": 4404000 }, { "epoch": 48.91, "learning_rate": 2.715278857472847e-09, "loss": 3.7566, "step": 4404500 }, { "epoch": 48.92, "learning_rate": 2.701397063722986e-09, "loss": 3.7398, "step": 4405000 }, { "epoch": 48.92, "learning_rate": 2.6875152699731246e-09, "loss": 3.7627, "step": 4405500 }, { "epoch": 48.93, "learning_rate": 2.6736334762232635e-09, "loss": 3.7559, "step": 4406000 }, { "epoch": 48.94, "learning_rate": 2.6597516824734025e-09, "loss": 3.757, "step": 4406500 }, { "epoch": 48.94, "learning_rate": 2.645869888723541e-09, "loss": 3.7432, "step": 4407000 }, { "epoch": 48.95, "learning_rate": 2.63198809497368e-09, "loss": 3.747, "step": 4407500 }, { "epoch": 48.95, "learning_rate": 2.618106301223819e-09, "loss": 3.741, "step": 4408000 }, { "epoch": 48.96, "learning_rate": 2.6042245074739575e-09, "loss": 3.766, "step": 4408500 }, { "epoch": 48.96, "learning_rate": 2.5903427137240964e-09, "loss": 3.7385, "step": 4409000 }, { "epoch": 48.97, "learning_rate": 2.5764609199742354e-09, "loss": 3.7701, "step": 4409500 }, { "epoch": 48.97, "learning_rate": 2.5625791262243743e-09, "loss": 3.7594, "step": 4410000 }, { "epoch": 48.98, "learning_rate": 2.548697332474513e-09, "loss": 3.7521, "step": 4410500 }, { "epoch": 48.99, "learning_rate": 2.5348155387246514e-09, "loss": 3.7384, "step": 4411000 }, { "epoch": 48.99, "learning_rate": 2.5209337449747908e-09, "loss": 3.7661, "step": 4411500 }, { "epoch": 49.0, "learning_rate": 2.5070519512249293e-09, "loss": 3.7427, "step": 4412000 }, { "epoch": 49.0, "eval_loss": 3.824198007583618, "eval_runtime": 6.3065, "eval_samples_per_second": 246.412, "step": 4412254 }, { "epoch": 49.0, "learning_rate": 2.493170157475068e-09, "loss": 3.7514, "step": 4412500 }, { "epoch": 49.01, "learning_rate": 2.4792883637252072e-09, "loss": 3.7573, "step": 4413000 }, { "epoch": 49.01, "learning_rate": 2.4654065699753458e-09, "loss": 3.7368, "step": 4413500 }, { "epoch": 49.02, "learning_rate": 2.4515247762254847e-09, "loss": 3.7344, "step": 4414000 }, { "epoch": 49.02, "learning_rate": 2.4376429824756237e-09, "loss": 3.7523, "step": 4414500 }, { "epoch": 49.03, "learning_rate": 2.4237611887257622e-09, "loss": 3.7547, "step": 4415000 }, { "epoch": 49.04, "learning_rate": 2.409879394975901e-09, "loss": 3.7464, "step": 4415500 }, { "epoch": 49.04, "learning_rate": 2.39599760122604e-09, "loss": 3.7586, "step": 4416000 }, { "epoch": 49.05, "learning_rate": 2.3821158074761787e-09, "loss": 3.7505, "step": 4416500 }, { "epoch": 49.05, "learning_rate": 2.3682340137263176e-09, "loss": 3.7578, "step": 4417000 }, { "epoch": 49.06, "learning_rate": 2.354352219976456e-09, "loss": 3.7678, "step": 4417500 }, { "epoch": 49.06, "learning_rate": 2.340470426226595e-09, "loss": 3.7323, "step": 4418000 }, { "epoch": 49.07, "learning_rate": 2.326588632476734e-09, "loss": 3.7412, "step": 4418500 }, { "epoch": 49.07, "learning_rate": 2.3127068387268726e-09, "loss": 3.7396, "step": 4419000 }, { "epoch": 49.08, "learning_rate": 2.2988250449770116e-09, "loss": 3.7359, "step": 4419500 }, { "epoch": 49.09, "learning_rate": 2.2849432512271505e-09, "loss": 3.7364, "step": 4420000 }, { "epoch": 49.09, "learning_rate": 2.271061457477289e-09, "loss": 3.7276, "step": 4420500 }, { "epoch": 49.1, "learning_rate": 2.2571796637274284e-09, "loss": 3.7448, "step": 4421000 }, { "epoch": 49.1, "learning_rate": 2.243297869977567e-09, "loss": 3.7429, "step": 4421500 }, { "epoch": 49.11, "learning_rate": 2.2294160762277055e-09, "loss": 3.7547, "step": 4422000 }, { "epoch": 49.11, "learning_rate": 2.2155342824778445e-09, "loss": 3.7279, "step": 4422500 }, { "epoch": 49.12, "learning_rate": 2.2016524887279834e-09, "loss": 3.735, "step": 4423000 }, { "epoch": 49.12, "learning_rate": 2.187770694978122e-09, "loss": 3.7509, "step": 4423500 }, { "epoch": 49.13, "learning_rate": 2.173888901228261e-09, "loss": 3.7432, "step": 4424000 }, { "epoch": 49.14, "learning_rate": 2.1600071074784e-09, "loss": 3.757, "step": 4424500 }, { "epoch": 49.14, "learning_rate": 2.146125313728539e-09, "loss": 3.7542, "step": 4425000 }, { "epoch": 49.15, "learning_rate": 2.1322435199786774e-09, "loss": 3.7613, "step": 4425500 }, { "epoch": 49.15, "learning_rate": 2.1183617262288163e-09, "loss": 3.7442, "step": 4426000 }, { "epoch": 49.16, "learning_rate": 2.1044799324789553e-09, "loss": 3.7572, "step": 4426500 }, { "epoch": 49.16, "learning_rate": 2.090598138729094e-09, "loss": 3.7369, "step": 4427000 }, { "epoch": 49.17, "learning_rate": 2.076716344979233e-09, "loss": 3.7495, "step": 4427500 }, { "epoch": 49.17, "learning_rate": 2.0628345512293718e-09, "loss": 3.7577, "step": 4428000 }, { "epoch": 49.18, "learning_rate": 2.0489527574795103e-09, "loss": 3.7458, "step": 4428500 }, { "epoch": 49.19, "learning_rate": 2.0350709637296492e-09, "loss": 3.7673, "step": 4429000 }, { "epoch": 49.19, "learning_rate": 2.021189169979788e-09, "loss": 3.7531, "step": 4429500 }, { "epoch": 49.2, "learning_rate": 2.0073073762299267e-09, "loss": 3.7386, "step": 4430000 }, { "epoch": 49.2, "learning_rate": 1.9934255824800657e-09, "loss": 3.7456, "step": 4430500 }, { "epoch": 49.21, "learning_rate": 1.9795437887302047e-09, "loss": 3.7621, "step": 4431000 }, { "epoch": 49.21, "learning_rate": 1.965661994980343e-09, "loss": 3.7583, "step": 4431500 }, { "epoch": 49.22, "learning_rate": 1.951780201230482e-09, "loss": 3.7574, "step": 4432000 }, { "epoch": 49.22, "learning_rate": 1.937898407480621e-09, "loss": 3.7487, "step": 4432500 }, { "epoch": 49.23, "learning_rate": 1.9240166137307597e-09, "loss": 3.7506, "step": 4433000 }, { "epoch": 49.24, "learning_rate": 1.9101348199808986e-09, "loss": 3.7421, "step": 4433500 }, { "epoch": 49.24, "learning_rate": 1.896253026231037e-09, "loss": 3.722, "step": 4434000 }, { "epoch": 49.25, "learning_rate": 1.882371232481176e-09, "loss": 3.757, "step": 4434500 }, { "epoch": 49.25, "learning_rate": 1.868489438731315e-09, "loss": 3.7406, "step": 4435000 }, { "epoch": 49.26, "learning_rate": 1.8546076449814538e-09, "loss": 3.7292, "step": 4435500 }, { "epoch": 49.26, "learning_rate": 1.8407258512315928e-09, "loss": 3.766, "step": 4436000 }, { "epoch": 49.27, "learning_rate": 1.8268440574817315e-09, "loss": 3.7409, "step": 4436500 }, { "epoch": 49.27, "learning_rate": 1.8129622637318703e-09, "loss": 3.7467, "step": 4437000 }, { "epoch": 49.28, "learning_rate": 1.7990804699820092e-09, "loss": 3.7699, "step": 4437500 }, { "epoch": 49.29, "learning_rate": 1.785198676232148e-09, "loss": 3.7499, "step": 4438000 }, { "epoch": 49.29, "learning_rate": 1.7713168824822867e-09, "loss": 3.7527, "step": 4438500 }, { "epoch": 49.3, "learning_rate": 1.7574350887324257e-09, "loss": 3.7476, "step": 4439000 }, { "epoch": 49.3, "learning_rate": 1.7435532949825644e-09, "loss": 3.731, "step": 4439500 }, { "epoch": 49.31, "learning_rate": 1.7296715012327032e-09, "loss": 3.7352, "step": 4440000 }, { "epoch": 49.31, "learning_rate": 1.715789707482842e-09, "loss": 3.7597, "step": 4440500 }, { "epoch": 49.32, "learning_rate": 1.7019079137329809e-09, "loss": 3.7482, "step": 4441000 }, { "epoch": 49.32, "learning_rate": 1.6880261199831196e-09, "loss": 3.7368, "step": 4441500 }, { "epoch": 49.33, "learning_rate": 1.6741443262332584e-09, "loss": 3.7453, "step": 4442000 }, { "epoch": 49.34, "learning_rate": 1.6602625324833975e-09, "loss": 3.7434, "step": 4442500 }, { "epoch": 49.34, "learning_rate": 1.646380738733536e-09, "loss": 3.7398, "step": 4443000 }, { "epoch": 49.35, "learning_rate": 1.6324989449836748e-09, "loss": 3.7241, "step": 4443500 }, { "epoch": 49.35, "learning_rate": 1.618617151233814e-09, "loss": 3.7415, "step": 4444000 }, { "epoch": 49.36, "learning_rate": 1.6047353574839527e-09, "loss": 3.7426, "step": 4444500 }, { "epoch": 49.36, "learning_rate": 1.5908535637340913e-09, "loss": 3.7419, "step": 4445000 }, { "epoch": 49.37, "learning_rate": 1.57697176998423e-09, "loss": 3.7611, "step": 4445500 }, { "epoch": 49.37, "learning_rate": 1.5630899762343692e-09, "loss": 3.7504, "step": 4446000 }, { "epoch": 49.38, "learning_rate": 1.5492081824845077e-09, "loss": 3.7541, "step": 4446500 }, { "epoch": 49.39, "learning_rate": 1.5353263887346467e-09, "loss": 3.7546, "step": 4447000 }, { "epoch": 49.39, "learning_rate": 1.5214445949847854e-09, "loss": 3.7475, "step": 4447500 }, { "epoch": 49.4, "learning_rate": 1.5075628012349244e-09, "loss": 3.7358, "step": 4448000 }, { "epoch": 49.4, "learning_rate": 1.4936810074850631e-09, "loss": 3.7506, "step": 4448500 }, { "epoch": 49.41, "learning_rate": 1.4797992137352019e-09, "loss": 3.7492, "step": 4449000 }, { "epoch": 49.41, "learning_rate": 1.4659174199853408e-09, "loss": 3.7515, "step": 4449500 }, { "epoch": 49.42, "learning_rate": 1.4520356262354796e-09, "loss": 3.736, "step": 4450000 }, { "epoch": 49.42, "learning_rate": 1.4381538324856183e-09, "loss": 3.7339, "step": 4450500 }, { "epoch": 49.43, "learning_rate": 1.4242720387357573e-09, "loss": 3.7638, "step": 4451000 }, { "epoch": 49.44, "learning_rate": 1.410390244985896e-09, "loss": 3.75, "step": 4451500 }, { "epoch": 49.44, "learning_rate": 1.3965084512360348e-09, "loss": 3.7474, "step": 4452000 }, { "epoch": 49.45, "learning_rate": 1.3826266574861735e-09, "loss": 3.7398, "step": 4452500 }, { "epoch": 49.45, "learning_rate": 1.3687448637363125e-09, "loss": 3.7462, "step": 4453000 }, { "epoch": 49.46, "learning_rate": 1.3548630699864514e-09, "loss": 3.7402, "step": 4453500 }, { "epoch": 49.46, "learning_rate": 1.34098127623659e-09, "loss": 3.7444, "step": 4454000 }, { "epoch": 49.47, "learning_rate": 1.327099482486729e-09, "loss": 3.743, "step": 4454500 }, { "epoch": 49.47, "learning_rate": 1.3132176887368677e-09, "loss": 3.7348, "step": 4455000 }, { "epoch": 49.48, "learning_rate": 1.2993358949870066e-09, "loss": 3.7489, "step": 4455500 }, { "epoch": 49.49, "learning_rate": 1.2854541012371454e-09, "loss": 3.7387, "step": 4456000 }, { "epoch": 49.49, "learning_rate": 1.2715723074872841e-09, "loss": 3.7473, "step": 4456500 }, { "epoch": 49.5, "learning_rate": 1.257690513737423e-09, "loss": 3.7576, "step": 4457000 }, { "epoch": 49.5, "learning_rate": 1.2438087199875618e-09, "loss": 3.7461, "step": 4457500 }, { "epoch": 49.51, "learning_rate": 1.2299269262377006e-09, "loss": 3.7505, "step": 4458000 }, { "epoch": 49.51, "learning_rate": 1.2160451324878395e-09, "loss": 3.7514, "step": 4458500 }, { "epoch": 49.52, "learning_rate": 1.2021633387379783e-09, "loss": 3.7436, "step": 4459000 }, { "epoch": 49.52, "learning_rate": 1.188281544988117e-09, "loss": 3.7425, "step": 4459500 }, { "epoch": 49.53, "learning_rate": 1.174399751238256e-09, "loss": 3.7549, "step": 4460000 }, { "epoch": 49.54, "learning_rate": 1.1605179574883947e-09, "loss": 3.7402, "step": 4460500 }, { "epoch": 49.54, "learning_rate": 1.1466361637385337e-09, "loss": 3.7342, "step": 4461000 }, { "epoch": 49.55, "learning_rate": 1.1327543699886722e-09, "loss": 3.7424, "step": 4461500 }, { "epoch": 49.55, "learning_rate": 1.1188725762388112e-09, "loss": 3.7396, "step": 4462000 }, { "epoch": 49.56, "learning_rate": 1.1049907824889502e-09, "loss": 3.7593, "step": 4462500 }, { "epoch": 49.56, "learning_rate": 1.091108988739089e-09, "loss": 3.7552, "step": 4463000 }, { "epoch": 49.57, "learning_rate": 1.0772271949892277e-09, "loss": 3.7461, "step": 4463500 }, { "epoch": 49.57, "learning_rate": 1.0633454012393664e-09, "loss": 3.7367, "step": 4464000 }, { "epoch": 49.58, "learning_rate": 1.0494636074895054e-09, "loss": 3.7456, "step": 4464500 }, { "epoch": 49.59, "learning_rate": 1.0355818137396441e-09, "loss": 3.7521, "step": 4465000 }, { "epoch": 49.59, "learning_rate": 1.0217000199897829e-09, "loss": 3.7379, "step": 4465500 }, { "epoch": 49.6, "learning_rate": 1.0078182262399218e-09, "loss": 3.7427, "step": 4466000 }, { "epoch": 49.6, "learning_rate": 9.939364324900606e-10, "loss": 3.756, "step": 4466500 }, { "epoch": 49.61, "learning_rate": 9.800546387401993e-10, "loss": 3.7639, "step": 4467000 }, { "epoch": 49.61, "learning_rate": 9.661728449903383e-10, "loss": 3.7395, "step": 4467500 }, { "epoch": 49.62, "learning_rate": 9.52291051240477e-10, "loss": 3.7458, "step": 4468000 }, { "epoch": 49.62, "learning_rate": 9.38409257490616e-10, "loss": 3.749, "step": 4468500 }, { "epoch": 49.63, "learning_rate": 9.245274637407546e-10, "loss": 3.7483, "step": 4469000 }, { "epoch": 49.64, "learning_rate": 9.106456699908935e-10, "loss": 3.7516, "step": 4469500 }, { "epoch": 49.64, "learning_rate": 8.967638762410324e-10, "loss": 3.7545, "step": 4470000 }, { "epoch": 49.65, "learning_rate": 8.828820824911711e-10, "loss": 3.7323, "step": 4470500 }, { "epoch": 49.65, "learning_rate": 8.6900028874131e-10, "loss": 3.7421, "step": 4471000 }, { "epoch": 49.66, "learning_rate": 8.551184949914489e-10, "loss": 3.727, "step": 4471500 }, { "epoch": 49.66, "learning_rate": 8.412367012415876e-10, "loss": 3.7473, "step": 4472000 }, { "epoch": 49.67, "learning_rate": 8.273549074917265e-10, "loss": 3.7502, "step": 4472500 }, { "epoch": 49.67, "learning_rate": 8.134731137418652e-10, "loss": 3.7533, "step": 4473000 }, { "epoch": 49.68, "learning_rate": 7.995913199920041e-10, "loss": 3.737, "step": 4473500 }, { "epoch": 49.69, "learning_rate": 7.857095262421429e-10, "loss": 3.7678, "step": 4474000 }, { "epoch": 49.69, "learning_rate": 7.718277324922817e-10, "loss": 3.7366, "step": 4474500 }, { "epoch": 49.7, "learning_rate": 7.579459387424205e-10, "loss": 3.7476, "step": 4475000 }, { "epoch": 49.7, "learning_rate": 7.440641449925594e-10, "loss": 3.7469, "step": 4475500 }, { "epoch": 49.71, "learning_rate": 7.301823512426981e-10, "loss": 3.7447, "step": 4476000 }, { "epoch": 49.71, "learning_rate": 7.16300557492837e-10, "loss": 3.7465, "step": 4476500 }, { "epoch": 49.72, "learning_rate": 7.024187637429757e-10, "loss": 3.7516, "step": 4477000 }, { "epoch": 49.72, "learning_rate": 6.885369699931146e-10, "loss": 3.765, "step": 4477500 }, { "epoch": 49.73, "learning_rate": 6.746551762432534e-10, "loss": 3.7353, "step": 4478000 }, { "epoch": 49.74, "learning_rate": 6.607733824933922e-10, "loss": 3.7609, "step": 4478500 }, { "epoch": 49.74, "learning_rate": 6.46891588743531e-10, "loss": 3.7521, "step": 4479000 }, { "epoch": 49.75, "learning_rate": 6.330097949936699e-10, "loss": 3.7518, "step": 4479500 }, { "epoch": 49.75, "learning_rate": 6.191280012438087e-10, "loss": 3.7477, "step": 4480000 }, { "epoch": 49.76, "learning_rate": 6.052462074939475e-10, "loss": 3.7675, "step": 4480500 }, { "epoch": 49.76, "learning_rate": 5.913644137440863e-10, "loss": 3.7429, "step": 4481000 }, { "epoch": 49.77, "learning_rate": 5.774826199942251e-10, "loss": 3.7585, "step": 4481500 }, { "epoch": 49.77, "learning_rate": 5.63600826244364e-10, "loss": 3.7566, "step": 4482000 }, { "epoch": 49.78, "learning_rate": 5.497190324945028e-10, "loss": 3.7519, "step": 4482500 }, { "epoch": 49.79, "learning_rate": 5.358372387446416e-10, "loss": 3.7361, "step": 4483000 }, { "epoch": 49.79, "learning_rate": 5.219554449947804e-10, "loss": 3.7402, "step": 4483500 }, { "epoch": 49.8, "learning_rate": 5.080736512449192e-10, "loss": 3.7263, "step": 4484000 }, { "epoch": 49.8, "learning_rate": 4.941918574950581e-10, "loss": 3.7414, "step": 4484500 }, { "epoch": 49.81, "learning_rate": 4.803100637451968e-10, "loss": 3.7291, "step": 4485000 }, { "epoch": 49.81, "learning_rate": 4.664282699953357e-10, "loss": 3.7426, "step": 4485500 }, { "epoch": 49.82, "learning_rate": 4.525464762454745e-10, "loss": 3.7538, "step": 4486000 }, { "epoch": 49.82, "learning_rate": 4.386646824956134e-10, "loss": 3.7286, "step": 4486500 }, { "epoch": 49.83, "learning_rate": 4.247828887457522e-10, "loss": 3.7406, "step": 4487000 }, { "epoch": 49.84, "learning_rate": 4.10901094995891e-10, "loss": 3.7444, "step": 4487500 }, { "epoch": 49.84, "learning_rate": 3.9701930124602974e-10, "loss": 3.7448, "step": 4488000 }, { "epoch": 49.85, "learning_rate": 3.831375074961686e-10, "loss": 3.7188, "step": 4488500 }, { "epoch": 49.85, "learning_rate": 3.692557137463074e-10, "loss": 3.7447, "step": 4489000 }, { "epoch": 49.86, "learning_rate": 3.5537391999644625e-10, "loss": 3.7544, "step": 4489500 }, { "epoch": 49.86, "learning_rate": 3.4149212624658505e-10, "loss": 3.7333, "step": 4490000 }, { "epoch": 49.87, "learning_rate": 3.276103324967239e-10, "loss": 3.7406, "step": 4490500 }, { "epoch": 49.87, "learning_rate": 3.137285387468627e-10, "loss": 3.7704, "step": 4491000 }, { "epoch": 49.88, "learning_rate": 2.998467449970015e-10, "loss": 3.7435, "step": 4491500 }, { "epoch": 49.89, "learning_rate": 2.8596495124714035e-10, "loss": 3.7519, "step": 4492000 }, { "epoch": 49.89, "learning_rate": 2.7208315749727915e-10, "loss": 3.7512, "step": 4492500 }, { "epoch": 49.9, "learning_rate": 2.58201363747418e-10, "loss": 3.7613, "step": 4493000 }, { "epoch": 49.9, "learning_rate": 2.443195699975568e-10, "loss": 3.7335, "step": 4493500 }, { "epoch": 49.91, "learning_rate": 2.3043777624769563e-10, "loss": 3.7446, "step": 4494000 }, { "epoch": 49.91, "learning_rate": 2.1655598249783443e-10, "loss": 3.7563, "step": 4494500 }, { "epoch": 49.92, "learning_rate": 2.0267418874797323e-10, "loss": 3.7504, "step": 4495000 }, { "epoch": 49.92, "learning_rate": 1.8879239499811206e-10, "loss": 3.7485, "step": 4495500 }, { "epoch": 49.93, "learning_rate": 1.7491060124825088e-10, "loss": 3.7562, "step": 4496000 }, { "epoch": 49.94, "learning_rate": 1.610288074983897e-10, "loss": 3.7513, "step": 4496500 }, { "epoch": 49.94, "learning_rate": 1.471470137485285e-10, "loss": 3.7387, "step": 4497000 }, { "epoch": 49.95, "learning_rate": 1.3326521999866734e-10, "loss": 3.7599, "step": 4497500 }, { "epoch": 49.95, "learning_rate": 1.1938342624880616e-10, "loss": 3.7616, "step": 4498000 }, { "epoch": 49.96, "learning_rate": 1.0550163249894498e-10, "loss": 3.7502, "step": 4498500 }, { "epoch": 49.96, "learning_rate": 9.16198387490838e-11, "loss": 3.7519, "step": 4499000 }, { "epoch": 49.97, "learning_rate": 7.773804499922262e-11, "loss": 3.74, "step": 4499500 }, { "epoch": 49.97, "learning_rate": 6.385625124936143e-11, "loss": 3.741, "step": 4500000 }, { "epoch": 49.98, "learning_rate": 4.9974457499500255e-11, "loss": 3.7423, "step": 4500500 }, { "epoch": 49.99, "learning_rate": 3.609266374963907e-11, "loss": 3.7529, "step": 4501000 }, { "epoch": 49.99, "learning_rate": 2.221086999977789e-11, "loss": 3.7412, "step": 4501500 }, { "epoch": 50.0, "learning_rate": 8.329076249916709e-12, "loss": 3.7532, "step": 4502000 }, { "epoch": 50.0, "eval_loss": 3.824197769165039, "eval_runtime": 6.3023, "eval_samples_per_second": 246.578, "step": 4502300 } ], "max_steps": 4502300, "num_train_epochs": 50, "total_flos": 2.1542227070270976e+17, "trial_name": null, "trial_params": null }