diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,16153 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 1312119, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999999809468501e-05, + "loss": 11.0183, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904734250476e-05, + "loss": 8.1808, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999809468500952e-05, + "loss": 7.4836, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999714202751428e-05, + "loss": 7.0935, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999618937001903e-05, + "loss": 6.8477, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9995236712523794e-05, + "loss": 6.6176, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999428596034354e-05, + "loss": 6.4164, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9993333302848294e-05, + "loss": 6.2092, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999238064535305e-05, + "loss": 6.0244, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999142798785781e-05, + "loss": 5.9575, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999047533036257e-05, + "loss": 5.7715, + "step": 5000 + }, + { + "epoch": 0.0, + "learning_rate": 4.998952267286733e-05, + "loss": 5.6966, + "step": 5500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9988570015372085e-05, + "loss": 5.516, + "step": 6000 + }, + { + "epoch": 0.0, + "learning_rate": 4.998761735787684e-05, + "loss": 5.4432, + "step": 6500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986664700381595e-05, + "loss": 5.3452, + "step": 7000 + }, + { + "epoch": 0.01, + "learning_rate": 4.998571204288636e-05, + "loss": 5.21, + "step": 7500 + }, + { + "epoch": 0.01, + "learning_rate": 4.998475938539112e-05, + "loss": 5.1467, + "step": 8000 + }, + { + "epoch": 0.01, + "learning_rate": 4.998380672789587e-05, + "loss": 5.0636, + "step": 8500 + }, + { + "epoch": 0.01, + "learning_rate": 4.998285597571562e-05, + "loss": 4.989, + "step": 9000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981905223535366e-05, + "loss": 4.8955, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980952566040125e-05, + "loss": 4.8076, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 4.997999990854488e-05, + "loss": 4.7676, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979047251049634e-05, + "loss": 4.7127, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 4.997809649886939e-05, + "loss": 4.5816, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 4.997714384137415e-05, + "loss": 4.507, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 4.99761911838789e-05, + "loss": 4.4491, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 4.997523852638366e-05, + "loss": 4.4011, + "step": 13000 + }, + { + "epoch": 0.01, + "learning_rate": 4.997428586888842e-05, + "loss": 4.3559, + "step": 13500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973333211393174e-05, + "loss": 4.3054, + "step": 14000 + }, + { + "epoch": 0.01, + "learning_rate": 4.997238055389793e-05, + "loss": 4.276, + "step": 14500 + }, + { + "epoch": 0.01, + "learning_rate": 4.997142789640269e-05, + "loss": 4.2676, + "step": 15000 + }, + { + "epoch": 0.01, + "learning_rate": 4.997047523890745e-05, + "loss": 4.165, + "step": 15500 + }, + { + "epoch": 0.01, + "learning_rate": 4.996952258141221e-05, + "loss": 4.1877, + "step": 16000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9968569923916966e-05, + "loss": 4.1484, + "step": 16500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9967617266421724e-05, + "loss": 4.1325, + "step": 17000 + }, + { + "epoch": 0.01, + "learning_rate": 4.996666651424147e-05, + "loss": 4.0963, + "step": 17500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9965713856746224e-05, + "loss": 4.0634, + "step": 18000 + }, + { + "epoch": 0.01, + "learning_rate": 4.996476119925099e-05, + "loss": 4.0009, + "step": 18500 + }, + { + "epoch": 0.01, + "learning_rate": 4.996381044707074e-05, + "loss": 3.9726, + "step": 19000 + }, + { + "epoch": 0.01, + "learning_rate": 4.996285778957549e-05, + "loss": 3.9568, + "step": 19500 + }, + { + "epoch": 0.02, + "learning_rate": 4.996190513208025e-05, + "loss": 3.9486, + "step": 20000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960952474585005e-05, + "loss": 3.9449, + "step": 20500 + }, + { + "epoch": 0.02, + "learning_rate": 4.995999981708976e-05, + "loss": 3.9204, + "step": 21000 + }, + { + "epoch": 0.02, + "learning_rate": 4.995904715959452e-05, + "loss": 3.9092, + "step": 21500 + }, + { + "epoch": 0.02, + "learning_rate": 4.995809450209928e-05, + "loss": 3.8644, + "step": 22000 + }, + { + "epoch": 0.02, + "learning_rate": 4.995714374991903e-05, + "loss": 3.8551, + "step": 22500 + }, + { + "epoch": 0.02, + "learning_rate": 4.995619109242378e-05, + "loss": 3.8465, + "step": 23000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9955238434928545e-05, + "loss": 3.8054, + "step": 23500 + }, + { + "epoch": 0.02, + "learning_rate": 4.99542857774333e-05, + "loss": 3.8097, + "step": 24000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9953333119938055e-05, + "loss": 3.7812, + "step": 24500 + }, + { + "epoch": 0.02, + "learning_rate": 4.995238046244281e-05, + "loss": 3.7919, + "step": 25000 + }, + { + "epoch": 0.02, + "learning_rate": 4.995142780494757e-05, + "loss": 3.7534, + "step": 25500 + }, + { + "epoch": 0.02, + "learning_rate": 4.995047514745233e-05, + "loss": 3.7539, + "step": 26000 + }, + { + "epoch": 0.02, + "learning_rate": 4.994952439527208e-05, + "loss": 3.7195, + "step": 26500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9948571737776836e-05, + "loss": 3.7117, + "step": 27000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9947620985596584e-05, + "loss": 3.7003, + "step": 27500 + }, + { + "epoch": 0.02, + "learning_rate": 4.994666832810134e-05, + "loss": 3.6942, + "step": 28000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9945715670606094e-05, + "loss": 3.671, + "step": 28500 + }, + { + "epoch": 0.02, + "learning_rate": 4.994476301311086e-05, + "loss": 3.68, + "step": 29000 + }, + { + "epoch": 0.02, + "learning_rate": 4.994381035561561e-05, + "loss": 3.6613, + "step": 29500 + }, + { + "epoch": 0.02, + "learning_rate": 4.994285769812037e-05, + "loss": 3.6584, + "step": 30000 + }, + { + "epoch": 0.02, + "eval_accuracy": 0.40299365911369156, + "eval_loss": 3.563416004180908, + "eval_runtime": 9454.6759, + "eval_samples_per_second": 29.085, + "eval_steps_per_second": 7.271, + "step": 30000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9941905040625134e-05, + "loss": 3.6474, + "step": 30500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940952383129886e-05, + "loss": 3.6035, + "step": 31000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939999725634644e-05, + "loss": 3.6543, + "step": 31500 + }, + { + "epoch": 0.02, + "learning_rate": 4.99390470681394e-05, + "loss": 3.6094, + "step": 32000 + }, + { + "epoch": 0.02, + "learning_rate": 4.993809631595915e-05, + "loss": 3.6101, + "step": 32500 + }, + { + "epoch": 0.03, + "learning_rate": 4.99371455637789e-05, + "loss": 3.5973, + "step": 33000 + }, + { + "epoch": 0.03, + "learning_rate": 4.993619290628366e-05, + "loss": 3.6071, + "step": 33500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9935240248788415e-05, + "loss": 3.5643, + "step": 34000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9934287591293174e-05, + "loss": 3.5414, + "step": 34500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9933334933797925e-05, + "loss": 3.54, + "step": 35000 + }, + { + "epoch": 0.03, + "learning_rate": 4.993238227630268e-05, + "loss": 3.5529, + "step": 35500 + }, + { + "epoch": 0.03, + "learning_rate": 4.993143152412244e-05, + "loss": 3.5395, + "step": 36000 + }, + { + "epoch": 0.03, + "learning_rate": 4.993047886662719e-05, + "loss": 3.5193, + "step": 36500 + }, + { + "epoch": 0.03, + "learning_rate": 4.992952620913195e-05, + "loss": 3.5087, + "step": 37000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9928573551636706e-05, + "loss": 3.4989, + "step": 37500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9927620894141465e-05, + "loss": 3.5179, + "step": 38000 + }, + { + "epoch": 0.03, + "learning_rate": 4.992666823664622e-05, + "loss": 3.5175, + "step": 38500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9925715579150975e-05, + "loss": 3.4805, + "step": 39000 + }, + { + "epoch": 0.03, + "learning_rate": 4.992476482697073e-05, + "loss": 3.4935, + "step": 39500 + }, + { + "epoch": 0.03, + "learning_rate": 4.992381216947549e-05, + "loss": 3.4869, + "step": 40000 + }, + { + "epoch": 0.03, + "learning_rate": 4.992285951198024e-05, + "loss": 3.4929, + "step": 40500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9921906854485e-05, + "loss": 3.4931, + "step": 41000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920954196989756e-05, + "loss": 3.4775, + "step": 41500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9920003444809504e-05, + "loss": 3.4333, + "step": 42000 + }, + { + "epoch": 0.03, + "learning_rate": 4.991905078731426e-05, + "loss": 3.4168, + "step": 42500 + }, + { + "epoch": 0.03, + "learning_rate": 4.991809812981902e-05, + "loss": 3.4649, + "step": 43000 + }, + { + "epoch": 0.03, + "learning_rate": 4.991714547232378e-05, + "loss": 3.4142, + "step": 43500 + }, + { + "epoch": 0.03, + "learning_rate": 4.991619472014353e-05, + "loss": 3.4316, + "step": 44000 + }, + { + "epoch": 0.03, + "learning_rate": 4.991524206264828e-05, + "loss": 3.4588, + "step": 44500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9914289405153044e-05, + "loss": 3.4399, + "step": 45000 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913336747657796e-05, + "loss": 3.4236, + "step": 45500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9912384090162554e-05, + "loss": 3.3971, + "step": 46000 + }, + { + "epoch": 0.04, + "learning_rate": 4.991143143266732e-05, + "loss": 3.4369, + "step": 46500 + }, + { + "epoch": 0.04, + "learning_rate": 4.991047877517207e-05, + "loss": 3.384, + "step": 47000 + }, + { + "epoch": 0.04, + "learning_rate": 4.990952611767683e-05, + "loss": 3.3818, + "step": 47500 + }, + { + "epoch": 0.04, + "learning_rate": 4.990857346018159e-05, + "loss": 3.3393, + "step": 48000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9907620802686345e-05, + "loss": 3.3585, + "step": 48500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9906670050506094e-05, + "loss": 3.3456, + "step": 49000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9905717393010845e-05, + "loss": 3.4045, + "step": 49500 + }, + { + "epoch": 0.04, + "learning_rate": 4.99047666408306e-05, + "loss": 3.3669, + "step": 50000 + }, + { + "epoch": 0.04, + "learning_rate": 4.990381398333536e-05, + "loss": 3.3354, + "step": 50500 + }, + { + "epoch": 0.04, + "learning_rate": 4.990286132584011e-05, + "loss": 3.3635, + "step": 51000 + }, + { + "epoch": 0.04, + "learning_rate": 4.990190866834487e-05, + "loss": 3.3211, + "step": 51500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9900956010849626e-05, + "loss": 3.3472, + "step": 52000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9900003353354385e-05, + "loss": 3.3663, + "step": 52500 + }, + { + "epoch": 0.04, + "learning_rate": 4.989905260117413e-05, + "loss": 3.3315, + "step": 53000 + }, + { + "epoch": 0.04, + "learning_rate": 4.989809994367889e-05, + "loss": 3.3305, + "step": 53500 + }, + { + "epoch": 0.04, + "learning_rate": 4.989714728618365e-05, + "loss": 3.3257, + "step": 54000 + }, + { + "epoch": 0.04, + "learning_rate": 4.989619462868841e-05, + "loss": 3.2911, + "step": 54500 + }, + { + "epoch": 0.04, + "learning_rate": 4.989524387650815e-05, + "loss": 3.3134, + "step": 55000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9894291219012914e-05, + "loss": 3.3043, + "step": 55500 + }, + { + "epoch": 0.04, + "learning_rate": 4.989333856151767e-05, + "loss": 3.291, + "step": 56000 + }, + { + "epoch": 0.04, + "learning_rate": 4.9892385904022424e-05, + "loss": 3.3177, + "step": 56500 + }, + { + "epoch": 0.04, + "learning_rate": 4.989143324652719e-05, + "loss": 3.3043, + "step": 57000 + }, + { + "epoch": 0.04, + "learning_rate": 4.989048058903194e-05, + "loss": 3.2928, + "step": 57500 + }, + { + "epoch": 0.04, + "learning_rate": 4.98895279315367e-05, + "loss": 3.2675, + "step": 58000 + }, + { + "epoch": 0.04, + "learning_rate": 4.988857527404146e-05, + "loss": 3.2713, + "step": 58500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9887624521861206e-05, + "loss": 3.2884, + "step": 59000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9886671864365964e-05, + "loss": 3.304, + "step": 59500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9885719206870716e-05, + "loss": 3.2648, + "step": 60000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.4417787159886874, + "eval_loss": 3.2018163204193115, + "eval_runtime": 9409.4577, + "eval_samples_per_second": 29.225, + "eval_steps_per_second": 7.306, + "step": 60000 + }, + { + "epoch": 0.05, + "learning_rate": 4.988476654937548e-05, + "loss": 3.2698, + "step": 60500 + }, + { + "epoch": 0.05, + "learning_rate": 4.988381389188024e-05, + "loss": 3.2763, + "step": 61000 + }, + { + "epoch": 0.05, + "learning_rate": 4.988286123438499e-05, + "loss": 3.2728, + "step": 61500 + }, + { + "epoch": 0.05, + "learning_rate": 4.988190857688975e-05, + "loss": 3.259, + "step": 62000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9880957824709504e-05, + "loss": 3.256, + "step": 62500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9880005167214255e-05, + "loss": 3.2937, + "step": 63000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9879052509719014e-05, + "loss": 3.2598, + "step": 63500 + }, + { + "epoch": 0.05, + "learning_rate": 4.987809985222377e-05, + "loss": 3.2054, + "step": 64000 + }, + { + "epoch": 0.05, + "learning_rate": 4.987714719472853e-05, + "loss": 3.2119, + "step": 64500 + }, + { + "epoch": 0.05, + "learning_rate": 4.987619453723329e-05, + "loss": 3.2475, + "step": 65000 + }, + { + "epoch": 0.05, + "learning_rate": 4.987524187973805e-05, + "loss": 3.2348, + "step": 65500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9874291127557795e-05, + "loss": 3.2456, + "step": 66000 + }, + { + "epoch": 0.05, + "learning_rate": 4.987333847006255e-05, + "loss": 3.2274, + "step": 66500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9872385812567305e-05, + "loss": 3.2269, + "step": 67000 + }, + { + "epoch": 0.05, + "learning_rate": 4.987143315507207e-05, + "loss": 3.2275, + "step": 67500 + }, + { + "epoch": 0.05, + "learning_rate": 4.987048049757682e-05, + "loss": 3.2366, + "step": 68000 + }, + { + "epoch": 0.05, + "learning_rate": 4.986952784008158e-05, + "loss": 3.2054, + "step": 68500 + }, + { + "epoch": 0.05, + "learning_rate": 4.986857708790133e-05, + "loss": 3.2311, + "step": 69000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9867624430406086e-05, + "loss": 3.1811, + "step": 69500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9866671772910844e-05, + "loss": 3.1802, + "step": 70000 + }, + { + "epoch": 0.05, + "learning_rate": 4.9865719115415596e-05, + "loss": 3.2048, + "step": 70500 + }, + { + "epoch": 0.05, + "learning_rate": 4.986476645792036e-05, + "loss": 3.2102, + "step": 71000 + }, + { + "epoch": 0.05, + "learning_rate": 4.986381380042512e-05, + "loss": 3.1934, + "step": 71500 + }, + { + "epoch": 0.05, + "learning_rate": 4.986286114292987e-05, + "loss": 3.1902, + "step": 72000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9861908485434636e-05, + "loss": 3.1633, + "step": 72500 + }, + { + "epoch": 0.06, + "learning_rate": 4.986095582793939e-05, + "loss": 3.1777, + "step": 73000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9860003170444146e-05, + "loss": 3.1979, + "step": 73500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9859052418263894e-05, + "loss": 3.2044, + "step": 74000 + }, + { + "epoch": 0.06, + "learning_rate": 4.985809976076865e-05, + "loss": 3.1902, + "step": 74500 + }, + { + "epoch": 0.06, + "learning_rate": 4.985714710327341e-05, + "loss": 3.1951, + "step": 75000 + }, + { + "epoch": 0.06, + "learning_rate": 4.985619635109316e-05, + "loss": 3.1676, + "step": 75500 + }, + { + "epoch": 0.06, + "learning_rate": 4.985524369359792e-05, + "loss": 3.1914, + "step": 76000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9854291036102675e-05, + "loss": 3.1693, + "step": 76500 + }, + { + "epoch": 0.06, + "learning_rate": 4.985333837860743e-05, + "loss": 3.1765, + "step": 77000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9852385721112185e-05, + "loss": 3.1891, + "step": 77500 + }, + { + "epoch": 0.06, + "learning_rate": 4.985143306361695e-05, + "loss": 3.183, + "step": 78000 + }, + { + "epoch": 0.06, + "learning_rate": 4.98504804061217e-05, + "loss": 3.1294, + "step": 78500 + }, + { + "epoch": 0.06, + "learning_rate": 4.984952774862646e-05, + "loss": 3.1808, + "step": 79000 + }, + { + "epoch": 0.06, + "learning_rate": 4.98485789017612e-05, + "loss": 3.1302, + "step": 79500 + }, + { + "epoch": 0.06, + "learning_rate": 4.984762624426596e-05, + "loss": 3.1417, + "step": 80000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9846675492085705e-05, + "loss": 3.1261, + "step": 80500 + }, + { + "epoch": 0.06, + "learning_rate": 4.984572283459046e-05, + "loss": 3.1386, + "step": 81000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9844770177095215e-05, + "loss": 3.1012, + "step": 81500 + }, + { + "epoch": 0.06, + "learning_rate": 4.984381751959998e-05, + "loss": 3.1128, + "step": 82000 + }, + { + "epoch": 0.06, + "learning_rate": 4.984286486210474e-05, + "loss": 3.1119, + "step": 82500 + }, + { + "epoch": 0.06, + "learning_rate": 4.984191220460949e-05, + "loss": 3.1521, + "step": 83000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9840959547114255e-05, + "loss": 3.1592, + "step": 83500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9840006889619006e-05, + "loss": 3.1215, + "step": 84000 + }, + { + "epoch": 0.06, + "learning_rate": 4.9839054232123764e-05, + "loss": 3.1312, + "step": 84500 + }, + { + "epoch": 0.06, + "learning_rate": 4.983810157462852e-05, + "loss": 3.1187, + "step": 85000 + }, + { + "epoch": 0.07, + "learning_rate": 4.983715082244827e-05, + "loss": 3.1045, + "step": 85500 + }, + { + "epoch": 0.07, + "learning_rate": 4.983619816495303e-05, + "loss": 3.1085, + "step": 86000 + }, + { + "epoch": 0.07, + "learning_rate": 4.983524550745778e-05, + "loss": 3.1197, + "step": 86500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9834292849962546e-05, + "loss": 3.1308, + "step": 87000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9833342097782294e-05, + "loss": 3.0803, + "step": 87500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9832389440287046e-05, + "loss": 3.1056, + "step": 88000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9831436782791804e-05, + "loss": 3.0895, + "step": 88500 + }, + { + "epoch": 0.07, + "learning_rate": 4.983048412529657e-05, + "loss": 3.0868, + "step": 89000 + }, + { + "epoch": 0.07, + "learning_rate": 4.982953146780132e-05, + "loss": 3.1079, + "step": 89500 + }, + { + "epoch": 0.07, + "learning_rate": 4.982858071562107e-05, + "loss": 3.0978, + "step": 90000 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.4609307256219239, + "eval_loss": 3.0301129817962646, + "eval_runtime": 9409.0489, + "eval_samples_per_second": 29.226, + "eval_steps_per_second": 7.306, + "step": 90000 + }, + { + "epoch": 0.07, + "learning_rate": 4.982762996344082e-05, + "loss": 3.1018, + "step": 90500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9826677305945575e-05, + "loss": 3.1006, + "step": 91000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9825724648450334e-05, + "loss": 3.0716, + "step": 91500 + }, + { + "epoch": 0.07, + "learning_rate": 4.982477199095509e-05, + "loss": 3.0794, + "step": 92000 + }, + { + "epoch": 0.07, + "learning_rate": 4.982381933345985e-05, + "loss": 3.1029, + "step": 92500 + }, + { + "epoch": 0.07, + "learning_rate": 4.982286667596461e-05, + "loss": 3.088, + "step": 93000 + }, + { + "epoch": 0.07, + "learning_rate": 4.982191401846936e-05, + "loss": 3.0811, + "step": 93500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9820961360974125e-05, + "loss": 3.0673, + "step": 94000 + }, + { + "epoch": 0.07, + "learning_rate": 4.982000870347888e-05, + "loss": 3.045, + "step": 94500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9819057951298625e-05, + "loss": 3.063, + "step": 95000 + }, + { + "epoch": 0.07, + "learning_rate": 4.981810529380338e-05, + "loss": 3.0866, + "step": 95500 + }, + { + "epoch": 0.07, + "learning_rate": 4.981715263630814e-05, + "loss": 3.067, + "step": 96000 + }, + { + "epoch": 0.07, + "learning_rate": 4.98161999788129e-05, + "loss": 3.057, + "step": 96500 + }, + { + "epoch": 0.07, + "learning_rate": 4.981524732131766e-05, + "loss": 3.0727, + "step": 97000 + }, + { + "epoch": 0.07, + "learning_rate": 4.9814296569137406e-05, + "loss": 3.0557, + "step": 97500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9813343911642165e-05, + "loss": 3.0411, + "step": 98000 + }, + { + "epoch": 0.08, + "learning_rate": 4.981239125414692e-05, + "loss": 3.0588, + "step": 98500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9811438596651674e-05, + "loss": 3.0624, + "step": 99000 + }, + { + "epoch": 0.08, + "learning_rate": 4.981048593915644e-05, + "loss": 3.0663, + "step": 99500 + }, + { + "epoch": 0.08, + "learning_rate": 4.980953328166119e-05, + "loss": 3.0606, + "step": 100000 + }, + { + "epoch": 0.08, + "learning_rate": 4.980858062416595e-05, + "loss": 3.0231, + "step": 100500 + }, + { + "epoch": 0.08, + "learning_rate": 4.98076298719857e-05, + "loss": 3.0685, + "step": 101000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9806677214490456e-05, + "loss": 3.0324, + "step": 101500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9805724556995214e-05, + "loss": 3.0673, + "step": 102000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9804771899499966e-05, + "loss": 3.0388, + "step": 102500 + }, + { + "epoch": 0.08, + "learning_rate": 4.980381924200473e-05, + "loss": 3.0676, + "step": 103000 + }, + { + "epoch": 0.08, + "learning_rate": 4.980286848982448e-05, + "loss": 3.0401, + "step": 103500 + }, + { + "epoch": 0.08, + "learning_rate": 4.980191583232923e-05, + "loss": 3.0467, + "step": 104000 + }, + { + "epoch": 0.08, + "learning_rate": 4.980096317483399e-05, + "loss": 3.0953, + "step": 104500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9800010517338754e-05, + "loss": 3.0276, + "step": 105000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799057859843505e-05, + "loss": 3.0124, + "step": 105500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9798105202348264e-05, + "loss": 3.0765, + "step": 106000 + }, + { + "epoch": 0.08, + "learning_rate": 4.979715445016802e-05, + "loss": 3.0445, + "step": 106500 + }, + { + "epoch": 0.08, + "learning_rate": 4.979620179267277e-05, + "loss": 3.0133, + "step": 107000 + }, + { + "epoch": 0.08, + "learning_rate": 4.979524913517753e-05, + "loss": 3.0313, + "step": 107500 + }, + { + "epoch": 0.08, + "learning_rate": 4.979429647768229e-05, + "loss": 3.0353, + "step": 108000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9793343820187045e-05, + "loss": 3.0559, + "step": 108500 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792393068006793e-05, + "loss": 3.0513, + "step": 109000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9791440410511545e-05, + "loss": 3.0325, + "step": 109500 + }, + { + "epoch": 0.08, + "learning_rate": 4.979048775301631e-05, + "loss": 2.9789, + "step": 110000 + }, + { + "epoch": 0.08, + "learning_rate": 4.978953509552106e-05, + "loss": 3.0123, + "step": 110500 + }, + { + "epoch": 0.08, + "learning_rate": 4.978858243802582e-05, + "loss": 3.0376, + "step": 111000 + }, + { + "epoch": 0.08, + "learning_rate": 4.978763168584557e-05, + "loss": 2.9944, + "step": 111500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9786679028350326e-05, + "loss": 3.0379, + "step": 112000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9785726370855085e-05, + "loss": 3.037, + "step": 112500 + }, + { + "epoch": 0.09, + "learning_rate": 4.978477371335984e-05, + "loss": 3.033, + "step": 113000 + }, + { + "epoch": 0.09, + "learning_rate": 4.978382296117959e-05, + "loss": 3.0064, + "step": 113500 + }, + { + "epoch": 0.09, + "learning_rate": 4.978287030368435e-05, + "loss": 3.0156, + "step": 114000 + }, + { + "epoch": 0.09, + "learning_rate": 4.978191764618911e-05, + "loss": 2.982, + "step": 114500 + }, + { + "epoch": 0.09, + "learning_rate": 4.978096498869386e-05, + "loss": 2.9832, + "step": 115000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9780012331198624e-05, + "loss": 2.9951, + "step": 115500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9779059673703376e-05, + "loss": 3.0045, + "step": 116000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9778107016208134e-05, + "loss": 3.0056, + "step": 116500 + }, + { + "epoch": 0.09, + "learning_rate": 4.977715626402788e-05, + "loss": 3.0038, + "step": 117000 + }, + { + "epoch": 0.09, + "learning_rate": 4.977620360653264e-05, + "loss": 3.0, + "step": 117500 + }, + { + "epoch": 0.09, + "learning_rate": 4.97752509490374e-05, + "loss": 2.9905, + "step": 118000 + }, + { + "epoch": 0.09, + "learning_rate": 4.977429829154216e-05, + "loss": 2.9814, + "step": 118500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9773345634046916e-05, + "loss": 3.015, + "step": 119000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772392976551674e-05, + "loss": 2.9816, + "step": 119500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9771440319056425e-05, + "loss": 2.9834, + "step": 120000 + }, + { + "epoch": 0.09, + "eval_accuracy": 0.4731336645831918, + "eval_loss": 2.9267640113830566, + "eval_runtime": 9410.96, + "eval_samples_per_second": 29.22, + "eval_steps_per_second": 7.305, + "step": 120000 + }, + { + "epoch": 0.09, + "learning_rate": 4.977048766156119e-05, + "loss": 3.017, + "step": 120500 + }, + { + "epoch": 0.09, + "learning_rate": 4.976953690938094e-05, + "loss": 2.9735, + "step": 121000 + }, + { + "epoch": 0.09, + "learning_rate": 4.976858425188569e-05, + "loss": 2.9824, + "step": 121500 + }, + { + "epoch": 0.09, + "learning_rate": 4.976763349970544e-05, + "loss": 3.014, + "step": 122000 + }, + { + "epoch": 0.09, + "learning_rate": 4.9766680842210204e-05, + "loss": 2.986, + "step": 122500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9765730090029945e-05, + "loss": 2.9826, + "step": 123000 + }, + { + "epoch": 0.09, + "learning_rate": 4.97647774325347e-05, + "loss": 2.9521, + "step": 123500 + }, + { + "epoch": 0.09, + "learning_rate": 4.976382477503946e-05, + "loss": 2.9768, + "step": 124000 + }, + { + "epoch": 0.09, + "learning_rate": 4.976287211754422e-05, + "loss": 2.9776, + "step": 124500 + }, + { + "epoch": 0.1, + "learning_rate": 4.976191946004898e-05, + "loss": 2.9986, + "step": 125000 + }, + { + "epoch": 0.1, + "learning_rate": 4.976096680255373e-05, + "loss": 3.0055, + "step": 125500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9760014145058495e-05, + "loss": 2.9619, + "step": 126000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9759061487563246e-05, + "loss": 2.9778, + "step": 126500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9758108830068005e-05, + "loss": 2.9979, + "step": 127000 + }, + { + "epoch": 0.1, + "learning_rate": 4.975715617257277e-05, + "loss": 2.9515, + "step": 127500 + }, + { + "epoch": 0.1, + "learning_rate": 4.975620542039251e-05, + "loss": 2.9911, + "step": 128000 + }, + { + "epoch": 0.1, + "learning_rate": 4.975525276289727e-05, + "loss": 2.999, + "step": 128500 + }, + { + "epoch": 0.1, + "learning_rate": 4.975430010540203e-05, + "loss": 2.9594, + "step": 129000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9753347447906786e-05, + "loss": 2.9552, + "step": 129500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9752394790411544e-05, + "loss": 2.9616, + "step": 130000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9751442132916296e-05, + "loss": 2.9647, + "step": 130500 + }, + { + "epoch": 0.1, + "learning_rate": 4.975048947542106e-05, + "loss": 2.9816, + "step": 131000 + }, + { + "epoch": 0.1, + "learning_rate": 4.974953681792582e-05, + "loss": 2.9796, + "step": 131500 + }, + { + "epoch": 0.1, + "learning_rate": 4.974858416043057e-05, + "loss": 2.9673, + "step": 132000 + }, + { + "epoch": 0.1, + "learning_rate": 4.974763340825032e-05, + "loss": 2.956, + "step": 132500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9746680750755084e-05, + "loss": 2.9462, + "step": 133000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9745728093259836e-05, + "loss": 2.952, + "step": 133500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9744775435764594e-05, + "loss": 3.0012, + "step": 134000 + }, + { + "epoch": 0.1, + "learning_rate": 4.974382277826935e-05, + "loss": 2.9776, + "step": 134500 + }, + { + "epoch": 0.1, + "learning_rate": 4.974287012077411e-05, + "loss": 2.9598, + "step": 135000 + }, + { + "epoch": 0.1, + "learning_rate": 4.974191936859386e-05, + "loss": 2.9493, + "step": 135500 + }, + { + "epoch": 0.1, + "learning_rate": 4.974096671109861e-05, + "loss": 2.9561, + "step": 136000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9740014053603375e-05, + "loss": 2.9514, + "step": 136500 + }, + { + "epoch": 0.1, + "learning_rate": 4.973906139610813e-05, + "loss": 2.9681, + "step": 137000 + }, + { + "epoch": 0.1, + "learning_rate": 4.9738112549242865e-05, + "loss": 2.9495, + "step": 137500 + }, + { + "epoch": 0.11, + "learning_rate": 4.973716179706261e-05, + "loss": 2.9459, + "step": 138000 + }, + { + "epoch": 0.11, + "learning_rate": 4.973620913956737e-05, + "loss": 2.9566, + "step": 138500 + }, + { + "epoch": 0.11, + "learning_rate": 4.973525648207213e-05, + "loss": 2.9313, + "step": 139000 + }, + { + "epoch": 0.11, + "learning_rate": 4.973430382457689e-05, + "loss": 2.9254, + "step": 139500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9733351167081647e-05, + "loss": 2.9166, + "step": 140000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9732398509586405e-05, + "loss": 2.9289, + "step": 140500 + }, + { + "epoch": 0.11, + "learning_rate": 4.973144585209116e-05, + "loss": 2.9558, + "step": 141000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9730493194595915e-05, + "loss": 2.9195, + "step": 141500 + }, + { + "epoch": 0.11, + "learning_rate": 4.972954053710068e-05, + "loss": 2.9223, + "step": 142000 + }, + { + "epoch": 0.11, + "learning_rate": 4.972858978492043e-05, + "loss": 2.9649, + "step": 142500 + }, + { + "epoch": 0.11, + "learning_rate": 4.972763712742518e-05, + "loss": 2.9333, + "step": 143000 + }, + { + "epoch": 0.11, + "learning_rate": 4.972668446992994e-05, + "loss": 2.9285, + "step": 143500 + }, + { + "epoch": 0.11, + "learning_rate": 4.97257318124347e-05, + "loss": 2.9219, + "step": 144000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9724779154939454e-05, + "loss": 2.953, + "step": 144500 + }, + { + "epoch": 0.11, + "learning_rate": 4.972382649744421e-05, + "loss": 2.9132, + "step": 145000 + }, + { + "epoch": 0.11, + "learning_rate": 4.972287383994897e-05, + "loss": 2.9079, + "step": 145500 + }, + { + "epoch": 0.11, + "learning_rate": 4.972192118245373e-05, + "loss": 2.9099, + "step": 146000 + }, + { + "epoch": 0.11, + "learning_rate": 4.972097043027348e-05, + "loss": 2.9591, + "step": 146500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9720017772778236e-05, + "loss": 2.9362, + "step": 147000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9719067020597984e-05, + "loss": 2.8915, + "step": 147500 + }, + { + "epoch": 0.11, + "learning_rate": 4.971811436310274e-05, + "loss": 2.9103, + "step": 148000 + }, + { + "epoch": 0.11, + "learning_rate": 4.9717161705607494e-05, + "loss": 2.9463, + "step": 148500 + }, + { + "epoch": 0.11, + "learning_rate": 4.971620904811226e-05, + "loss": 2.9158, + "step": 149000 + }, + { + "epoch": 0.11, + "learning_rate": 4.971525639061701e-05, + "loss": 2.9283, + "step": 149500 + }, + { + "epoch": 0.11, + "learning_rate": 4.971430563843676e-05, + "loss": 2.9484, + "step": 150000 + }, + { + "epoch": 0.11, + "eval_accuracy": 0.4822463949408844, + "eval_loss": 2.8511881828308105, + "eval_runtime": 9418.6976, + "eval_samples_per_second": 29.196, + "eval_steps_per_second": 7.299, + "step": 150000 + }, + { + "epoch": 0.11, + "learning_rate": 4.971335298094152e-05, + "loss": 2.9239, + "step": 150500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9712400323446275e-05, + "loss": 2.9036, + "step": 151000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9711447665951034e-05, + "loss": 2.915, + "step": 151500 + }, + { + "epoch": 0.12, + "learning_rate": 4.971049500845579e-05, + "loss": 2.9037, + "step": 152000 + }, + { + "epoch": 0.12, + "learning_rate": 4.970954235096055e-05, + "loss": 2.9162, + "step": 152500 + }, + { + "epoch": 0.12, + "learning_rate": 4.970858969346531e-05, + "loss": 2.9086, + "step": 153000 + }, + { + "epoch": 0.12, + "learning_rate": 4.970763703597006e-05, + "loss": 2.9218, + "step": 153500 + }, + { + "epoch": 0.12, + "learning_rate": 4.970668628378981e-05, + "loss": 2.904, + "step": 154000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9705735531609556e-05, + "loss": 2.915, + "step": 154500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9704782874114315e-05, + "loss": 2.9285, + "step": 155000 + }, + { + "epoch": 0.12, + "learning_rate": 4.970383021661907e-05, + "loss": 2.9272, + "step": 155500 + }, + { + "epoch": 0.12, + "learning_rate": 4.970287755912383e-05, + "loss": 2.9167, + "step": 156000 + }, + { + "epoch": 0.12, + "learning_rate": 4.970192490162859e-05, + "loss": 2.9075, + "step": 156500 + }, + { + "epoch": 0.12, + "learning_rate": 4.970097224413335e-05, + "loss": 2.9193, + "step": 157000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9700021491953096e-05, + "loss": 2.9223, + "step": 157500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9699068834457854e-05, + "loss": 2.9181, + "step": 158000 + }, + { + "epoch": 0.12, + "learning_rate": 4.969811617696261e-05, + "loss": 2.9265, + "step": 158500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9697163519467364e-05, + "loss": 2.9026, + "step": 159000 + }, + { + "epoch": 0.12, + "learning_rate": 4.969621086197212e-05, + "loss": 2.8882, + "step": 159500 + }, + { + "epoch": 0.12, + "learning_rate": 4.969525820447689e-05, + "loss": 2.9127, + "step": 160000 + }, + { + "epoch": 0.12, + "learning_rate": 4.969430554698164e-05, + "loss": 2.9058, + "step": 160500 + }, + { + "epoch": 0.12, + "learning_rate": 4.96933528894864e-05, + "loss": 2.8779, + "step": 161000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9692400231991156e-05, + "loss": 2.9242, + "step": 161500 + }, + { + "epoch": 0.12, + "learning_rate": 4.9691447574495914e-05, + "loss": 2.894, + "step": 162000 + }, + { + "epoch": 0.12, + "learning_rate": 4.969049682231566e-05, + "loss": 2.8995, + "step": 162500 + }, + { + "epoch": 0.12, + "learning_rate": 4.968954416482042e-05, + "loss": 2.904, + "step": 163000 + }, + { + "epoch": 0.12, + "learning_rate": 4.968859150732518e-05, + "loss": 2.8794, + "step": 163500 + }, + { + "epoch": 0.12, + "learning_rate": 4.968763884982993e-05, + "loss": 2.9044, + "step": 164000 + }, + { + "epoch": 0.13, + "learning_rate": 4.968668619233469e-05, + "loss": 2.8428, + "step": 164500 + }, + { + "epoch": 0.13, + "learning_rate": 4.9685733534839454e-05, + "loss": 2.8494, + "step": 165000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9684782782659195e-05, + "loss": 2.9191, + "step": 165500 + }, + { + "epoch": 0.13, + "learning_rate": 4.9683830125163954e-05, + "loss": 2.8787, + "step": 166000 + }, + { + "epoch": 0.13, + "learning_rate": 4.968287746766871e-05, + "loss": 2.9042, + "step": 166500 + }, + { + "epoch": 0.13, + "learning_rate": 4.968192481017347e-05, + "loss": 2.9144, + "step": 167000 + }, + { + "epoch": 0.13, + "learning_rate": 4.968097405799322e-05, + "loss": 2.8736, + "step": 167500 + }, + { + "epoch": 0.13, + "learning_rate": 4.968002140049798e-05, + "loss": 2.876, + "step": 168000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9679068743002735e-05, + "loss": 2.8678, + "step": 168500 + }, + { + "epoch": 0.13, + "learning_rate": 4.967811799082248e-05, + "loss": 2.8589, + "step": 169000 + }, + { + "epoch": 0.13, + "learning_rate": 4.967716533332724e-05, + "loss": 2.9275, + "step": 169500 + }, + { + "epoch": 0.13, + "learning_rate": 4.967621267583199e-05, + "loss": 2.8589, + "step": 170000 + }, + { + "epoch": 0.13, + "learning_rate": 4.967526001833676e-05, + "loss": 2.8833, + "step": 170500 + }, + { + "epoch": 0.13, + "learning_rate": 4.967430736084151e-05, + "loss": 2.8853, + "step": 171000 + }, + { + "epoch": 0.13, + "learning_rate": 4.967335470334627e-05, + "loss": 2.8735, + "step": 171500 + }, + { + "epoch": 0.13, + "learning_rate": 4.9672402045851026e-05, + "loss": 2.8955, + "step": 172000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9671451293670774e-05, + "loss": 2.8867, + "step": 172500 + }, + { + "epoch": 0.13, + "learning_rate": 4.967049863617553e-05, + "loss": 2.8953, + "step": 173000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9669545978680284e-05, + "loss": 2.8616, + "step": 173500 + }, + { + "epoch": 0.13, + "learning_rate": 4.966859332118505e-05, + "loss": 2.863, + "step": 174000 + }, + { + "epoch": 0.13, + "learning_rate": 4.966764066368981e-05, + "loss": 2.858, + "step": 174500 + }, + { + "epoch": 0.13, + "learning_rate": 4.966668800619456e-05, + "loss": 2.8498, + "step": 175000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9665735348699324e-05, + "loss": 2.8584, + "step": 175500 + }, + { + "epoch": 0.13, + "learning_rate": 4.9664782691204076e-05, + "loss": 2.9082, + "step": 176000 + }, + { + "epoch": 0.13, + "learning_rate": 4.9663830033708834e-05, + "loss": 2.8263, + "step": 176500 + }, + { + "epoch": 0.13, + "learning_rate": 4.966287737621359e-05, + "loss": 2.8803, + "step": 177000 + }, + { + "epoch": 0.14, + "learning_rate": 4.966192662403334e-05, + "loss": 2.8448, + "step": 177500 + }, + { + "epoch": 0.14, + "learning_rate": 4.96609739665381e-05, + "loss": 2.8578, + "step": 178000 + }, + { + "epoch": 0.14, + "learning_rate": 4.966002130904285e-05, + "loss": 2.8743, + "step": 178500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9659068651547615e-05, + "loss": 2.8525, + "step": 179000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9658115994052374e-05, + "loss": 2.8555, + "step": 179500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9657165241872115e-05, + "loss": 2.8477, + "step": 180000 + }, + { + "epoch": 0.14, + "eval_accuracy": 0.4890978909293843, + "eval_loss": 2.795215129852295, + "eval_runtime": 9432.8544, + "eval_samples_per_second": 29.152, + "eval_steps_per_second": 7.288, + "step": 180000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9656212584376874e-05, + "loss": 2.8582, + "step": 180500 + }, + { + "epoch": 0.14, + "learning_rate": 4.965526183219663e-05, + "loss": 2.8457, + "step": 181000 + }, + { + "epoch": 0.14, + "learning_rate": 4.965430917470138e-05, + "loss": 2.8208, + "step": 181500 + }, + { + "epoch": 0.14, + "learning_rate": 4.965335651720614e-05, + "loss": 2.8652, + "step": 182000 + }, + { + "epoch": 0.14, + "learning_rate": 4.96524038597109e-05, + "loss": 2.8593, + "step": 182500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9651451202215655e-05, + "loss": 2.8295, + "step": 183000 + }, + { + "epoch": 0.14, + "learning_rate": 4.965049854472041e-05, + "loss": 2.86, + "step": 183500 + }, + { + "epoch": 0.14, + "learning_rate": 4.964954588722517e-05, + "loss": 2.8383, + "step": 184000 + }, + { + "epoch": 0.14, + "learning_rate": 4.964859322972993e-05, + "loss": 2.8651, + "step": 184500 + }, + { + "epoch": 0.14, + "learning_rate": 4.964764438286467e-05, + "loss": 2.8398, + "step": 185000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9646691725369426e-05, + "loss": 2.8199, + "step": 185500 + }, + { + "epoch": 0.14, + "learning_rate": 4.964573906787418e-05, + "loss": 2.8475, + "step": 186000 + }, + { + "epoch": 0.14, + "learning_rate": 4.964478641037894e-05, + "loss": 2.8669, + "step": 186500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9643833752883694e-05, + "loss": 2.7931, + "step": 187000 + }, + { + "epoch": 0.14, + "learning_rate": 4.964288300070344e-05, + "loss": 2.8364, + "step": 187500 + }, + { + "epoch": 0.14, + "learning_rate": 4.96419303432082e-05, + "loss": 2.8269, + "step": 188000 + }, + { + "epoch": 0.14, + "learning_rate": 4.964097768571296e-05, + "loss": 2.8357, + "step": 188500 + }, + { + "epoch": 0.14, + "learning_rate": 4.964002502821772e-05, + "loss": 2.836, + "step": 189000 + }, + { + "epoch": 0.14, + "learning_rate": 4.9639072370722476e-05, + "loss": 2.8448, + "step": 189500 + }, + { + "epoch": 0.14, + "learning_rate": 4.9638119713227234e-05, + "loss": 2.8249, + "step": 190000 + }, + { + "epoch": 0.15, + "learning_rate": 4.963716705573199e-05, + "loss": 2.8253, + "step": 190500 + }, + { + "epoch": 0.15, + "learning_rate": 4.9636214398236744e-05, + "loss": 2.8479, + "step": 191000 + }, + { + "epoch": 0.15, + "learning_rate": 4.963526174074151e-05, + "loss": 2.8745, + "step": 191500 + }, + { + "epoch": 0.15, + "learning_rate": 4.963431098856126e-05, + "loss": 2.8462, + "step": 192000 + }, + { + "epoch": 0.15, + "learning_rate": 4.963335833106601e-05, + "loss": 2.8358, + "step": 192500 + }, + { + "epoch": 0.15, + "learning_rate": 4.963240757888576e-05, + "loss": 2.8586, + "step": 193000 + }, + { + "epoch": 0.15, + "learning_rate": 4.963145492139052e-05, + "loss": 2.8598, + "step": 193500 + }, + { + "epoch": 0.15, + "learning_rate": 4.9630502263895274e-05, + "loss": 2.8436, + "step": 194000 + }, + { + "epoch": 0.15, + "learning_rate": 4.962954960640003e-05, + "loss": 2.8385, + "step": 194500 + }, + { + "epoch": 0.15, + "learning_rate": 4.962859694890479e-05, + "loss": 2.8045, + "step": 195000 + }, + { + "epoch": 0.15, + "learning_rate": 4.962764429140955e-05, + "loss": 2.8111, + "step": 195500 + }, + { + "epoch": 0.15, + "learning_rate": 4.96266916339143e-05, + "loss": 2.8257, + "step": 196000 + }, + { + "epoch": 0.15, + "learning_rate": 4.962573897641906e-05, + "loss": 2.87, + "step": 196500 + }, + { + "epoch": 0.15, + "learning_rate": 4.9624788224238813e-05, + "loss": 2.8326, + "step": 197000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9623835566743565e-05, + "loss": 2.8528, + "step": 197500 + }, + { + "epoch": 0.15, + "learning_rate": 4.962288290924832e-05, + "loss": 2.8219, + "step": 198000 + }, + { + "epoch": 0.15, + "learning_rate": 4.962193025175308e-05, + "loss": 2.843, + "step": 198500 + }, + { + "epoch": 0.15, + "learning_rate": 4.962097759425784e-05, + "loss": 2.8355, + "step": 199000 + }, + { + "epoch": 0.15, + "learning_rate": 4.962002684207759e-05, + "loss": 2.851, + "step": 199500 + }, + { + "epoch": 0.15, + "learning_rate": 4.9619074184582346e-05, + "loss": 2.7945, + "step": 200000 + }, + { + "epoch": 0.15, + "learning_rate": 4.9618121527087105e-05, + "loss": 2.8149, + "step": 200500 + }, + { + "epoch": 0.15, + "learning_rate": 4.961716886959186e-05, + "loss": 2.8337, + "step": 201000 + }, + { + "epoch": 0.15, + "learning_rate": 4.961621811741161e-05, + "loss": 2.853, + "step": 201500 + }, + { + "epoch": 0.15, + "learning_rate": 4.961526736523135e-05, + "loss": 2.8083, + "step": 202000 + }, + { + "epoch": 0.15, + "learning_rate": 4.961431470773612e-05, + "loss": 2.8415, + "step": 202500 + }, + { + "epoch": 0.15, + "learning_rate": 4.9613363955555866e-05, + "loss": 2.8454, + "step": 203000 + }, + { + "epoch": 0.16, + "learning_rate": 4.961241129806062e-05, + "loss": 2.8115, + "step": 203500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9611458640565376e-05, + "loss": 2.8149, + "step": 204000 + }, + { + "epoch": 0.16, + "learning_rate": 4.961050598307014e-05, + "loss": 2.817, + "step": 204500 + }, + { + "epoch": 0.16, + "learning_rate": 4.960955332557489e-05, + "loss": 2.8427, + "step": 205000 + }, + { + "epoch": 0.16, + "learning_rate": 4.960860066807965e-05, + "loss": 2.8041, + "step": 205500 + }, + { + "epoch": 0.16, + "learning_rate": 4.960764801058441e-05, + "loss": 2.8263, + "step": 206000 + }, + { + "epoch": 0.16, + "learning_rate": 4.960669535308917e-05, + "loss": 2.8128, + "step": 206500 + }, + { + "epoch": 0.16, + "learning_rate": 4.960574269559392e-05, + "loss": 2.8204, + "step": 207000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9604790038098684e-05, + "loss": 2.7701, + "step": 207500 + }, + { + "epoch": 0.16, + "learning_rate": 4.960383738060344e-05, + "loss": 2.8026, + "step": 208000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9602884723108194e-05, + "loss": 2.8336, + "step": 208500 + }, + { + "epoch": 0.16, + "learning_rate": 4.960193206561295e-05, + "loss": 2.8053, + "step": 209000 + }, + { + "epoch": 0.16, + "learning_rate": 4.960097940811771e-05, + "loss": 2.8329, + "step": 209500 + }, + { + "epoch": 0.16, + "learning_rate": 4.960002675062247e-05, + "loss": 2.8102, + "step": 210000 + }, + { + "epoch": 0.16, + "eval_accuracy": 0.49527599043120424, + "eval_loss": 2.747093677520752, + "eval_runtime": 9430.05, + "eval_samples_per_second": 29.161, + "eval_steps_per_second": 7.29, + "step": 210000 + }, + { + "epoch": 0.16, + "learning_rate": 4.959907599844222e-05, + "loss": 2.7952, + "step": 210500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9598123340946975e-05, + "loss": 2.8051, + "step": 211000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9597170683451733e-05, + "loss": 2.832, + "step": 211500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9596218025956485e-05, + "loss": 2.7877, + "step": 212000 + }, + { + "epoch": 0.16, + "learning_rate": 4.959526727377623e-05, + "loss": 2.8293, + "step": 212500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9594314616281e-05, + "loss": 2.8296, + "step": 213000 + }, + { + "epoch": 0.16, + "learning_rate": 4.959336195878575e-05, + "loss": 2.8155, + "step": 213500 + }, + { + "epoch": 0.16, + "learning_rate": 4.959240930129051e-05, + "loss": 2.8317, + "step": 214000 + }, + { + "epoch": 0.16, + "learning_rate": 4.9591458549110256e-05, + "loss": 2.8036, + "step": 214500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9590505891615015e-05, + "loss": 2.8087, + "step": 215000 + }, + { + "epoch": 0.16, + "learning_rate": 4.958955323411977e-05, + "loss": 2.8455, + "step": 215500 + }, + { + "epoch": 0.16, + "learning_rate": 4.958860057662453e-05, + "loss": 2.8149, + "step": 216000 + }, + { + "epoch": 0.17, + "learning_rate": 4.958764791912929e-05, + "loss": 2.7848, + "step": 216500 + }, + { + "epoch": 0.17, + "learning_rate": 4.958669526163405e-05, + "loss": 2.8028, + "step": 217000 + }, + { + "epoch": 0.17, + "learning_rate": 4.9585744509453796e-05, + "loss": 2.8419, + "step": 217500 + }, + { + "epoch": 0.17, + "learning_rate": 4.958479185195855e-05, + "loss": 2.8478, + "step": 218000 + }, + { + "epoch": 0.17, + "learning_rate": 4.95838410997783e-05, + "loss": 2.7895, + "step": 218500 + }, + { + "epoch": 0.17, + "learning_rate": 4.958288844228306e-05, + "loss": 2.7878, + "step": 219000 + }, + { + "epoch": 0.17, + "learning_rate": 4.958193578478781e-05, + "loss": 2.8194, + "step": 219500 + }, + { + "epoch": 0.17, + "learning_rate": 4.958098312729257e-05, + "loss": 2.7735, + "step": 220000 + }, + { + "epoch": 0.17, + "learning_rate": 4.958003046979733e-05, + "loss": 2.8111, + "step": 220500 + }, + { + "epoch": 0.17, + "learning_rate": 4.957907971761708e-05, + "loss": 2.8361, + "step": 221000 + }, + { + "epoch": 0.17, + "learning_rate": 4.9578127060121836e-05, + "loss": 2.7916, + "step": 221500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9577174402626594e-05, + "loss": 2.7754, + "step": 222000 + }, + { + "epoch": 0.17, + "learning_rate": 4.957622174513135e-05, + "loss": 2.7829, + "step": 222500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9575269087636104e-05, + "loss": 2.8057, + "step": 223000 + }, + { + "epoch": 0.17, + "learning_rate": 4.957431643014087e-05, + "loss": 2.8106, + "step": 223500 + }, + { + "epoch": 0.17, + "learning_rate": 4.957336377264563e-05, + "loss": 2.8135, + "step": 224000 + }, + { + "epoch": 0.17, + "learning_rate": 4.957241111515038e-05, + "loss": 2.7848, + "step": 224500 + }, + { + "epoch": 0.17, + "learning_rate": 4.957146036297013e-05, + "loss": 2.8022, + "step": 225000 + }, + { + "epoch": 0.17, + "learning_rate": 4.957050770547489e-05, + "loss": 2.7634, + "step": 225500 + }, + { + "epoch": 0.17, + "learning_rate": 4.956955504797964e-05, + "loss": 2.8107, + "step": 226000 + }, + { + "epoch": 0.17, + "learning_rate": 4.95686023904844e-05, + "loss": 2.7913, + "step": 226500 + }, + { + "epoch": 0.17, + "learning_rate": 4.956765163830415e-05, + "loss": 2.7764, + "step": 227000 + }, + { + "epoch": 0.17, + "learning_rate": 4.956669898080891e-05, + "loss": 2.7623, + "step": 227500 + }, + { + "epoch": 0.17, + "learning_rate": 4.9565748228628657e-05, + "loss": 2.7985, + "step": 228000 + }, + { + "epoch": 0.17, + "learning_rate": 4.9564795571133415e-05, + "loss": 2.7821, + "step": 228500 + }, + { + "epoch": 0.17, + "learning_rate": 4.956384291363817e-05, + "loss": 2.7838, + "step": 229000 + }, + { + "epoch": 0.17, + "learning_rate": 4.956289025614293e-05, + "loss": 2.7913, + "step": 229500 + }, + { + "epoch": 0.18, + "learning_rate": 4.956193759864768e-05, + "loss": 2.7662, + "step": 230000 + }, + { + "epoch": 0.18, + "learning_rate": 4.956098494115244e-05, + "loss": 2.804, + "step": 230500 + }, + { + "epoch": 0.18, + "learning_rate": 4.95600322836572e-05, + "loss": 2.796, + "step": 231000 + }, + { + "epoch": 0.18, + "learning_rate": 4.955907962616196e-05, + "loss": 2.8101, + "step": 231500 + }, + { + "epoch": 0.18, + "learning_rate": 4.9558126968666716e-05, + "loss": 2.8038, + "step": 232000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9557176216486464e-05, + "loss": 2.7837, + "step": 232500 + }, + { + "epoch": 0.18, + "learning_rate": 4.955622355899122e-05, + "loss": 2.8052, + "step": 233000 + }, + { + "epoch": 0.18, + "learning_rate": 4.955527090149598e-05, + "loss": 2.7893, + "step": 233500 + }, + { + "epoch": 0.18, + "learning_rate": 4.955431824400074e-05, + "loss": 2.751, + "step": 234000 + }, + { + "epoch": 0.18, + "learning_rate": 4.95533655865055e-05, + "loss": 2.7621, + "step": 234500 + }, + { + "epoch": 0.18, + "learning_rate": 4.955241292901025e-05, + "loss": 2.7687, + "step": 235000 + }, + { + "epoch": 0.18, + "learning_rate": 4.955146027151501e-05, + "loss": 2.7471, + "step": 235500 + }, + { + "epoch": 0.18, + "learning_rate": 4.955050761401977e-05, + "loss": 2.7743, + "step": 236000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9549556861839514e-05, + "loss": 2.8044, + "step": 236500 + }, + { + "epoch": 0.18, + "learning_rate": 4.954860420434427e-05, + "loss": 2.7905, + "step": 237000 + }, + { + "epoch": 0.18, + "learning_rate": 4.954765154684903e-05, + "loss": 2.7859, + "step": 237500 + }, + { + "epoch": 0.18, + "learning_rate": 4.954670079466878e-05, + "loss": 2.7853, + "step": 238000 + }, + { + "epoch": 0.18, + "learning_rate": 4.954574813717354e-05, + "loss": 2.7774, + "step": 238500 + }, + { + "epoch": 0.18, + "learning_rate": 4.9544795479678295e-05, + "loss": 2.7867, + "step": 239000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9543842822183054e-05, + "loss": 2.7452, + "step": 239500 + }, + { + "epoch": 0.18, + "learning_rate": 4.95428920700028e-05, + "loss": 2.7967, + "step": 240000 + }, + { + "epoch": 0.18, + "eval_accuracy": 0.4996462228988974, + "eval_loss": 2.710792303085327, + "eval_runtime": 9435.8846, + "eval_samples_per_second": 29.143, + "eval_steps_per_second": 7.286, + "step": 240000 + }, + { + "epoch": 0.18, + "learning_rate": 4.954193941250755e-05, + "loss": 2.7987, + "step": 240500 + }, + { + "epoch": 0.18, + "learning_rate": 4.954098675501231e-05, + "loss": 2.7928, + "step": 241000 + }, + { + "epoch": 0.18, + "learning_rate": 4.954003600283206e-05, + "loss": 2.8072, + "step": 241500 + }, + { + "epoch": 0.18, + "learning_rate": 4.953908334533682e-05, + "loss": 2.783, + "step": 242000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9538130687841576e-05, + "loss": 2.7966, + "step": 242500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9537178030346335e-05, + "loss": 2.7721, + "step": 243000 + }, + { + "epoch": 0.19, + "learning_rate": 4.953622537285109e-05, + "loss": 2.785, + "step": 243500 + }, + { + "epoch": 0.19, + "learning_rate": 4.953527271535585e-05, + "loss": 2.7477, + "step": 244000 + }, + { + "epoch": 0.19, + "learning_rate": 4.95343200578606e-05, + "loss": 2.7637, + "step": 244500 + }, + { + "epoch": 0.19, + "learning_rate": 4.953336740036537e-05, + "loss": 2.7794, + "step": 245000 + }, + { + "epoch": 0.19, + "learning_rate": 4.9532416648185116e-05, + "loss": 2.7604, + "step": 245500 + }, + { + "epoch": 0.19, + "learning_rate": 4.953146399068987e-05, + "loss": 2.7663, + "step": 246000 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530511333194626e-05, + "loss": 2.7636, + "step": 246500 + }, + { + "epoch": 0.19, + "learning_rate": 4.952956058101438e-05, + "loss": 2.7356, + "step": 247000 + }, + { + "epoch": 0.19, + "learning_rate": 4.952860792351913e-05, + "loss": 2.7926, + "step": 247500 + }, + { + "epoch": 0.19, + "learning_rate": 4.952765526602389e-05, + "loss": 2.7601, + "step": 248000 + }, + { + "epoch": 0.19, + "learning_rate": 4.952670260852865e-05, + "loss": 2.7536, + "step": 248500 + }, + { + "epoch": 0.19, + "learning_rate": 4.952574995103341e-05, + "loss": 2.7557, + "step": 249000 + }, + { + "epoch": 0.19, + "learning_rate": 4.9524797293538166e-05, + "loss": 2.7436, + "step": 249500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9523844636042924e-05, + "loss": 2.7933, + "step": 250000 + }, + { + "epoch": 0.19, + "learning_rate": 4.952289197854768e-05, + "loss": 2.7836, + "step": 250500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9521939321052434e-05, + "loss": 2.7916, + "step": 251000 + }, + { + "epoch": 0.19, + "learning_rate": 4.952098666355719e-05, + "loss": 2.784, + "step": 251500 + }, + { + "epoch": 0.19, + "learning_rate": 4.952003591137695e-05, + "loss": 2.8058, + "step": 252000 + }, + { + "epoch": 0.19, + "learning_rate": 4.95190832538817e-05, + "loss": 2.7708, + "step": 252500 + }, + { + "epoch": 0.19, + "learning_rate": 4.951813059638646e-05, + "loss": 2.7649, + "step": 253000 + }, + { + "epoch": 0.19, + "learning_rate": 4.9517177938891215e-05, + "loss": 2.7618, + "step": 253500 + }, + { + "epoch": 0.19, + "learning_rate": 4.9516227186710964e-05, + "loss": 2.7666, + "step": 254000 + }, + { + "epoch": 0.19, + "learning_rate": 4.951527452921572e-05, + "loss": 2.7531, + "step": 254500 + }, + { + "epoch": 0.19, + "learning_rate": 4.951432187172048e-05, + "loss": 2.7349, + "step": 255000 + }, + { + "epoch": 0.19, + "learning_rate": 4.951336921422524e-05, + "loss": 2.7424, + "step": 255500 + }, + { + "epoch": 0.2, + "learning_rate": 4.951241655673e-05, + "loss": 2.7557, + "step": 256000 + }, + { + "epoch": 0.2, + "learning_rate": 4.951146389923475e-05, + "loss": 2.7773, + "step": 256500 + }, + { + "epoch": 0.2, + "learning_rate": 4.951051124173951e-05, + "loss": 2.7801, + "step": 257000 + }, + { + "epoch": 0.2, + "learning_rate": 4.9509558584244265e-05, + "loss": 2.7726, + "step": 257500 + }, + { + "epoch": 0.2, + "learning_rate": 4.950860783206401e-05, + "loss": 2.7549, + "step": 258000 + }, + { + "epoch": 0.2, + "learning_rate": 4.950765517456877e-05, + "loss": 2.751, + "step": 258500 + }, + { + "epoch": 0.2, + "learning_rate": 4.950670251707353e-05, + "loss": 2.7645, + "step": 259000 + }, + { + "epoch": 0.2, + "learning_rate": 4.950574985957829e-05, + "loss": 2.7528, + "step": 259500 + }, + { + "epoch": 0.2, + "learning_rate": 4.9504797202083046e-05, + "loss": 2.7187, + "step": 260000 + }, + { + "epoch": 0.2, + "learning_rate": 4.950384644990279e-05, + "loss": 2.7688, + "step": 260500 + }, + { + "epoch": 0.2, + "learning_rate": 4.950289379240755e-05, + "loss": 2.7486, + "step": 261000 + }, + { + "epoch": 0.2, + "learning_rate": 4.950194113491231e-05, + "loss": 2.7852, + "step": 261500 + }, + { + "epoch": 0.2, + "learning_rate": 4.950098847741706e-05, + "loss": 2.78, + "step": 262000 + }, + { + "epoch": 0.2, + "learning_rate": 4.950003581992183e-05, + "loss": 2.7747, + "step": 262500 + }, + { + "epoch": 0.2, + "learning_rate": 4.949908316242658e-05, + "loss": 2.7546, + "step": 263000 + }, + { + "epoch": 0.2, + "learning_rate": 4.949813241024633e-05, + "loss": 2.7687, + "step": 263500 + }, + { + "epoch": 0.2, + "learning_rate": 4.9497179752751086e-05, + "loss": 2.7588, + "step": 264000 + }, + { + "epoch": 0.2, + "learning_rate": 4.9496227095255844e-05, + "loss": 2.7483, + "step": 264500 + }, + { + "epoch": 0.2, + "learning_rate": 4.94952744377606e-05, + "loss": 2.7599, + "step": 265000 + }, + { + "epoch": 0.2, + "learning_rate": 4.949432368558035e-05, + "loss": 2.73, + "step": 265500 + }, + { + "epoch": 0.2, + "learning_rate": 4.949337102808511e-05, + "loss": 2.7373, + "step": 266000 + }, + { + "epoch": 0.2, + "learning_rate": 4.949241837058987e-05, + "loss": 2.7502, + "step": 266500 + }, + { + "epoch": 0.2, + "learning_rate": 4.949146571309462e-05, + "loss": 2.7494, + "step": 267000 + }, + { + "epoch": 0.2, + "learning_rate": 4.949051305559938e-05, + "loss": 2.7689, + "step": 267500 + }, + { + "epoch": 0.2, + "learning_rate": 4.948956230341913e-05, + "loss": 2.7502, + "step": 268000 + }, + { + "epoch": 0.2, + "learning_rate": 4.9488609645923884e-05, + "loss": 2.7764, + "step": 268500 + }, + { + "epoch": 0.21, + "learning_rate": 4.948765698842864e-05, + "loss": 2.7502, + "step": 269000 + }, + { + "epoch": 0.21, + "learning_rate": 4.94867043309334e-05, + "loss": 2.7163, + "step": 269500 + }, + { + "epoch": 0.21, + "learning_rate": 4.948575167343816e-05, + "loss": 2.7153, + "step": 270000 + }, + { + "epoch": 0.21, + "eval_accuracy": 0.5035932644127394, + "eval_loss": 2.677302837371826, + "eval_runtime": 9440.3567, + "eval_samples_per_second": 29.129, + "eval_steps_per_second": 7.282, + "step": 270000 + }, + { + "epoch": 0.21, + "learning_rate": 4.948480092125791e-05, + "loss": 2.7439, + "step": 270500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9483848263762665e-05, + "loss": 2.7588, + "step": 271000 + }, + { + "epoch": 0.21, + "learning_rate": 4.948289560626742e-05, + "loss": 2.7715, + "step": 271500 + }, + { + "epoch": 0.21, + "learning_rate": 4.948194294877218e-05, + "loss": 2.7281, + "step": 272000 + }, + { + "epoch": 0.21, + "learning_rate": 4.948099219659193e-05, + "loss": 2.7371, + "step": 272500 + }, + { + "epoch": 0.21, + "learning_rate": 4.948003953909668e-05, + "loss": 2.7099, + "step": 273000 + }, + { + "epoch": 0.21, + "learning_rate": 4.9479086881601446e-05, + "loss": 2.7504, + "step": 273500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9478136129421195e-05, + "loss": 2.7276, + "step": 274000 + }, + { + "epoch": 0.21, + "learning_rate": 4.9477183471925946e-05, + "loss": 2.7398, + "step": 274500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9476230814430704e-05, + "loss": 2.7525, + "step": 275000 + }, + { + "epoch": 0.21, + "learning_rate": 4.947527815693546e-05, + "loss": 2.7582, + "step": 275500 + }, + { + "epoch": 0.21, + "learning_rate": 4.947432549944022e-05, + "loss": 2.7391, + "step": 276000 + }, + { + "epoch": 0.21, + "learning_rate": 4.947337284194498e-05, + "loss": 2.7516, + "step": 276500 + }, + { + "epoch": 0.21, + "learning_rate": 4.947242018444974e-05, + "loss": 2.7765, + "step": 277000 + }, + { + "epoch": 0.21, + "learning_rate": 4.9471467526954496e-05, + "loss": 2.7253, + "step": 277500 + }, + { + "epoch": 0.21, + "learning_rate": 4.947051486945925e-05, + "loss": 2.764, + "step": 278000 + }, + { + "epoch": 0.21, + "learning_rate": 4.946956221196401e-05, + "loss": 2.7455, + "step": 278500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9468609554468764e-05, + "loss": 2.7432, + "step": 279000 + }, + { + "epoch": 0.21, + "learning_rate": 4.946765689697352e-05, + "loss": 2.7165, + "step": 279500 + }, + { + "epoch": 0.21, + "learning_rate": 4.946670614479327e-05, + "loss": 2.7309, + "step": 280000 + }, + { + "epoch": 0.21, + "learning_rate": 4.946575348729803e-05, + "loss": 2.7511, + "step": 280500 + }, + { + "epoch": 0.21, + "learning_rate": 4.946480082980279e-05, + "loss": 2.7209, + "step": 281000 + }, + { + "epoch": 0.21, + "learning_rate": 4.946384817230754e-05, + "loss": 2.7342, + "step": 281500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9462895514812304e-05, + "loss": 2.7181, + "step": 282000 + }, + { + "epoch": 0.22, + "learning_rate": 4.946194476263205e-05, + "loss": 2.7304, + "step": 282500 + }, + { + "epoch": 0.22, + "learning_rate": 4.9460992105136804e-05, + "loss": 2.7433, + "step": 283000 + }, + { + "epoch": 0.22, + "learning_rate": 4.946003944764156e-05, + "loss": 2.7141, + "step": 283500 + }, + { + "epoch": 0.22, + "learning_rate": 4.945908679014633e-05, + "loss": 2.7364, + "step": 284000 + }, + { + "epoch": 0.22, + "learning_rate": 4.945813413265108e-05, + "loss": 2.7202, + "step": 284500 + }, + { + "epoch": 0.22, + "learning_rate": 4.945718147515584e-05, + "loss": 2.7397, + "step": 285000 + }, + { + "epoch": 0.22, + "learning_rate": 4.9456228817660595e-05, + "loss": 2.7349, + "step": 285500 + }, + { + "epoch": 0.22, + "learning_rate": 4.945527616016535e-05, + "loss": 2.7261, + "step": 286000 + }, + { + "epoch": 0.22, + "learning_rate": 4.94543254079851e-05, + "loss": 2.7468, + "step": 286500 + }, + { + "epoch": 0.22, + "learning_rate": 4.945337275048986e-05, + "loss": 2.741, + "step": 287000 + }, + { + "epoch": 0.22, + "learning_rate": 4.945242009299462e-05, + "loss": 2.7476, + "step": 287500 + }, + { + "epoch": 0.22, + "learning_rate": 4.945146743549937e-05, + "loss": 2.764, + "step": 288000 + }, + { + "epoch": 0.22, + "learning_rate": 4.945051668331912e-05, + "loss": 2.7357, + "step": 288500 + }, + { + "epoch": 0.22, + "learning_rate": 4.944956402582388e-05, + "loss": 2.7472, + "step": 289000 + }, + { + "epoch": 0.22, + "learning_rate": 4.9448611368328634e-05, + "loss": 2.7483, + "step": 289500 + }, + { + "epoch": 0.22, + "learning_rate": 4.944765871083339e-05, + "loss": 2.7366, + "step": 290000 + }, + { + "epoch": 0.22, + "learning_rate": 4.944670605333815e-05, + "loss": 2.7329, + "step": 290500 + }, + { + "epoch": 0.22, + "learning_rate": 4.944575339584291e-05, + "loss": 2.6905, + "step": 291000 + }, + { + "epoch": 0.22, + "learning_rate": 4.944480073834767e-05, + "loss": 2.71, + "step": 291500 + }, + { + "epoch": 0.22, + "learning_rate": 4.9443848080852426e-05, + "loss": 2.7157, + "step": 292000 + }, + { + "epoch": 0.22, + "learning_rate": 4.9442895423357184e-05, + "loss": 2.7285, + "step": 292500 + }, + { + "epoch": 0.22, + "learning_rate": 4.944194467117693e-05, + "loss": 2.7128, + "step": 293000 + }, + { + "epoch": 0.22, + "learning_rate": 4.944099391899668e-05, + "loss": 2.7213, + "step": 293500 + }, + { + "epoch": 0.22, + "learning_rate": 4.944004126150143e-05, + "loss": 2.7189, + "step": 294000 + }, + { + "epoch": 0.22, + "learning_rate": 4.94390886040062e-05, + "loss": 2.7038, + "step": 294500 + }, + { + "epoch": 0.22, + "learning_rate": 4.943813594651095e-05, + "loss": 2.7175, + "step": 295000 + }, + { + "epoch": 0.23, + "learning_rate": 4.943718328901571e-05, + "loss": 2.7151, + "step": 295500 + }, + { + "epoch": 0.23, + "learning_rate": 4.943623063152047e-05, + "loss": 2.7271, + "step": 296000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9435277974025224e-05, + "loss": 2.7508, + "step": 296500 + }, + { + "epoch": 0.23, + "learning_rate": 4.943432531652998e-05, + "loss": 2.6948, + "step": 297000 + }, + { + "epoch": 0.23, + "learning_rate": 4.943337265903474e-05, + "loss": 2.7273, + "step": 297500 + }, + { + "epoch": 0.23, + "learning_rate": 4.94324200015395e-05, + "loss": 2.728, + "step": 298000 + }, + { + "epoch": 0.23, + "learning_rate": 4.943147115467424e-05, + "loss": 2.76, + "step": 298500 + }, + { + "epoch": 0.23, + "learning_rate": 4.943051849717899e-05, + "loss": 2.7177, + "step": 299000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9429565839683753e-05, + "loss": 2.7213, + "step": 299500 + }, + { + "epoch": 0.23, + "learning_rate": 4.942861318218851e-05, + "loss": 2.7472, + "step": 300000 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.5076805733110031, + "eval_loss": 2.6502606868743896, + "eval_runtime": 9429.9896, + "eval_samples_per_second": 29.161, + "eval_steps_per_second": 7.29, + "step": 300000 + }, + { + "epoch": 0.23, + "learning_rate": 4.942766243000825e-05, + "loss": 2.7177, + "step": 300500 + }, + { + "epoch": 0.23, + "learning_rate": 4.942670977251301e-05, + "loss": 2.6843, + "step": 301000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9425757115017777e-05, + "loss": 2.7062, + "step": 301500 + }, + { + "epoch": 0.23, + "learning_rate": 4.942480445752253e-05, + "loss": 2.7284, + "step": 302000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9423851800027286e-05, + "loss": 2.7322, + "step": 302500 + }, + { + "epoch": 0.23, + "learning_rate": 4.9422901047847035e-05, + "loss": 2.7356, + "step": 303000 + }, + { + "epoch": 0.23, + "learning_rate": 4.942194839035179e-05, + "loss": 2.7371, + "step": 303500 + }, + { + "epoch": 0.23, + "learning_rate": 4.942099573285655e-05, + "loss": 2.7274, + "step": 304000 + }, + { + "epoch": 0.23, + "learning_rate": 4.94200430753613e-05, + "loss": 2.7091, + "step": 304500 + }, + { + "epoch": 0.23, + "learning_rate": 4.941909041786607e-05, + "loss": 2.7325, + "step": 305000 + }, + { + "epoch": 0.23, + "learning_rate": 4.9418137760370826e-05, + "loss": 2.6878, + "step": 305500 + }, + { + "epoch": 0.23, + "learning_rate": 4.941718510287558e-05, + "loss": 2.7247, + "step": 306000 + }, + { + "epoch": 0.23, + "learning_rate": 4.941623244538034e-05, + "loss": 2.7304, + "step": 306500 + }, + { + "epoch": 0.23, + "learning_rate": 4.9415279787885094e-05, + "loss": 2.7058, + "step": 307000 + }, + { + "epoch": 0.23, + "learning_rate": 4.941433094101983e-05, + "loss": 2.7205, + "step": 307500 + }, + { + "epoch": 0.23, + "learning_rate": 4.941337828352459e-05, + "loss": 2.6802, + "step": 308000 + }, + { + "epoch": 0.24, + "learning_rate": 4.941242562602935e-05, + "loss": 2.7081, + "step": 308500 + }, + { + "epoch": 0.24, + "learning_rate": 4.941147296853411e-05, + "loss": 2.7157, + "step": 309000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9410520311038866e-05, + "loss": 2.7099, + "step": 309500 + }, + { + "epoch": 0.24, + "learning_rate": 4.940956765354362e-05, + "loss": 2.7206, + "step": 310000 + }, + { + "epoch": 0.24, + "learning_rate": 4.940861690136337e-05, + "loss": 2.7085, + "step": 310500 + }, + { + "epoch": 0.24, + "learning_rate": 4.940766424386813e-05, + "loss": 2.7348, + "step": 311000 + }, + { + "epoch": 0.24, + "learning_rate": 4.940671158637288e-05, + "loss": 2.7232, + "step": 311500 + }, + { + "epoch": 0.24, + "learning_rate": 4.940575892887764e-05, + "loss": 2.7226, + "step": 312000 + }, + { + "epoch": 0.24, + "learning_rate": 4.94048062713824e-05, + "loss": 2.6962, + "step": 312500 + }, + { + "epoch": 0.24, + "learning_rate": 4.940385551920215e-05, + "loss": 2.7015, + "step": 313000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9402902861706905e-05, + "loss": 2.7018, + "step": 313500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9401950204211663e-05, + "loss": 2.672, + "step": 314000 + }, + { + "epoch": 0.24, + "learning_rate": 4.940099754671642e-05, + "loss": 2.7106, + "step": 314500 + }, + { + "epoch": 0.24, + "learning_rate": 4.940004488922118e-05, + "loss": 2.7232, + "step": 315000 + }, + { + "epoch": 0.24, + "learning_rate": 4.939909223172594e-05, + "loss": 2.7032, + "step": 315500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9398139574230697e-05, + "loss": 2.6919, + "step": 316000 + }, + { + "epoch": 0.24, + "learning_rate": 4.939718691673545e-05, + "loss": 2.6878, + "step": 316500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9396234259240206e-05, + "loss": 2.6981, + "step": 317000 + }, + { + "epoch": 0.24, + "learning_rate": 4.939528350705996e-05, + "loss": 2.7027, + "step": 317500 + }, + { + "epoch": 0.24, + "learning_rate": 4.939433084956471e-05, + "loss": 2.7368, + "step": 318000 + }, + { + "epoch": 0.24, + "learning_rate": 4.939337819206947e-05, + "loss": 2.6475, + "step": 318500 + }, + { + "epoch": 0.24, + "learning_rate": 4.939242553457423e-05, + "loss": 2.7051, + "step": 319000 + }, + { + "epoch": 0.24, + "learning_rate": 4.939147478239398e-05, + "loss": 2.7129, + "step": 319500 + }, + { + "epoch": 0.24, + "learning_rate": 4.9390524030213726e-05, + "loss": 2.7081, + "step": 320000 + }, + { + "epoch": 0.24, + "learning_rate": 4.9389571372718484e-05, + "loss": 2.6798, + "step": 320500 + }, + { + "epoch": 0.24, + "learning_rate": 4.938861871522324e-05, + "loss": 2.6816, + "step": 321000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9387666057728e-05, + "loss": 2.7074, + "step": 321500 + }, + { + "epoch": 0.25, + "learning_rate": 4.938671530554775e-05, + "loss": 2.6997, + "step": 322000 + }, + { + "epoch": 0.25, + "learning_rate": 4.93857626480525e-05, + "loss": 2.6977, + "step": 322500 + }, + { + "epoch": 0.25, + "learning_rate": 4.938481189587225e-05, + "loss": 2.6978, + "step": 323000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9383859238377014e-05, + "loss": 2.6974, + "step": 323500 + }, + { + "epoch": 0.25, + "learning_rate": 4.9382906580881766e-05, + "loss": 2.718, + "step": 324000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9381953923386524e-05, + "loss": 2.6742, + "step": 324500 + }, + { + "epoch": 0.25, + "learning_rate": 4.938100126589128e-05, + "loss": 2.6692, + "step": 325000 + }, + { + "epoch": 0.25, + "learning_rate": 4.938004860839604e-05, + "loss": 2.7049, + "step": 325500 + }, + { + "epoch": 0.25, + "learning_rate": 4.937909595090079e-05, + "loss": 2.7122, + "step": 326000 + }, + { + "epoch": 0.25, + "learning_rate": 4.937814329340556e-05, + "loss": 2.7056, + "step": 326500 + }, + { + "epoch": 0.25, + "learning_rate": 4.9377190635910315e-05, + "loss": 2.6765, + "step": 327000 + }, + { + "epoch": 0.25, + "learning_rate": 4.937623797841507e-05, + "loss": 2.6906, + "step": 327500 + }, + { + "epoch": 0.25, + "learning_rate": 4.937528532091983e-05, + "loss": 2.6812, + "step": 328000 + }, + { + "epoch": 0.25, + "learning_rate": 4.937433266342458e-05, + "loss": 2.6699, + "step": 328500 + }, + { + "epoch": 0.25, + "learning_rate": 4.937338191124433e-05, + "loss": 2.6788, + "step": 329000 + }, + { + "epoch": 0.25, + "learning_rate": 4.937242925374909e-05, + "loss": 2.6995, + "step": 329500 + }, + { + "epoch": 0.25, + "learning_rate": 4.937147659625385e-05, + "loss": 2.6633, + "step": 330000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.5107118901596502, + "eval_loss": 2.6264419555664062, + "eval_runtime": 9409.4565, + "eval_samples_per_second": 29.225, + "eval_steps_per_second": 7.306, + "step": 330000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9370523938758607e-05, + "loss": 2.677, + "step": 330500 + }, + { + "epoch": 0.25, + "learning_rate": 4.9369571281263365e-05, + "loss": 2.6793, + "step": 331000 + }, + { + "epoch": 0.25, + "learning_rate": 4.936861862376812e-05, + "loss": 2.6978, + "step": 331500 + }, + { + "epoch": 0.25, + "learning_rate": 4.936766596627288e-05, + "loss": 2.7031, + "step": 332000 + }, + { + "epoch": 0.25, + "learning_rate": 4.936671330877763e-05, + "loss": 2.7086, + "step": 332500 + }, + { + "epoch": 0.25, + "learning_rate": 4.936576065128239e-05, + "loss": 2.6589, + "step": 333000 + }, + { + "epoch": 0.25, + "learning_rate": 4.936480799378715e-05, + "loss": 2.6933, + "step": 333500 + }, + { + "epoch": 0.25, + "learning_rate": 4.936385533629191e-05, + "loss": 2.6995, + "step": 334000 + }, + { + "epoch": 0.25, + "learning_rate": 4.9362902678796666e-05, + "loss": 2.6829, + "step": 334500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9361950021301424e-05, + "loss": 2.6641, + "step": 335000 + }, + { + "epoch": 0.26, + "learning_rate": 4.936099736380618e-05, + "loss": 2.6928, + "step": 335500 + }, + { + "epoch": 0.26, + "learning_rate": 4.936004661162593e-05, + "loss": 2.6569, + "step": 336000 + }, + { + "epoch": 0.26, + "learning_rate": 4.935909395413069e-05, + "loss": 2.6996, + "step": 336500 + }, + { + "epoch": 0.26, + "learning_rate": 4.935814129663545e-05, + "loss": 2.6834, + "step": 337000 + }, + { + "epoch": 0.26, + "learning_rate": 4.9357192449770186e-05, + "loss": 2.6921, + "step": 337500 + }, + { + "epoch": 0.26, + "learning_rate": 4.935623979227494e-05, + "loss": 2.726, + "step": 338000 + }, + { + "epoch": 0.26, + "learning_rate": 4.9355287134779696e-05, + "loss": 2.6999, + "step": 338500 + }, + { + "epoch": 0.26, + "learning_rate": 4.935433447728446e-05, + "loss": 2.6603, + "step": 339000 + }, + { + "epoch": 0.26, + "learning_rate": 4.935338181978921e-05, + "loss": 2.7023, + "step": 339500 + }, + { + "epoch": 0.26, + "learning_rate": 4.935242916229397e-05, + "loss": 2.6995, + "step": 340000 + }, + { + "epoch": 0.26, + "learning_rate": 4.935147650479873e-05, + "loss": 2.7078, + "step": 340500 + }, + { + "epoch": 0.26, + "learning_rate": 4.935052384730349e-05, + "loss": 2.6793, + "step": 341000 + }, + { + "epoch": 0.26, + "learning_rate": 4.934957118980824e-05, + "loss": 2.6941, + "step": 341500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9348618532313004e-05, + "loss": 2.6902, + "step": 342000 + }, + { + "epoch": 0.26, + "learning_rate": 4.934766587481776e-05, + "loss": 2.6861, + "step": 342500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9346713217322513e-05, + "loss": 2.696, + "step": 343000 + }, + { + "epoch": 0.26, + "learning_rate": 4.934576055982728e-05, + "loss": 2.7084, + "step": 343500 + }, + { + "epoch": 0.26, + "learning_rate": 4.934480980764703e-05, + "loss": 2.7097, + "step": 344000 + }, + { + "epoch": 0.26, + "learning_rate": 4.934385715015178e-05, + "loss": 2.6744, + "step": 344500 + }, + { + "epoch": 0.26, + "learning_rate": 4.9342906397971527e-05, + "loss": 2.6656, + "step": 345000 + }, + { + "epoch": 0.26, + "learning_rate": 4.9341955645791275e-05, + "loss": 2.6596, + "step": 345500 + }, + { + "epoch": 0.26, + "learning_rate": 4.934100298829603e-05, + "loss": 2.6957, + "step": 346000 + }, + { + "epoch": 0.26, + "learning_rate": 4.934005033080079e-05, + "loss": 2.6929, + "step": 346500 + }, + { + "epoch": 0.26, + "learning_rate": 4.933909767330555e-05, + "loss": 2.6929, + "step": 347000 + }, + { + "epoch": 0.26, + "learning_rate": 4.933814501581031e-05, + "loss": 2.6926, + "step": 347500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9337192358315066e-05, + "loss": 2.6827, + "step": 348000 + }, + { + "epoch": 0.27, + "learning_rate": 4.933623970081982e-05, + "loss": 2.696, + "step": 348500 + }, + { + "epoch": 0.27, + "learning_rate": 4.933528704332458e-05, + "loss": 2.656, + "step": 349000 + }, + { + "epoch": 0.27, + "learning_rate": 4.9334334385829334e-05, + "loss": 2.6867, + "step": 349500 + }, + { + "epoch": 0.27, + "learning_rate": 4.933338363364908e-05, + "loss": 2.6826, + "step": 350000 + }, + { + "epoch": 0.27, + "learning_rate": 4.933243097615384e-05, + "loss": 2.6991, + "step": 350500 + }, + { + "epoch": 0.27, + "learning_rate": 4.93314783186586e-05, + "loss": 2.6862, + "step": 351000 + }, + { + "epoch": 0.27, + "learning_rate": 4.933052566116336e-05, + "loss": 2.7032, + "step": 351500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9329574908983106e-05, + "loss": 2.6707, + "step": 352000 + }, + { + "epoch": 0.27, + "learning_rate": 4.9328624156802854e-05, + "loss": 2.6744, + "step": 352500 + }, + { + "epoch": 0.27, + "learning_rate": 4.932767149930761e-05, + "loss": 2.6812, + "step": 353000 + }, + { + "epoch": 0.27, + "learning_rate": 4.932671884181237e-05, + "loss": 2.668, + "step": 353500 + }, + { + "epoch": 0.27, + "learning_rate": 4.932576618431712e-05, + "loss": 2.6646, + "step": 354000 + }, + { + "epoch": 0.27, + "learning_rate": 4.932481352682188e-05, + "loss": 2.6964, + "step": 354500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9323860869326645e-05, + "loss": 2.6879, + "step": 355000 + }, + { + "epoch": 0.27, + "learning_rate": 4.93229082118314e-05, + "loss": 2.6811, + "step": 355500 + }, + { + "epoch": 0.27, + "learning_rate": 4.9321955554336155e-05, + "loss": 2.6522, + "step": 356000 + }, + { + "epoch": 0.27, + "learning_rate": 4.9321002896840914e-05, + "loss": 2.6448, + "step": 356500 + }, + { + "epoch": 0.27, + "learning_rate": 4.932005023934567e-05, + "loss": 2.6908, + "step": 357000 + }, + { + "epoch": 0.27, + "learning_rate": 4.931909948716542e-05, + "loss": 2.6609, + "step": 357500 + }, + { + "epoch": 0.27, + "learning_rate": 4.931814682967018e-05, + "loss": 2.6928, + "step": 358000 + }, + { + "epoch": 0.27, + "learning_rate": 4.931719417217494e-05, + "loss": 2.6738, + "step": 358500 + }, + { + "epoch": 0.27, + "learning_rate": 4.931624151467969e-05, + "loss": 2.6662, + "step": 359000 + }, + { + "epoch": 0.27, + "learning_rate": 4.9315288857184447e-05, + "loss": 2.6862, + "step": 359500 + }, + { + "epoch": 0.27, + "learning_rate": 4.931433619968921e-05, + "loss": 2.6845, + "step": 360000 + }, + { + "epoch": 0.27, + "eval_accuracy": 0.5137789572575466, + "eval_loss": 2.605391025543213, + "eval_runtime": 9408.3309, + "eval_samples_per_second": 29.228, + "eval_steps_per_second": 7.307, + "step": 360000 + }, + { + "epoch": 0.27, + "learning_rate": 4.931338354219396e-05, + "loss": 2.6737, + "step": 360500 + }, + { + "epoch": 0.28, + "learning_rate": 4.931243088469872e-05, + "loss": 2.6591, + "step": 361000 + }, + { + "epoch": 0.28, + "learning_rate": 4.931147822720348e-05, + "loss": 2.6797, + "step": 361500 + }, + { + "epoch": 0.28, + "learning_rate": 4.931052747502323e-05, + "loss": 2.689, + "step": 362000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9309574817527986e-05, + "loss": 2.6877, + "step": 362500 + }, + { + "epoch": 0.28, + "learning_rate": 4.9308622160032745e-05, + "loss": 2.677, + "step": 363000 + }, + { + "epoch": 0.28, + "learning_rate": 4.93076695025375e-05, + "loss": 2.6857, + "step": 363500 + }, + { + "epoch": 0.28, + "learning_rate": 4.930671875035725e-05, + "loss": 2.6652, + "step": 364000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9305767998177e-05, + "loss": 2.6717, + "step": 364500 + }, + { + "epoch": 0.28, + "learning_rate": 4.930481534068175e-05, + "loss": 2.675, + "step": 365000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9303862683186516e-05, + "loss": 2.6487, + "step": 365500 + }, + { + "epoch": 0.28, + "learning_rate": 4.930291002569127e-05, + "loss": 2.6674, + "step": 366000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9301959273511016e-05, + "loss": 2.6587, + "step": 366500 + }, + { + "epoch": 0.28, + "learning_rate": 4.9301006616015774e-05, + "loss": 2.6535, + "step": 367000 + }, + { + "epoch": 0.28, + "learning_rate": 4.930005395852053e-05, + "loss": 2.6561, + "step": 367500 + }, + { + "epoch": 0.28, + "learning_rate": 4.929910130102529e-05, + "loss": 2.7063, + "step": 368000 + }, + { + "epoch": 0.28, + "learning_rate": 4.929814864353004e-05, + "loss": 2.6599, + "step": 368500 + }, + { + "epoch": 0.28, + "learning_rate": 4.929719598603481e-05, + "loss": 2.6673, + "step": 369000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9296245233854555e-05, + "loss": 2.6625, + "step": 369500 + }, + { + "epoch": 0.28, + "learning_rate": 4.929529257635931e-05, + "loss": 2.6827, + "step": 370000 + }, + { + "epoch": 0.28, + "learning_rate": 4.929433991886407e-05, + "loss": 2.6537, + "step": 370500 + }, + { + "epoch": 0.28, + "learning_rate": 4.929338726136883e-05, + "loss": 2.6647, + "step": 371000 + }, + { + "epoch": 0.28, + "learning_rate": 4.929243650918857e-05, + "loss": 2.6479, + "step": 371500 + }, + { + "epoch": 0.28, + "learning_rate": 4.929148575700832e-05, + "loss": 2.6511, + "step": 372000 + }, + { + "epoch": 0.28, + "learning_rate": 4.929053309951308e-05, + "loss": 2.6551, + "step": 372500 + }, + { + "epoch": 0.28, + "learning_rate": 4.928958044201784e-05, + "loss": 2.6516, + "step": 373000 + }, + { + "epoch": 0.28, + "learning_rate": 4.9288627784522595e-05, + "loss": 2.6736, + "step": 373500 + }, + { + "epoch": 0.29, + "learning_rate": 4.928767512702735e-05, + "loss": 2.6321, + "step": 374000 + }, + { + "epoch": 0.29, + "learning_rate": 4.928672246953211e-05, + "loss": 2.6526, + "step": 374500 + }, + { + "epoch": 0.29, + "learning_rate": 4.928576981203687e-05, + "loss": 2.6663, + "step": 375000 + }, + { + "epoch": 0.29, + "learning_rate": 4.928481715454162e-05, + "loss": 2.6844, + "step": 375500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9283864497046386e-05, + "loss": 2.6356, + "step": 376000 + }, + { + "epoch": 0.29, + "learning_rate": 4.928291183955114e-05, + "loss": 2.6523, + "step": 376500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9281961087370886e-05, + "loss": 2.6419, + "step": 377000 + }, + { + "epoch": 0.29, + "learning_rate": 4.9281008429875644e-05, + "loss": 2.6547, + "step": 377500 + }, + { + "epoch": 0.29, + "learning_rate": 4.92800557723804e-05, + "loss": 2.6595, + "step": 378000 + }, + { + "epoch": 0.29, + "learning_rate": 4.927910311488516e-05, + "loss": 2.6661, + "step": 378500 + }, + { + "epoch": 0.29, + "learning_rate": 4.927815045738992e-05, + "loss": 2.6265, + "step": 379000 + }, + { + "epoch": 0.29, + "learning_rate": 4.927719779989468e-05, + "loss": 2.6637, + "step": 379500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9276245142399436e-05, + "loss": 2.6869, + "step": 380000 + }, + { + "epoch": 0.29, + "learning_rate": 4.927529248490419e-05, + "loss": 2.6441, + "step": 380500 + }, + { + "epoch": 0.29, + "learning_rate": 4.927433982740895e-05, + "loss": 2.6789, + "step": 381000 + }, + { + "epoch": 0.29, + "learning_rate": 4.92733890752287e-05, + "loss": 2.6469, + "step": 381500 + }, + { + "epoch": 0.29, + "learning_rate": 4.927243641773345e-05, + "loss": 2.66, + "step": 382000 + }, + { + "epoch": 0.29, + "learning_rate": 4.927148376023821e-05, + "loss": 2.6563, + "step": 382500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9270531102742976e-05, + "loss": 2.6577, + "step": 383000 + }, + { + "epoch": 0.29, + "learning_rate": 4.926957844524773e-05, + "loss": 2.6431, + "step": 383500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9268627693067475e-05, + "loss": 2.6332, + "step": 384000 + }, + { + "epoch": 0.29, + "learning_rate": 4.9267676940887224e-05, + "loss": 2.6769, + "step": 384500 + }, + { + "epoch": 0.29, + "learning_rate": 4.926672618870697e-05, + "loss": 2.6478, + "step": 385000 + }, + { + "epoch": 0.29, + "learning_rate": 4.926577353121173e-05, + "loss": 2.6427, + "step": 385500 + }, + { + "epoch": 0.29, + "learning_rate": 4.926482087371649e-05, + "loss": 2.679, + "step": 386000 + }, + { + "epoch": 0.29, + "learning_rate": 4.926386821622124e-05, + "loss": 2.6152, + "step": 386500 + }, + { + "epoch": 0.29, + "learning_rate": 4.9262915558726005e-05, + "loss": 2.6526, + "step": 387000 + }, + { + "epoch": 0.3, + "learning_rate": 4.926196290123076e-05, + "loss": 2.6558, + "step": 387500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9261010243735515e-05, + "loss": 2.614, + "step": 388000 + }, + { + "epoch": 0.3, + "learning_rate": 4.926005758624028e-05, + "loss": 2.6355, + "step": 388500 + }, + { + "epoch": 0.3, + "learning_rate": 4.925910492874503e-05, + "loss": 2.6684, + "step": 389000 + }, + { + "epoch": 0.3, + "learning_rate": 4.925815227124979e-05, + "loss": 2.6417, + "step": 389500 + }, + { + "epoch": 0.3, + "learning_rate": 4.925720151906954e-05, + "loss": 2.6661, + "step": 390000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.5160672469262357, + "eval_loss": 2.585765838623047, + "eval_runtime": 9418.0501, + "eval_samples_per_second": 29.198, + "eval_steps_per_second": 7.299, + "step": 390000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9256248861574296e-05, + "loss": 2.648, + "step": 390500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9255296204079055e-05, + "loss": 2.6587, + "step": 391000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9254343546583806e-05, + "loss": 2.6226, + "step": 391500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9253392794403554e-05, + "loss": 2.6778, + "step": 392000 + }, + { + "epoch": 0.3, + "learning_rate": 4.925244013690832e-05, + "loss": 2.6372, + "step": 392500 + }, + { + "epoch": 0.3, + "learning_rate": 4.925148747941307e-05, + "loss": 2.6381, + "step": 393000 + }, + { + "epoch": 0.3, + "learning_rate": 4.925053482191783e-05, + "loss": 2.6418, + "step": 393500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9249584069737584e-05, + "loss": 2.6367, + "step": 394000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9248631412242336e-05, + "loss": 2.636, + "step": 394500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9247678754747094e-05, + "loss": 2.6826, + "step": 395000 + }, + { + "epoch": 0.3, + "learning_rate": 4.924672800256684e-05, + "loss": 2.6592, + "step": 395500 + }, + { + "epoch": 0.3, + "learning_rate": 4.92457753450716e-05, + "loss": 2.6342, + "step": 396000 + }, + { + "epoch": 0.3, + "learning_rate": 4.924482268757636e-05, + "loss": 2.6693, + "step": 396500 + }, + { + "epoch": 0.3, + "learning_rate": 4.924387003008111e-05, + "loss": 2.6693, + "step": 397000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9242917372585876e-05, + "loss": 2.622, + "step": 397500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9241964715090634e-05, + "loss": 2.6396, + "step": 398000 + }, + { + "epoch": 0.3, + "learning_rate": 4.9241012057595385e-05, + "loss": 2.6291, + "step": 398500 + }, + { + "epoch": 0.3, + "learning_rate": 4.9240059400100144e-05, + "loss": 2.589, + "step": 399000 + }, + { + "epoch": 0.3, + "learning_rate": 4.92391067426049e-05, + "loss": 2.64, + "step": 399500 + }, + { + "epoch": 0.3, + "learning_rate": 4.923815599042465e-05, + "loss": 2.6394, + "step": 400000 + }, + { + "epoch": 0.31, + "learning_rate": 4.923720333292941e-05, + "loss": 2.6606, + "step": 400500 + }, + { + "epoch": 0.31, + "learning_rate": 4.923625258074916e-05, + "loss": 2.6635, + "step": 401000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9235299923253915e-05, + "loss": 2.6722, + "step": 401500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9234347265758673e-05, + "loss": 2.6528, + "step": 402000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9233394608263425e-05, + "loss": 2.6387, + "step": 402500 + }, + { + "epoch": 0.31, + "learning_rate": 4.923244385608318e-05, + "loss": 2.6654, + "step": 403000 + }, + { + "epoch": 0.31, + "learning_rate": 4.923149310390293e-05, + "loss": 2.6465, + "step": 403500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9230540446407687e-05, + "loss": 2.6379, + "step": 404000 + }, + { + "epoch": 0.31, + "learning_rate": 4.922958778891244e-05, + "loss": 2.6584, + "step": 404500 + }, + { + "epoch": 0.31, + "learning_rate": 4.92286351314172e-05, + "loss": 2.6359, + "step": 405000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9227682473921955e-05, + "loss": 2.6, + "step": 405500 + }, + { + "epoch": 0.31, + "learning_rate": 4.922672981642671e-05, + "loss": 2.6522, + "step": 406000 + }, + { + "epoch": 0.31, + "learning_rate": 4.922577715893147e-05, + "loss": 2.6389, + "step": 406500 + }, + { + "epoch": 0.31, + "learning_rate": 4.922482450143623e-05, + "loss": 2.6525, + "step": 407000 + }, + { + "epoch": 0.31, + "learning_rate": 4.922387184394099e-05, + "loss": 2.6236, + "step": 407500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9222919186445746e-05, + "loss": 2.6291, + "step": 408000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9221966528950504e-05, + "loss": 2.6178, + "step": 408500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9221013871455256e-05, + "loss": 2.6508, + "step": 409000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9220061213960014e-05, + "loss": 2.6516, + "step": 409500 + }, + { + "epoch": 0.31, + "learning_rate": 4.921910855646478e-05, + "loss": 2.6492, + "step": 410000 + }, + { + "epoch": 0.31, + "learning_rate": 4.921815780428452e-05, + "loss": 2.6301, + "step": 410500 + }, + { + "epoch": 0.31, + "learning_rate": 4.921720514678928e-05, + "loss": 2.6367, + "step": 411000 + }, + { + "epoch": 0.31, + "learning_rate": 4.921625248929404e-05, + "loss": 2.6251, + "step": 411500 + }, + { + "epoch": 0.31, + "learning_rate": 4.9215299831798796e-05, + "loss": 2.6083, + "step": 412000 + }, + { + "epoch": 0.31, + "learning_rate": 4.9214347174303554e-05, + "loss": 2.639, + "step": 412500 + }, + { + "epoch": 0.31, + "learning_rate": 4.921339451680831e-05, + "loss": 2.6304, + "step": 413000 + }, + { + "epoch": 0.32, + "learning_rate": 4.921244376462806e-05, + "loss": 2.6551, + "step": 413500 + }, + { + "epoch": 0.32, + "learning_rate": 4.921149110713282e-05, + "loss": 2.6515, + "step": 414000 + }, + { + "epoch": 0.32, + "learning_rate": 4.921053844963757e-05, + "loss": 2.665, + "step": 414500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9209585792142335e-05, + "loss": 2.6419, + "step": 415000 + }, + { + "epoch": 0.32, + "learning_rate": 4.920863313464709e-05, + "loss": 2.6537, + "step": 415500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9207680477151845e-05, + "loss": 2.6293, + "step": 416000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9206727819656603e-05, + "loss": 2.6426, + "step": 416500 + }, + { + "epoch": 0.32, + "learning_rate": 4.920577706747635e-05, + "loss": 2.638, + "step": 417000 + }, + { + "epoch": 0.32, + "learning_rate": 4.92048263152961e-05, + "loss": 2.6342, + "step": 417500 + }, + { + "epoch": 0.32, + "learning_rate": 4.920387365780086e-05, + "loss": 2.6245, + "step": 418000 + }, + { + "epoch": 0.32, + "learning_rate": 4.920292100030561e-05, + "loss": 2.6507, + "step": 418500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9201970248125365e-05, + "loss": 2.639, + "step": 419000 + }, + { + "epoch": 0.32, + "learning_rate": 4.920101759063012e-05, + "loss": 2.5975, + "step": 419500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9200064933134875e-05, + "loss": 2.6178, + "step": 420000 + }, + { + "epoch": 0.32, + "eval_accuracy": 0.5182928021414346, + "eval_loss": 2.5693817138671875, + "eval_runtime": 9415.3679, + "eval_samples_per_second": 29.206, + "eval_steps_per_second": 7.302, + "step": 420000 + }, + { + "epoch": 0.32, + "learning_rate": 4.919911227563963e-05, + "loss": 2.662, + "step": 420500 + }, + { + "epoch": 0.32, + "learning_rate": 4.919815961814439e-05, + "loss": 2.6163, + "step": 421000 + }, + { + "epoch": 0.32, + "learning_rate": 4.919720696064915e-05, + "loss": 2.6503, + "step": 421500 + }, + { + "epoch": 0.32, + "learning_rate": 4.919625430315391e-05, + "loss": 2.6227, + "step": 422000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9195301645658666e-05, + "loss": 2.6163, + "step": 422500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9194348988163424e-05, + "loss": 2.6179, + "step": 423000 + }, + { + "epoch": 0.32, + "learning_rate": 4.9193396330668176e-05, + "loss": 2.6437, + "step": 423500 + }, + { + "epoch": 0.32, + "learning_rate": 4.919244367317294e-05, + "loss": 2.6422, + "step": 424000 + }, + { + "epoch": 0.32, + "learning_rate": 4.91914910156777e-05, + "loss": 2.6444, + "step": 424500 + }, + { + "epoch": 0.32, + "learning_rate": 4.919053835818245e-05, + "loss": 2.63, + "step": 425000 + }, + { + "epoch": 0.32, + "learning_rate": 4.918958951131719e-05, + "loss": 2.6384, + "step": 425500 + }, + { + "epoch": 0.32, + "learning_rate": 4.9188636853821954e-05, + "loss": 2.6253, + "step": 426000 + }, + { + "epoch": 0.33, + "learning_rate": 4.9187684196326706e-05, + "loss": 2.6414, + "step": 426500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9186731538831464e-05, + "loss": 2.6029, + "step": 427000 + }, + { + "epoch": 0.33, + "learning_rate": 4.918577888133622e-05, + "loss": 2.61, + "step": 427500 + }, + { + "epoch": 0.33, + "learning_rate": 4.918482622384098e-05, + "loss": 2.6562, + "step": 428000 + }, + { + "epoch": 0.33, + "learning_rate": 4.918387356634574e-05, + "loss": 2.6401, + "step": 428500 + }, + { + "epoch": 0.33, + "learning_rate": 4.91829209088505e-05, + "loss": 2.6153, + "step": 429000 + }, + { + "epoch": 0.33, + "learning_rate": 4.9181968251355255e-05, + "loss": 2.6088, + "step": 429500 + }, + { + "epoch": 0.33, + "learning_rate": 4.918101559386001e-05, + "loss": 2.6281, + "step": 430000 + }, + { + "epoch": 0.33, + "learning_rate": 4.9180062936364765e-05, + "loss": 2.6414, + "step": 430500 + }, + { + "epoch": 0.33, + "learning_rate": 4.917911027886953e-05, + "loss": 2.6408, + "step": 431000 + }, + { + "epoch": 0.33, + "learning_rate": 4.917815952668927e-05, + "loss": 2.6294, + "step": 431500 + }, + { + "epoch": 0.33, + "learning_rate": 4.917720686919403e-05, + "loss": 2.6377, + "step": 432000 + }, + { + "epoch": 0.33, + "learning_rate": 4.917625421169879e-05, + "loss": 2.6446, + "step": 432500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9175303459518537e-05, + "loss": 2.6278, + "step": 433000 + }, + { + "epoch": 0.33, + "learning_rate": 4.9174350802023295e-05, + "loss": 2.6108, + "step": 433500 + }, + { + "epoch": 0.33, + "learning_rate": 4.917339814452805e-05, + "loss": 2.6756, + "step": 434000 + }, + { + "epoch": 0.33, + "learning_rate": 4.917244548703281e-05, + "loss": 2.6358, + "step": 434500 + }, + { + "epoch": 0.33, + "learning_rate": 4.917149282953757e-05, + "loss": 2.6384, + "step": 435000 + }, + { + "epoch": 0.33, + "learning_rate": 4.917054017204232e-05, + "loss": 2.6002, + "step": 435500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9169587514547086e-05, + "loss": 2.6335, + "step": 436000 + }, + { + "epoch": 0.33, + "learning_rate": 4.9168636762366835e-05, + "loss": 2.6145, + "step": 436500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9167684104871586e-05, + "loss": 2.6213, + "step": 437000 + }, + { + "epoch": 0.33, + "learning_rate": 4.9166733352691334e-05, + "loss": 2.6218, + "step": 437500 + }, + { + "epoch": 0.33, + "learning_rate": 4.916578069519609e-05, + "loss": 2.6064, + "step": 438000 + }, + { + "epoch": 0.33, + "learning_rate": 4.916482803770085e-05, + "loss": 2.612, + "step": 438500 + }, + { + "epoch": 0.33, + "learning_rate": 4.916387538020561e-05, + "loss": 2.6512, + "step": 439000 + }, + { + "epoch": 0.33, + "learning_rate": 4.916292272271036e-05, + "loss": 2.6226, + "step": 439500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9161970065215126e-05, + "loss": 2.6488, + "step": 440000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9161017407719884e-05, + "loss": 2.6272, + "step": 440500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9160064750224636e-05, + "loss": 2.6011, + "step": 441000 + }, + { + "epoch": 0.34, + "learning_rate": 4.91591120927294e-05, + "loss": 2.6259, + "step": 441500 + }, + { + "epoch": 0.34, + "learning_rate": 4.915815943523415e-05, + "loss": 2.6375, + "step": 442000 + }, + { + "epoch": 0.34, + "learning_rate": 4.915720677773891e-05, + "loss": 2.638, + "step": 442500 + }, + { + "epoch": 0.34, + "learning_rate": 4.915625412024367e-05, + "loss": 2.6498, + "step": 443000 + }, + { + "epoch": 0.34, + "learning_rate": 4.915530146274843e-05, + "loss": 2.5921, + "step": 443500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9154352615883165e-05, + "loss": 2.6385, + "step": 444000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9153399958387924e-05, + "loss": 2.6273, + "step": 444500 + }, + { + "epoch": 0.34, + "learning_rate": 4.915244730089268e-05, + "loss": 2.6267, + "step": 445000 + }, + { + "epoch": 0.34, + "learning_rate": 4.915149464339744e-05, + "loss": 2.6169, + "step": 445500 + }, + { + "epoch": 0.34, + "learning_rate": 4.915054198590219e-05, + "loss": 2.6201, + "step": 446000 + }, + { + "epoch": 0.34, + "learning_rate": 4.914958932840695e-05, + "loss": 2.6552, + "step": 446500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9148636670911715e-05, + "loss": 2.6472, + "step": 447000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9147685918731457e-05, + "loss": 2.6187, + "step": 447500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9146733261236215e-05, + "loss": 2.6297, + "step": 448000 + }, + { + "epoch": 0.34, + "learning_rate": 4.914578060374097e-05, + "loss": 2.6266, + "step": 448500 + }, + { + "epoch": 0.34, + "learning_rate": 4.914482794624573e-05, + "loss": 2.6185, + "step": 449000 + }, + { + "epoch": 0.34, + "learning_rate": 4.914387719406548e-05, + "loss": 2.6239, + "step": 449500 + }, + { + "epoch": 0.34, + "learning_rate": 4.914292453657024e-05, + "loss": 2.6243, + "step": 450000 + }, + { + "epoch": 0.34, + "eval_accuracy": 0.5204107724611845, + "eval_loss": 2.5534162521362305, + "eval_runtime": 9415.6775, + "eval_samples_per_second": 29.205, + "eval_steps_per_second": 7.301, + "step": 450000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9141971879074996e-05, + "loss": 2.6308, + "step": 450500 + }, + { + "epoch": 0.34, + "learning_rate": 4.9141019221579755e-05, + "loss": 2.6045, + "step": 451000 + }, + { + "epoch": 0.34, + "learning_rate": 4.9140066564084506e-05, + "loss": 2.5877, + "step": 451500 + }, + { + "epoch": 0.34, + "learning_rate": 4.913911390658927e-05, + "loss": 2.629, + "step": 452000 + }, + { + "epoch": 0.34, + "learning_rate": 4.913816124909402e-05, + "loss": 2.6263, + "step": 452500 + }, + { + "epoch": 0.35, + "learning_rate": 4.913721049691377e-05, + "loss": 2.6047, + "step": 453000 + }, + { + "epoch": 0.35, + "learning_rate": 4.913625974473352e-05, + "loss": 2.5974, + "step": 453500 + }, + { + "epoch": 0.35, + "learning_rate": 4.913530708723828e-05, + "loss": 2.6318, + "step": 454000 + }, + { + "epoch": 0.35, + "learning_rate": 4.9134354429743036e-05, + "loss": 2.6453, + "step": 454500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9133401772247794e-05, + "loss": 2.6236, + "step": 455000 + }, + { + "epoch": 0.35, + "learning_rate": 4.913244911475255e-05, + "loss": 2.6533, + "step": 455500 + }, + { + "epoch": 0.35, + "learning_rate": 4.913150026788729e-05, + "loss": 2.6339, + "step": 456000 + }, + { + "epoch": 0.35, + "learning_rate": 4.913054761039205e-05, + "loss": 2.6059, + "step": 456500 + }, + { + "epoch": 0.35, + "learning_rate": 4.912959495289681e-05, + "loss": 2.6119, + "step": 457000 + }, + { + "epoch": 0.35, + "learning_rate": 4.912864229540156e-05, + "loss": 2.6488, + "step": 457500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9127689637906324e-05, + "loss": 2.6094, + "step": 458000 + }, + { + "epoch": 0.35, + "learning_rate": 4.9126736980411075e-05, + "loss": 2.6583, + "step": 458500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9125784322915834e-05, + "loss": 2.6252, + "step": 459000 + }, + { + "epoch": 0.35, + "learning_rate": 4.91248316654206e-05, + "loss": 2.6173, + "step": 459500 + }, + { + "epoch": 0.35, + "learning_rate": 4.912387900792535e-05, + "loss": 2.624, + "step": 460000 + }, + { + "epoch": 0.35, + "learning_rate": 4.912292635043011e-05, + "loss": 2.6011, + "step": 460500 + }, + { + "epoch": 0.35, + "learning_rate": 4.912197369293487e-05, + "loss": 2.5893, + "step": 461000 + }, + { + "epoch": 0.35, + "learning_rate": 4.9121021035439625e-05, + "loss": 2.6199, + "step": 461500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9120068377944376e-05, + "loss": 2.6008, + "step": 462000 + }, + { + "epoch": 0.35, + "learning_rate": 4.9119115720449135e-05, + "loss": 2.6024, + "step": 462500 + }, + { + "epoch": 0.35, + "learning_rate": 4.91181630629539e-05, + "loss": 2.6559, + "step": 463000 + }, + { + "epoch": 0.35, + "learning_rate": 4.911721231077364e-05, + "loss": 2.6475, + "step": 463500 + }, + { + "epoch": 0.35, + "learning_rate": 4.91162596532784e-05, + "loss": 2.6234, + "step": 464000 + }, + { + "epoch": 0.35, + "learning_rate": 4.911530699578316e-05, + "loss": 2.6034, + "step": 464500 + }, + { + "epoch": 0.35, + "learning_rate": 4.9114356243602906e-05, + "loss": 2.6108, + "step": 465000 + }, + { + "epoch": 0.35, + "learning_rate": 4.9113403586107664e-05, + "loss": 2.6313, + "step": 465500 + }, + { + "epoch": 0.36, + "learning_rate": 4.911245092861242e-05, + "loss": 2.6126, + "step": 466000 + }, + { + "epoch": 0.36, + "learning_rate": 4.911149827111718e-05, + "loss": 2.5916, + "step": 466500 + }, + { + "epoch": 0.36, + "learning_rate": 4.911054561362194e-05, + "loss": 2.6233, + "step": 467000 + }, + { + "epoch": 0.36, + "learning_rate": 4.910959295612669e-05, + "loss": 2.6083, + "step": 467500 + }, + { + "epoch": 0.36, + "learning_rate": 4.9108640298631456e-05, + "loss": 2.5778, + "step": 468000 + }, + { + "epoch": 0.36, + "learning_rate": 4.9107687641136214e-05, + "loss": 2.6258, + "step": 468500 + }, + { + "epoch": 0.36, + "learning_rate": 4.9106734983640966e-05, + "loss": 2.595, + "step": 469000 + }, + { + "epoch": 0.36, + "learning_rate": 4.9105784231460714e-05, + "loss": 2.6095, + "step": 469500 + }, + { + "epoch": 0.36, + "learning_rate": 4.910483157396547e-05, + "loss": 2.6012, + "step": 470000 + }, + { + "epoch": 0.36, + "learning_rate": 4.910387891647023e-05, + "loss": 2.6161, + "step": 470500 + }, + { + "epoch": 0.36, + "learning_rate": 4.910292625897499e-05, + "loss": 2.5923, + "step": 471000 + }, + { + "epoch": 0.36, + "learning_rate": 4.910197360147975e-05, + "loss": 2.6197, + "step": 471500 + }, + { + "epoch": 0.36, + "learning_rate": 4.9101020943984505e-05, + "loss": 2.61, + "step": 472000 + }, + { + "epoch": 0.36, + "learning_rate": 4.910006828648926e-05, + "loss": 2.5844, + "step": 472500 + }, + { + "epoch": 0.36, + "learning_rate": 4.909911562899402e-05, + "loss": 2.6276, + "step": 473000 + }, + { + "epoch": 0.36, + "learning_rate": 4.909816297149878e-05, + "loss": 2.6134, + "step": 473500 + }, + { + "epoch": 0.36, + "learning_rate": 4.909721221931852e-05, + "loss": 2.5853, + "step": 474000 + }, + { + "epoch": 0.36, + "learning_rate": 4.909626146713827e-05, + "loss": 2.638, + "step": 474500 + }, + { + "epoch": 0.36, + "learning_rate": 4.909530880964303e-05, + "loss": 2.5848, + "step": 475000 + }, + { + "epoch": 0.36, + "learning_rate": 4.909435615214779e-05, + "loss": 2.6422, + "step": 475500 + }, + { + "epoch": 0.36, + "learning_rate": 4.9093405399967535e-05, + "loss": 2.6272, + "step": 476000 + }, + { + "epoch": 0.36, + "learning_rate": 4.909245274247229e-05, + "loss": 2.6191, + "step": 476500 + }, + { + "epoch": 0.36, + "learning_rate": 4.909150008497705e-05, + "loss": 2.639, + "step": 477000 + }, + { + "epoch": 0.36, + "learning_rate": 4.909054742748181e-05, + "loss": 2.5862, + "step": 477500 + }, + { + "epoch": 0.36, + "learning_rate": 4.908959476998657e-05, + "loss": 2.6111, + "step": 478000 + }, + { + "epoch": 0.36, + "learning_rate": 4.9088642112491326e-05, + "loss": 2.6244, + "step": 478500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9087689454996085e-05, + "loss": 2.6131, + "step": 479000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9086736797500836e-05, + "loss": 2.6034, + "step": 479500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9085784140005595e-05, + "loss": 2.6093, + "step": 480000 + }, + { + "epoch": 0.37, + "eval_accuracy": 0.5221639565795111, + "eval_loss": 2.540274143218994, + "eval_runtime": 9413.8665, + "eval_samples_per_second": 29.211, + "eval_steps_per_second": 7.303, + "step": 480000 + }, + { + "epoch": 0.37, + "learning_rate": 4.908483148251035e-05, + "loss": 2.5819, + "step": 480500 + }, + { + "epoch": 0.37, + "learning_rate": 4.908388263564509e-05, + "loss": 2.622, + "step": 481000 + }, + { + "epoch": 0.37, + "learning_rate": 4.908292997814985e-05, + "loss": 2.6024, + "step": 481500 + }, + { + "epoch": 0.37, + "learning_rate": 4.908197732065461e-05, + "loss": 2.6016, + "step": 482000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9081024663159366e-05, + "loss": 2.6061, + "step": 482500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9080072005664124e-05, + "loss": 2.6218, + "step": 483000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9079119348168876e-05, + "loss": 2.6072, + "step": 483500 + }, + { + "epoch": 0.37, + "learning_rate": 4.907816669067364e-05, + "loss": 2.5975, + "step": 484000 + }, + { + "epoch": 0.37, + "learning_rate": 4.90772140331784e-05, + "loss": 2.6154, + "step": 484500 + }, + { + "epoch": 0.37, + "learning_rate": 4.907626137568315e-05, + "loss": 2.6064, + "step": 485000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9075308718187916e-05, + "loss": 2.5867, + "step": 485500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9074357966007664e-05, + "loss": 2.6123, + "step": 486000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9073405308512415e-05, + "loss": 2.5768, + "step": 486500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9072452651017174e-05, + "loss": 2.5982, + "step": 487000 + }, + { + "epoch": 0.37, + "learning_rate": 4.907149999352193e-05, + "loss": 2.6092, + "step": 487500 + }, + { + "epoch": 0.37, + "learning_rate": 4.907054733602669e-05, + "loss": 2.635, + "step": 488000 + }, + { + "epoch": 0.37, + "learning_rate": 4.906959467853144e-05, + "loss": 2.6102, + "step": 488500 + }, + { + "epoch": 0.37, + "learning_rate": 4.906864202103621e-05, + "loss": 2.5923, + "step": 489000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9067689363540965e-05, + "loss": 2.6081, + "step": 489500 + }, + { + "epoch": 0.37, + "learning_rate": 4.9066740516675703e-05, + "loss": 2.5921, + "step": 490000 + }, + { + "epoch": 0.37, + "learning_rate": 4.9065787859180455e-05, + "loss": 2.6144, + "step": 490500 + }, + { + "epoch": 0.37, + "learning_rate": 4.906483520168521e-05, + "loss": 2.6352, + "step": 491000 + }, + { + "epoch": 0.37, + "learning_rate": 4.906388254418997e-05, + "loss": 2.5999, + "step": 491500 + }, + { + "epoch": 0.37, + "learning_rate": 4.906292988669473e-05, + "loss": 2.6106, + "step": 492000 + }, + { + "epoch": 0.38, + "learning_rate": 4.906197722919949e-05, + "loss": 2.6154, + "step": 492500 + }, + { + "epoch": 0.38, + "learning_rate": 4.9061028382334226e-05, + "loss": 2.639, + "step": 493000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9060075724838985e-05, + "loss": 2.5698, + "step": 493500 + }, + { + "epoch": 0.38, + "learning_rate": 4.905912306734374e-05, + "loss": 2.6098, + "step": 494000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9058170409848494e-05, + "loss": 2.5431, + "step": 494500 + }, + { + "epoch": 0.38, + "learning_rate": 4.905721775235326e-05, + "loss": 2.6097, + "step": 495000 + }, + { + "epoch": 0.38, + "learning_rate": 4.905626509485802e-05, + "loss": 2.6005, + "step": 495500 + }, + { + "epoch": 0.38, + "learning_rate": 4.905531434267776e-05, + "loss": 2.6057, + "step": 496000 + }, + { + "epoch": 0.38, + "learning_rate": 4.905436168518252e-05, + "loss": 2.598, + "step": 496500 + }, + { + "epoch": 0.38, + "learning_rate": 4.905340902768728e-05, + "loss": 2.6414, + "step": 497000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9052456370192034e-05, + "loss": 2.6055, + "step": 497500 + }, + { + "epoch": 0.38, + "learning_rate": 4.905150561801178e-05, + "loss": 2.6116, + "step": 498000 + }, + { + "epoch": 0.38, + "learning_rate": 4.905055296051654e-05, + "loss": 2.6138, + "step": 498500 + }, + { + "epoch": 0.38, + "learning_rate": 4.90496003030213e-05, + "loss": 2.5881, + "step": 499000 + }, + { + "epoch": 0.38, + "learning_rate": 4.904864764552606e-05, + "loss": 2.591, + "step": 499500 + }, + { + "epoch": 0.38, + "learning_rate": 4.9047694988030816e-05, + "loss": 2.5928, + "step": 500000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9046742330535574e-05, + "loss": 2.5659, + "step": 500500 + }, + { + "epoch": 0.38, + "learning_rate": 4.9045789673040325e-05, + "loss": 2.6043, + "step": 501000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9044837015545084e-05, + "loss": 2.5973, + "step": 501500 + }, + { + "epoch": 0.38, + "learning_rate": 4.904388435804985e-05, + "loss": 2.5438, + "step": 502000 + }, + { + "epoch": 0.38, + "learning_rate": 4.90429317005546e-05, + "loss": 2.6249, + "step": 502500 + }, + { + "epoch": 0.38, + "learning_rate": 4.904198094837435e-05, + "loss": 2.5776, + "step": 503000 + }, + { + "epoch": 0.38, + "learning_rate": 4.904102829087911e-05, + "loss": 2.602, + "step": 503500 + }, + { + "epoch": 0.38, + "learning_rate": 4.9040075633383865e-05, + "loss": 2.6077, + "step": 504000 + }, + { + "epoch": 0.38, + "learning_rate": 4.9039122975888623e-05, + "loss": 2.6264, + "step": 504500 + }, + { + "epoch": 0.38, + "learning_rate": 4.9038170318393375e-05, + "loss": 2.5949, + "step": 505000 + }, + { + "epoch": 0.39, + "learning_rate": 4.903721766089814e-05, + "loss": 2.583, + "step": 505500 + }, + { + "epoch": 0.39, + "learning_rate": 4.903626500340289e-05, + "loss": 2.6052, + "step": 506000 + }, + { + "epoch": 0.39, + "learning_rate": 4.903531234590765e-05, + "loss": 2.5984, + "step": 506500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9034359688412415e-05, + "loss": 2.6108, + "step": 507000 + }, + { + "epoch": 0.39, + "learning_rate": 4.9033407030917166e-05, + "loss": 2.5899, + "step": 507500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9032456278736915e-05, + "loss": 2.6324, + "step": 508000 + }, + { + "epoch": 0.39, + "learning_rate": 4.903150362124167e-05, + "loss": 2.5811, + "step": 508500 + }, + { + "epoch": 0.39, + "learning_rate": 4.903055096374643e-05, + "loss": 2.5767, + "step": 509000 + }, + { + "epoch": 0.39, + "learning_rate": 4.902959830625119e-05, + "loss": 2.6209, + "step": 509500 + }, + { + "epoch": 0.39, + "learning_rate": 4.902864564875594e-05, + "loss": 2.5892, + "step": 510000 + }, + { + "epoch": 0.39, + "eval_accuracy": 0.5239542630855191, + "eval_loss": 2.527653455734253, + "eval_runtime": 9416.2978, + "eval_samples_per_second": 29.203, + "eval_steps_per_second": 7.301, + "step": 510000 + }, + { + "epoch": 0.39, + "learning_rate": 4.9027692991260706e-05, + "loss": 2.5818, + "step": 510500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9026742239080454e-05, + "loss": 2.5787, + "step": 511000 + }, + { + "epoch": 0.39, + "learning_rate": 4.9025789581585206e-05, + "loss": 2.598, + "step": 511500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9024836924089964e-05, + "loss": 2.5852, + "step": 512000 + }, + { + "epoch": 0.39, + "learning_rate": 4.902388426659472e-05, + "loss": 2.5897, + "step": 512500 + }, + { + "epoch": 0.39, + "learning_rate": 4.902293160909948e-05, + "loss": 2.6276, + "step": 513000 + }, + { + "epoch": 0.39, + "learning_rate": 4.902197895160424e-05, + "loss": 2.6128, + "step": 513500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9021026294109e-05, + "loss": 2.6022, + "step": 514000 + }, + { + "epoch": 0.39, + "learning_rate": 4.9020075541928746e-05, + "loss": 2.592, + "step": 514500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9019122884433504e-05, + "loss": 2.6054, + "step": 515000 + }, + { + "epoch": 0.39, + "learning_rate": 4.901817022693826e-05, + "loss": 2.5991, + "step": 515500 + }, + { + "epoch": 0.39, + "learning_rate": 4.901721947475801e-05, + "loss": 2.6033, + "step": 516000 + }, + { + "epoch": 0.39, + "learning_rate": 4.901626872257776e-05, + "loss": 2.5981, + "step": 516500 + }, + { + "epoch": 0.39, + "learning_rate": 4.901531606508251e-05, + "loss": 2.6045, + "step": 517000 + }, + { + "epoch": 0.39, + "learning_rate": 4.901436340758727e-05, + "loss": 2.5973, + "step": 517500 + }, + { + "epoch": 0.39, + "learning_rate": 4.9013410750092034e-05, + "loss": 2.5758, + "step": 518000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9012458092596785e-05, + "loss": 2.6193, + "step": 518500 + }, + { + "epoch": 0.4, + "learning_rate": 4.9011505435101543e-05, + "loss": 2.591, + "step": 519000 + }, + { + "epoch": 0.4, + "learning_rate": 4.90105527776063e-05, + "loss": 2.5944, + "step": 519500 + }, + { + "epoch": 0.4, + "learning_rate": 4.900960012011106e-05, + "loss": 2.5772, + "step": 520000 + }, + { + "epoch": 0.4, + "learning_rate": 4.900864746261581e-05, + "loss": 2.6243, + "step": 520500 + }, + { + "epoch": 0.4, + "learning_rate": 4.9007694805120577e-05, + "loss": 2.6039, + "step": 521000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9006744052940325e-05, + "loss": 2.5787, + "step": 521500 + }, + { + "epoch": 0.4, + "learning_rate": 4.9005791395445076e-05, + "loss": 2.5598, + "step": 522000 + }, + { + "epoch": 0.4, + "learning_rate": 4.9004838737949835e-05, + "loss": 2.6046, + "step": 522500 + }, + { + "epoch": 0.4, + "learning_rate": 4.90038860804546e-05, + "loss": 2.5995, + "step": 523000 + }, + { + "epoch": 0.4, + "learning_rate": 4.900293342295935e-05, + "loss": 2.612, + "step": 523500 + }, + { + "epoch": 0.4, + "learning_rate": 4.900198076546411e-05, + "loss": 2.5767, + "step": 524000 + }, + { + "epoch": 0.4, + "learning_rate": 4.900102810796887e-05, + "loss": 2.5863, + "step": 524500 + }, + { + "epoch": 0.4, + "learning_rate": 4.9000075450473626e-05, + "loss": 2.5699, + "step": 525000 + }, + { + "epoch": 0.4, + "learning_rate": 4.8999122792978384e-05, + "loss": 2.5862, + "step": 525500 + }, + { + "epoch": 0.4, + "learning_rate": 4.899817204079813e-05, + "loss": 2.5888, + "step": 526000 + }, + { + "epoch": 0.4, + "learning_rate": 4.899722128861788e-05, + "loss": 2.5789, + "step": 526500 + }, + { + "epoch": 0.4, + "learning_rate": 4.899626863112264e-05, + "loss": 2.5935, + "step": 527000 + }, + { + "epoch": 0.4, + "learning_rate": 4.899531597362739e-05, + "loss": 2.5866, + "step": 527500 + }, + { + "epoch": 0.4, + "learning_rate": 4.8994363316132156e-05, + "loss": 2.5645, + "step": 528000 + }, + { + "epoch": 0.4, + "learning_rate": 4.899341065863691e-05, + "loss": 2.587, + "step": 528500 + }, + { + "epoch": 0.4, + "learning_rate": 4.8992458001141666e-05, + "loss": 2.5689, + "step": 529000 + }, + { + "epoch": 0.4, + "learning_rate": 4.8991505343646424e-05, + "loss": 2.5819, + "step": 529500 + }, + { + "epoch": 0.4, + "learning_rate": 4.899055268615118e-05, + "loss": 2.6145, + "step": 530000 + }, + { + "epoch": 0.4, + "learning_rate": 4.898960002865594e-05, + "loss": 2.5692, + "step": 530500 + }, + { + "epoch": 0.4, + "learning_rate": 4.898864927647569e-05, + "loss": 2.5872, + "step": 531000 + }, + { + "epoch": 0.41, + "learning_rate": 4.898769661898045e-05, + "loss": 2.593, + "step": 531500 + }, + { + "epoch": 0.41, + "learning_rate": 4.8986743961485205e-05, + "loss": 2.5856, + "step": 532000 + }, + { + "epoch": 0.41, + "learning_rate": 4.898579130398996e-05, + "loss": 2.5836, + "step": 532500 + }, + { + "epoch": 0.41, + "learning_rate": 4.8984838646494715e-05, + "loss": 2.6089, + "step": 533000 + }, + { + "epoch": 0.41, + "learning_rate": 4.898388598899948e-05, + "loss": 2.5723, + "step": 533500 + }, + { + "epoch": 0.41, + "learning_rate": 4.898293333150423e-05, + "loss": 2.5817, + "step": 534000 + }, + { + "epoch": 0.41, + "learning_rate": 4.898198067400899e-05, + "loss": 2.5881, + "step": 534500 + }, + { + "epoch": 0.41, + "learning_rate": 4.898102801651375e-05, + "loss": 2.5845, + "step": 535000 + }, + { + "epoch": 0.41, + "learning_rate": 4.8980077264333497e-05, + "loss": 2.6131, + "step": 535500 + }, + { + "epoch": 0.41, + "learning_rate": 4.8979124606838255e-05, + "loss": 2.5602, + "step": 536000 + }, + { + "epoch": 0.41, + "learning_rate": 4.897817194934301e-05, + "loss": 2.5914, + "step": 536500 + }, + { + "epoch": 0.41, + "learning_rate": 4.897721929184777e-05, + "loss": 2.6114, + "step": 537000 + }, + { + "epoch": 0.41, + "learning_rate": 4.897626853966752e-05, + "loss": 2.5844, + "step": 537500 + }, + { + "epoch": 0.41, + "learning_rate": 4.897531588217227e-05, + "loss": 2.5795, + "step": 538000 + }, + { + "epoch": 0.41, + "learning_rate": 4.8974363224677036e-05, + "loss": 2.5649, + "step": 538500 + }, + { + "epoch": 0.41, + "learning_rate": 4.897341056718179e-05, + "loss": 2.5521, + "step": 539000 + }, + { + "epoch": 0.41, + "learning_rate": 4.8972457909686546e-05, + "loss": 2.567, + "step": 539500 + }, + { + "epoch": 0.41, + "learning_rate": 4.8971505252191304e-05, + "loss": 2.5747, + "step": 540000 + }, + { + "epoch": 0.41, + "eval_accuracy": 0.5256214468279793, + "eval_loss": 2.5165464878082275, + "eval_runtime": 9416.0522, + "eval_samples_per_second": 29.204, + "eval_steps_per_second": 7.301, + "step": 540000 + }, + { + "epoch": 0.41, + "learning_rate": 4.897055259469606e-05, + "loss": 2.5886, + "step": 540500 + }, + { + "epoch": 0.41, + "learning_rate": 4.896959993720082e-05, + "loss": 2.5949, + "step": 541000 + }, + { + "epoch": 0.41, + "learning_rate": 4.896864727970558e-05, + "loss": 2.5847, + "step": 541500 + }, + { + "epoch": 0.41, + "learning_rate": 4.896769462221034e-05, + "loss": 2.5993, + "step": 542000 + }, + { + "epoch": 0.41, + "learning_rate": 4.8966745775345076e-05, + "loss": 2.5967, + "step": 542500 + }, + { + "epoch": 0.41, + "learning_rate": 4.8965793117849834e-05, + "loss": 2.5327, + "step": 543000 + }, + { + "epoch": 0.41, + "learning_rate": 4.8964840460354586e-05, + "loss": 2.5844, + "step": 543500 + }, + { + "epoch": 0.41, + "learning_rate": 4.896388780285935e-05, + "loss": 2.5667, + "step": 544000 + }, + { + "epoch": 0.41, + "learning_rate": 4.89629351453641e-05, + "loss": 2.5514, + "step": 544500 + }, + { + "epoch": 0.42, + "learning_rate": 4.896198248786886e-05, + "loss": 2.5898, + "step": 545000 + }, + { + "epoch": 0.42, + "learning_rate": 4.896103173568861e-05, + "loss": 2.6, + "step": 545500 + }, + { + "epoch": 0.42, + "learning_rate": 4.896007907819337e-05, + "loss": 2.5687, + "step": 546000 + }, + { + "epoch": 0.42, + "learning_rate": 4.8959126420698125e-05, + "loss": 2.5999, + "step": 546500 + }, + { + "epoch": 0.42, + "learning_rate": 4.8958173763202884e-05, + "loss": 2.5746, + "step": 547000 + }, + { + "epoch": 0.42, + "learning_rate": 4.895722110570764e-05, + "loss": 2.5982, + "step": 547500 + }, + { + "epoch": 0.42, + "learning_rate": 4.895627035352739e-05, + "loss": 2.6103, + "step": 548000 + }, + { + "epoch": 0.42, + "learning_rate": 4.895531960134714e-05, + "loss": 2.569, + "step": 548500 + }, + { + "epoch": 0.42, + "learning_rate": 4.895436694385189e-05, + "loss": 2.5567, + "step": 549000 + }, + { + "epoch": 0.42, + "learning_rate": 4.8953414286356655e-05, + "loss": 2.5987, + "step": 549500 + }, + { + "epoch": 0.42, + "learning_rate": 4.8952461628861407e-05, + "loss": 2.592, + "step": 550000 + }, + { + "epoch": 0.42, + "learning_rate": 4.8951508971366165e-05, + "loss": 2.5926, + "step": 550500 + }, + { + "epoch": 0.42, + "learning_rate": 4.895055631387093e-05, + "loss": 2.5759, + "step": 551000 + }, + { + "epoch": 0.42, + "learning_rate": 4.894960556169067e-05, + "loss": 2.5833, + "step": 551500 + }, + { + "epoch": 0.42, + "learning_rate": 4.894865290419543e-05, + "loss": 2.5767, + "step": 552000 + }, + { + "epoch": 0.42, + "learning_rate": 4.894770024670019e-05, + "loss": 2.5764, + "step": 552500 + }, + { + "epoch": 0.42, + "learning_rate": 4.8946747589204946e-05, + "loss": 2.5596, + "step": 553000 + }, + { + "epoch": 0.42, + "learning_rate": 4.8945794931709705e-05, + "loss": 2.5815, + "step": 553500 + }, + { + "epoch": 0.42, + "learning_rate": 4.894484417952945e-05, + "loss": 2.6082, + "step": 554000 + }, + { + "epoch": 0.42, + "learning_rate": 4.8943893427349194e-05, + "loss": 2.5899, + "step": 554500 + }, + { + "epoch": 0.42, + "learning_rate": 4.894294076985396e-05, + "loss": 2.5846, + "step": 555000 + }, + { + "epoch": 0.42, + "learning_rate": 4.894198811235871e-05, + "loss": 2.5914, + "step": 555500 + }, + { + "epoch": 0.42, + "learning_rate": 4.894103545486347e-05, + "loss": 2.5745, + "step": 556000 + }, + { + "epoch": 0.42, + "learning_rate": 4.894008470268322e-05, + "loss": 2.5803, + "step": 556500 + }, + { + "epoch": 0.42, + "learning_rate": 4.8939132045187976e-05, + "loss": 2.5613, + "step": 557000 + }, + { + "epoch": 0.42, + "learning_rate": 4.8938179387692734e-05, + "loss": 2.6123, + "step": 557500 + }, + { + "epoch": 0.43, + "learning_rate": 4.893722673019749e-05, + "loss": 2.5578, + "step": 558000 + }, + { + "epoch": 0.43, + "learning_rate": 4.893627407270225e-05, + "loss": 2.57, + "step": 558500 + }, + { + "epoch": 0.43, + "learning_rate": 4.893532141520701e-05, + "loss": 2.5602, + "step": 559000 + }, + { + "epoch": 0.43, + "learning_rate": 4.893436875771176e-05, + "loss": 2.593, + "step": 559500 + }, + { + "epoch": 0.43, + "learning_rate": 4.8933416100216525e-05, + "loss": 2.577, + "step": 560000 + }, + { + "epoch": 0.43, + "learning_rate": 4.8932463442721284e-05, + "loss": 2.5641, + "step": 560500 + }, + { + "epoch": 0.43, + "learning_rate": 4.8931510785226035e-05, + "loss": 2.5776, + "step": 561000 + }, + { + "epoch": 0.43, + "learning_rate": 4.8930558127730794e-05, + "loss": 2.5898, + "step": 561500 + }, + { + "epoch": 0.43, + "learning_rate": 4.892960547023555e-05, + "loss": 2.5663, + "step": 562000 + }, + { + "epoch": 0.43, + "learning_rate": 4.89286547180553e-05, + "loss": 2.5632, + "step": 562500 + }, + { + "epoch": 0.43, + "learning_rate": 4.892770206056006e-05, + "loss": 2.5737, + "step": 563000 + }, + { + "epoch": 0.43, + "learning_rate": 4.892674940306482e-05, + "loss": 2.5655, + "step": 563500 + }, + { + "epoch": 0.43, + "learning_rate": 4.8925796745569575e-05, + "loss": 2.5296, + "step": 564000 + }, + { + "epoch": 0.43, + "learning_rate": 4.892484599338932e-05, + "loss": 2.5813, + "step": 564500 + }, + { + "epoch": 0.43, + "learning_rate": 4.8923893335894075e-05, + "loss": 2.5744, + "step": 565000 + }, + { + "epoch": 0.43, + "learning_rate": 4.892294258371383e-05, + "loss": 2.5764, + "step": 565500 + }, + { + "epoch": 0.43, + "learning_rate": 4.892198992621859e-05, + "loss": 2.5807, + "step": 566000 + }, + { + "epoch": 0.43, + "learning_rate": 4.892103726872334e-05, + "loss": 2.5906, + "step": 566500 + }, + { + "epoch": 0.43, + "learning_rate": 4.89200846112281e-05, + "loss": 2.5214, + "step": 567000 + }, + { + "epoch": 0.43, + "learning_rate": 4.8919131953732856e-05, + "loss": 2.5861, + "step": 567500 + }, + { + "epoch": 0.43, + "learning_rate": 4.8918179296237615e-05, + "loss": 2.5611, + "step": 568000 + }, + { + "epoch": 0.43, + "learning_rate": 4.891722663874237e-05, + "loss": 2.5639, + "step": 568500 + }, + { + "epoch": 0.43, + "learning_rate": 4.891627398124713e-05, + "loss": 2.5643, + "step": 569000 + }, + { + "epoch": 0.43, + "learning_rate": 4.891532132375189e-05, + "loss": 2.5686, + "step": 569500 + }, + { + "epoch": 0.43, + "learning_rate": 4.891436866625664e-05, + "loss": 2.5763, + "step": 570000 + }, + { + "epoch": 0.43, + "eval_accuracy": 0.5270845587489317, + "eval_loss": 2.505284547805786, + "eval_runtime": 9432.2342, + "eval_samples_per_second": 29.154, + "eval_steps_per_second": 7.289, + "step": 570000 + }, + { + "epoch": 0.43, + "learning_rate": 4.8913416008761406e-05, + "loss": 2.5246, + "step": 570500 + }, + { + "epoch": 0.44, + "learning_rate": 4.891246335126616e-05, + "loss": 2.5753, + "step": 571000 + }, + { + "epoch": 0.44, + "learning_rate": 4.8911510693770916e-05, + "loss": 2.585, + "step": 571500 + }, + { + "epoch": 0.44, + "learning_rate": 4.8910559941590664e-05, + "loss": 2.5768, + "step": 572000 + }, + { + "epoch": 0.44, + "learning_rate": 4.890960728409542e-05, + "loss": 2.6062, + "step": 572500 + }, + { + "epoch": 0.44, + "learning_rate": 4.890865462660018e-05, + "loss": 2.5534, + "step": 573000 + }, + { + "epoch": 0.44, + "learning_rate": 4.890770387441993e-05, + "loss": 2.5247, + "step": 573500 + }, + { + "epoch": 0.44, + "learning_rate": 4.890675121692469e-05, + "loss": 2.5756, + "step": 574000 + }, + { + "epoch": 0.44, + "learning_rate": 4.8905798559429445e-05, + "loss": 2.5494, + "step": 574500 + }, + { + "epoch": 0.44, + "learning_rate": 4.8904847807249194e-05, + "loss": 2.5725, + "step": 575000 + }, + { + "epoch": 0.44, + "learning_rate": 4.8903895149753945e-05, + "loss": 2.5579, + "step": 575500 + }, + { + "epoch": 0.44, + "learning_rate": 4.890294249225871e-05, + "loss": 2.5732, + "step": 576000 + }, + { + "epoch": 0.44, + "learning_rate": 4.890198983476347e-05, + "loss": 2.5871, + "step": 576500 + }, + { + "epoch": 0.44, + "learning_rate": 4.890103717726822e-05, + "loss": 2.5768, + "step": 577000 + }, + { + "epoch": 0.44, + "learning_rate": 4.890008642508797e-05, + "loss": 2.5641, + "step": 577500 + }, + { + "epoch": 0.44, + "learning_rate": 4.8899133767592733e-05, + "loss": 2.5499, + "step": 578000 + }, + { + "epoch": 0.44, + "learning_rate": 4.8898181110097485e-05, + "loss": 2.5572, + "step": 578500 + }, + { + "epoch": 0.44, + "learning_rate": 4.889722845260224e-05, + "loss": 2.5845, + "step": 579000 + }, + { + "epoch": 0.44, + "learning_rate": 4.8896275795107e-05, + "loss": 2.539, + "step": 579500 + }, + { + "epoch": 0.44, + "learning_rate": 4.889532313761176e-05, + "loss": 2.5803, + "step": 580000 + }, + { + "epoch": 0.44, + "learning_rate": 4.889437048011651e-05, + "loss": 2.601, + "step": 580500 + }, + { + "epoch": 0.44, + "learning_rate": 4.8893417822621276e-05, + "loss": 2.5539, + "step": 581000 + }, + { + "epoch": 0.44, + "learning_rate": 4.8892465165126035e-05, + "loss": 2.5432, + "step": 581500 + }, + { + "epoch": 0.44, + "learning_rate": 4.8891514412945776e-05, + "loss": 2.5968, + "step": 582000 + }, + { + "epoch": 0.44, + "learning_rate": 4.8890561755450535e-05, + "loss": 2.5454, + "step": 582500 + }, + { + "epoch": 0.44, + "learning_rate": 4.88896090979553e-05, + "loss": 2.5819, + "step": 583000 + }, + { + "epoch": 0.44, + "learning_rate": 4.888865644046005e-05, + "loss": 2.5688, + "step": 583500 + }, + { + "epoch": 0.45, + "learning_rate": 4.888770378296481e-05, + "loss": 2.5972, + "step": 584000 + }, + { + "epoch": 0.45, + "learning_rate": 4.888675303078456e-05, + "loss": 2.5718, + "step": 584500 + }, + { + "epoch": 0.45, + "learning_rate": 4.8885800373289316e-05, + "loss": 2.5602, + "step": 585000 + }, + { + "epoch": 0.45, + "learning_rate": 4.8884847715794074e-05, + "loss": 2.5643, + "step": 585500 + }, + { + "epoch": 0.45, + "learning_rate": 4.8883895058298826e-05, + "loss": 2.5745, + "step": 586000 + }, + { + "epoch": 0.45, + "learning_rate": 4.888294240080359e-05, + "loss": 2.5657, + "step": 586500 + }, + { + "epoch": 0.45, + "learning_rate": 4.888198974330834e-05, + "loss": 2.5658, + "step": 587000 + }, + { + "epoch": 0.45, + "learning_rate": 4.88810370858131e-05, + "loss": 2.5612, + "step": 587500 + }, + { + "epoch": 0.45, + "learning_rate": 4.8880084428317866e-05, + "loss": 2.5861, + "step": 588000 + }, + { + "epoch": 0.45, + "learning_rate": 4.887913367613761e-05, + "loss": 2.5615, + "step": 588500 + }, + { + "epoch": 0.45, + "learning_rate": 4.8878181018642365e-05, + "loss": 2.5519, + "step": 589000 + }, + { + "epoch": 0.45, + "learning_rate": 4.8877228361147124e-05, + "loss": 2.5352, + "step": 589500 + }, + { + "epoch": 0.45, + "learning_rate": 4.887627760896687e-05, + "loss": 2.6373, + "step": 590000 + }, + { + "epoch": 0.45, + "learning_rate": 4.887532495147163e-05, + "loss": 2.5981, + "step": 590500 + }, + { + "epoch": 0.45, + "learning_rate": 4.887437229397639e-05, + "loss": 2.5953, + "step": 591000 + }, + { + "epoch": 0.45, + "learning_rate": 4.887341963648115e-05, + "loss": 2.5567, + "step": 591500 + }, + { + "epoch": 0.45, + "learning_rate": 4.8872466978985905e-05, + "loss": 2.5826, + "step": 592000 + }, + { + "epoch": 0.45, + "learning_rate": 4.8871516226805653e-05, + "loss": 2.5662, + "step": 592500 + }, + { + "epoch": 0.45, + "learning_rate": 4.8870563569310405e-05, + "loss": 2.582, + "step": 593000 + }, + { + "epoch": 0.45, + "learning_rate": 4.886961091181517e-05, + "loss": 2.5929, + "step": 593500 + }, + { + "epoch": 0.45, + "learning_rate": 4.886866015963492e-05, + "loss": 2.5374, + "step": 594000 + }, + { + "epoch": 0.45, + "learning_rate": 4.886770750213967e-05, + "loss": 2.5856, + "step": 594500 + }, + { + "epoch": 0.45, + "learning_rate": 4.886675484464443e-05, + "loss": 2.5694, + "step": 595000 + }, + { + "epoch": 0.45, + "learning_rate": 4.8865802187149186e-05, + "loss": 2.5857, + "step": 595500 + }, + { + "epoch": 0.45, + "learning_rate": 4.8864849529653945e-05, + "loss": 2.5597, + "step": 596000 + }, + { + "epoch": 0.45, + "learning_rate": 4.886389877747369e-05, + "loss": 2.5636, + "step": 596500 + }, + { + "epoch": 0.45, + "learning_rate": 4.886294802529344e-05, + "loss": 2.5573, + "step": 597000 + }, + { + "epoch": 0.46, + "learning_rate": 4.88619953677982e-05, + "loss": 2.5692, + "step": 597500 + }, + { + "epoch": 0.46, + "learning_rate": 4.886104271030296e-05, + "loss": 2.5766, + "step": 598000 + }, + { + "epoch": 0.46, + "learning_rate": 4.886009005280771e-05, + "loss": 2.5503, + "step": 598500 + }, + { + "epoch": 0.46, + "learning_rate": 4.885913739531247e-05, + "loss": 2.5367, + "step": 599000 + }, + { + "epoch": 0.46, + "learning_rate": 4.8858184737817226e-05, + "loss": 2.5407, + "step": 599500 + }, + { + "epoch": 0.46, + "learning_rate": 4.8857232080321984e-05, + "loss": 2.5422, + "step": 600000 + }, + { + "epoch": 0.46, + "eval_accuracy": 0.5284035143749843, + "eval_loss": 2.4944164752960205, + "eval_runtime": 9408.0544, + "eval_samples_per_second": 29.229, + "eval_steps_per_second": 7.307, + "step": 600000 + }, + { + "epoch": 0.46, + "learning_rate": 4.885627942282674e-05, + "loss": 2.5282, + "step": 600500 + }, + { + "epoch": 0.46, + "learning_rate": 4.88553267653315e-05, + "loss": 2.5668, + "step": 601000 + }, + { + "epoch": 0.46, + "learning_rate": 4.885437410783626e-05, + "loss": 2.5304, + "step": 601500 + }, + { + "epoch": 0.46, + "learning_rate": 4.885342145034101e-05, + "loss": 2.572, + "step": 602000 + }, + { + "epoch": 0.46, + "learning_rate": 4.8852468792845776e-05, + "loss": 2.5565, + "step": 602500 + }, + { + "epoch": 0.46, + "learning_rate": 4.8851516135350534e-05, + "loss": 2.5708, + "step": 603000 + }, + { + "epoch": 0.46, + "learning_rate": 4.8850563477855285e-05, + "loss": 2.5373, + "step": 603500 + }, + { + "epoch": 0.46, + "learning_rate": 4.8849612725675034e-05, + "loss": 2.5501, + "step": 604000 + }, + { + "epoch": 0.46, + "learning_rate": 4.884866006817979e-05, + "loss": 2.5333, + "step": 604500 + }, + { + "epoch": 0.46, + "learning_rate": 4.884770741068455e-05, + "loss": 2.5603, + "step": 605000 + }, + { + "epoch": 0.46, + "learning_rate": 4.88467566585043e-05, + "loss": 2.5624, + "step": 605500 + }, + { + "epoch": 0.46, + "learning_rate": 4.884580590632405e-05, + "loss": 2.5848, + "step": 606000 + }, + { + "epoch": 0.46, + "learning_rate": 4.8844853248828805e-05, + "loss": 2.5545, + "step": 606500 + }, + { + "epoch": 0.46, + "learning_rate": 4.8843900591333563e-05, + "loss": 2.5781, + "step": 607000 + }, + { + "epoch": 0.46, + "learning_rate": 4.8842947933838315e-05, + "loss": 2.5574, + "step": 607500 + }, + { + "epoch": 0.46, + "learning_rate": 4.884199527634308e-05, + "loss": 2.5623, + "step": 608000 + }, + { + "epoch": 0.46, + "learning_rate": 4.884104261884784e-05, + "loss": 2.5391, + "step": 608500 + }, + { + "epoch": 0.46, + "learning_rate": 4.884008996135259e-05, + "loss": 2.5471, + "step": 609000 + }, + { + "epoch": 0.46, + "learning_rate": 4.8839137303857355e-05, + "loss": 2.5576, + "step": 609500 + }, + { + "epoch": 0.46, + "learning_rate": 4.8838184646362106e-05, + "loss": 2.5849, + "step": 610000 + }, + { + "epoch": 0.47, + "learning_rate": 4.8837231988866865e-05, + "loss": 2.5478, + "step": 610500 + }, + { + "epoch": 0.47, + "learning_rate": 4.883627933137162e-05, + "loss": 2.5847, + "step": 611000 + }, + { + "epoch": 0.47, + "learning_rate": 4.883532667387638e-05, + "loss": 2.5785, + "step": 611500 + }, + { + "epoch": 0.47, + "learning_rate": 4.883437592169613e-05, + "loss": 2.5349, + "step": 612000 + }, + { + "epoch": 0.47, + "learning_rate": 4.883342516951588e-05, + "loss": 2.5917, + "step": 612500 + }, + { + "epoch": 0.47, + "learning_rate": 4.8832472512020636e-05, + "loss": 2.5535, + "step": 613000 + }, + { + "epoch": 0.47, + "learning_rate": 4.8831519854525394e-05, + "loss": 2.5452, + "step": 613500 + }, + { + "epoch": 0.47, + "learning_rate": 4.8830567197030146e-05, + "loss": 2.5837, + "step": 614000 + }, + { + "epoch": 0.47, + "learning_rate": 4.8829614539534904e-05, + "loss": 2.5961, + "step": 614500 + }, + { + "epoch": 0.47, + "learning_rate": 4.882866188203967e-05, + "loss": 2.5671, + "step": 615000 + }, + { + "epoch": 0.47, + "learning_rate": 4.882770922454442e-05, + "loss": 2.5642, + "step": 615500 + }, + { + "epoch": 0.47, + "learning_rate": 4.882675656704918e-05, + "loss": 2.5708, + "step": 616000 + }, + { + "epoch": 0.47, + "learning_rate": 4.882580390955394e-05, + "loss": 2.5436, + "step": 616500 + }, + { + "epoch": 0.47, + "learning_rate": 4.8824853157373686e-05, + "loss": 2.5578, + "step": 617000 + }, + { + "epoch": 0.47, + "learning_rate": 4.8823900499878444e-05, + "loss": 2.5543, + "step": 617500 + }, + { + "epoch": 0.47, + "learning_rate": 4.8822947842383195e-05, + "loss": 2.5486, + "step": 618000 + }, + { + "epoch": 0.47, + "learning_rate": 4.882199518488796e-05, + "loss": 2.5571, + "step": 618500 + }, + { + "epoch": 0.47, + "learning_rate": 4.882104252739272e-05, + "loss": 2.5487, + "step": 619000 + }, + { + "epoch": 0.47, + "learning_rate": 4.882008986989747e-05, + "loss": 2.5652, + "step": 619500 + }, + { + "epoch": 0.47, + "learning_rate": 4.8819137212402235e-05, + "loss": 2.5337, + "step": 620000 + }, + { + "epoch": 0.47, + "learning_rate": 4.881818455490699e-05, + "loss": 2.5391, + "step": 620500 + }, + { + "epoch": 0.47, + "learning_rate": 4.8817231897411745e-05, + "loss": 2.5752, + "step": 621000 + }, + { + "epoch": 0.47, + "learning_rate": 4.8816283050546483e-05, + "loss": 2.5687, + "step": 621500 + }, + { + "epoch": 0.47, + "learning_rate": 4.881533039305125e-05, + "loss": 2.598, + "step": 622000 + }, + { + "epoch": 0.47, + "learning_rate": 4.8814377735556e-05, + "loss": 2.5839, + "step": 622500 + }, + { + "epoch": 0.47, + "learning_rate": 4.881342507806076e-05, + "loss": 2.5837, + "step": 623000 + }, + { + "epoch": 0.48, + "learning_rate": 4.8812472420565517e-05, + "loss": 2.5647, + "step": 623500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8811521668385265e-05, + "loss": 2.5452, + "step": 624000 + }, + { + "epoch": 0.48, + "learning_rate": 4.881056901089002e-05, + "loss": 2.551, + "step": 624500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8809618258709765e-05, + "loss": 2.5592, + "step": 625000 + }, + { + "epoch": 0.48, + "learning_rate": 4.880866560121452e-05, + "loss": 2.5408, + "step": 625500 + }, + { + "epoch": 0.48, + "learning_rate": 4.880771294371929e-05, + "loss": 2.5772, + "step": 626000 + }, + { + "epoch": 0.48, + "learning_rate": 4.880676028622404e-05, + "loss": 2.5597, + "step": 626500 + }, + { + "epoch": 0.48, + "learning_rate": 4.88058076287288e-05, + "loss": 2.5607, + "step": 627000 + }, + { + "epoch": 0.48, + "learning_rate": 4.8804854971233556e-05, + "loss": 2.5403, + "step": 627500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8803902313738314e-05, + "loss": 2.5646, + "step": 628000 + }, + { + "epoch": 0.48, + "learning_rate": 4.880294965624307e-05, + "loss": 2.5266, + "step": 628500 + }, + { + "epoch": 0.48, + "learning_rate": 4.880199890406282e-05, + "loss": 2.5615, + "step": 629000 + }, + { + "epoch": 0.48, + "learning_rate": 4.880104815188257e-05, + "loss": 2.5383, + "step": 629500 + }, + { + "epoch": 0.48, + "learning_rate": 4.880009549438733e-05, + "loss": 2.5656, + "step": 630000 + }, + { + "epoch": 0.48, + "eval_accuracy": 0.5297416443914567, + "eval_loss": 2.486067771911621, + "eval_runtime": 9415.1025, + "eval_samples_per_second": 29.207, + "eval_steps_per_second": 7.302, + "step": 630000 + }, + { + "epoch": 0.48, + "learning_rate": 4.879914283689208e-05, + "loss": 2.5756, + "step": 630500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8798190179396844e-05, + "loss": 2.5631, + "step": 631000 + }, + { + "epoch": 0.48, + "learning_rate": 4.87972375219016e-05, + "loss": 2.5694, + "step": 631500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8796284864406354e-05, + "loss": 2.5522, + "step": 632000 + }, + { + "epoch": 0.48, + "learning_rate": 4.879533220691111e-05, + "loss": 2.5608, + "step": 632500 + }, + { + "epoch": 0.48, + "learning_rate": 4.879437954941587e-05, + "loss": 2.5621, + "step": 633000 + }, + { + "epoch": 0.48, + "learning_rate": 4.879342689192063e-05, + "loss": 2.5046, + "step": 633500 + }, + { + "epoch": 0.48, + "learning_rate": 4.879247423442539e-05, + "loss": 2.5398, + "step": 634000 + }, + { + "epoch": 0.48, + "learning_rate": 4.8791521576930145e-05, + "loss": 2.5526, + "step": 634500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8790568919434904e-05, + "loss": 2.5357, + "step": 635000 + }, + { + "epoch": 0.48, + "learning_rate": 4.878962007256964e-05, + "loss": 2.5391, + "step": 635500 + }, + { + "epoch": 0.48, + "learning_rate": 4.8788667415074393e-05, + "loss": 2.5564, + "step": 636000 + }, + { + "epoch": 0.49, + "learning_rate": 4.878771475757916e-05, + "loss": 2.5525, + "step": 636500 + }, + { + "epoch": 0.49, + "learning_rate": 4.878676210008391e-05, + "loss": 2.5343, + "step": 637000 + }, + { + "epoch": 0.49, + "learning_rate": 4.878580944258867e-05, + "loss": 2.5426, + "step": 637500 + }, + { + "epoch": 0.49, + "learning_rate": 4.878485678509343e-05, + "loss": 2.5558, + "step": 638000 + }, + { + "epoch": 0.49, + "learning_rate": 4.8783904127598185e-05, + "loss": 2.5494, + "step": 638500 + }, + { + "epoch": 0.49, + "learning_rate": 4.878295147010294e-05, + "loss": 2.5582, + "step": 639000 + }, + { + "epoch": 0.49, + "learning_rate": 4.87819988126077e-05, + "loss": 2.565, + "step": 639500 + }, + { + "epoch": 0.49, + "learning_rate": 4.878104806042745e-05, + "loss": 2.5377, + "step": 640000 + }, + { + "epoch": 0.49, + "learning_rate": 4.878009540293221e-05, + "loss": 2.5619, + "step": 640500 + }, + { + "epoch": 0.49, + "learning_rate": 4.877914274543696e-05, + "loss": 2.5582, + "step": 641000 + }, + { + "epoch": 0.49, + "learning_rate": 4.8778190087941725e-05, + "loss": 2.541, + "step": 641500 + }, + { + "epoch": 0.49, + "learning_rate": 4.877723933576147e-05, + "loss": 2.5546, + "step": 642000 + }, + { + "epoch": 0.49, + "learning_rate": 4.8776288583581214e-05, + "loss": 2.5695, + "step": 642500 + }, + { + "epoch": 0.49, + "learning_rate": 4.877533592608597e-05, + "loss": 2.5482, + "step": 643000 + }, + { + "epoch": 0.49, + "learning_rate": 4.877438517390572e-05, + "loss": 2.5599, + "step": 643500 + }, + { + "epoch": 0.49, + "learning_rate": 4.877343251641048e-05, + "loss": 2.5551, + "step": 644000 + }, + { + "epoch": 0.49, + "learning_rate": 4.877247985891524e-05, + "loss": 2.5748, + "step": 644500 + }, + { + "epoch": 0.49, + "learning_rate": 4.8771527201419996e-05, + "loss": 2.5434, + "step": 645000 + }, + { + "epoch": 0.49, + "learning_rate": 4.8770574543924754e-05, + "loss": 2.5653, + "step": 645500 + }, + { + "epoch": 0.49, + "learning_rate": 4.876962188642951e-05, + "loss": 2.5685, + "step": 646000 + }, + { + "epoch": 0.49, + "learning_rate": 4.8768669228934264e-05, + "loss": 2.5399, + "step": 646500 + }, + { + "epoch": 0.49, + "learning_rate": 4.876771657143903e-05, + "loss": 2.5867, + "step": 647000 + }, + { + "epoch": 0.49, + "learning_rate": 4.876676391394379e-05, + "loss": 2.5336, + "step": 647500 + }, + { + "epoch": 0.49, + "learning_rate": 4.876581316176353e-05, + "loss": 2.5478, + "step": 648000 + }, + { + "epoch": 0.49, + "learning_rate": 4.876486050426829e-05, + "loss": 2.5523, + "step": 648500 + }, + { + "epoch": 0.49, + "learning_rate": 4.876390784677305e-05, + "loss": 2.5737, + "step": 649000 + }, + { + "epoch": 0.5, + "learning_rate": 4.8762957094592794e-05, + "loss": 2.5375, + "step": 649500 + }, + { + "epoch": 0.5, + "learning_rate": 4.876200443709755e-05, + "loss": 2.5383, + "step": 650000 + }, + { + "epoch": 0.5, + "learning_rate": 4.876105177960231e-05, + "loss": 2.5543, + "step": 650500 + }, + { + "epoch": 0.5, + "learning_rate": 4.876009912210707e-05, + "loss": 2.5613, + "step": 651000 + }, + { + "epoch": 0.5, + "learning_rate": 4.875914646461183e-05, + "loss": 2.567, + "step": 651500 + }, + { + "epoch": 0.5, + "learning_rate": 4.875819380711658e-05, + "loss": 2.5231, + "step": 652000 + }, + { + "epoch": 0.5, + "learning_rate": 4.875724114962134e-05, + "loss": 2.4986, + "step": 652500 + }, + { + "epoch": 0.5, + "learning_rate": 4.8756288492126095e-05, + "loss": 2.5637, + "step": 653000 + }, + { + "epoch": 0.5, + "learning_rate": 4.875533583463085e-05, + "loss": 2.5403, + "step": 653500 + }, + { + "epoch": 0.5, + "learning_rate": 4.875438317713562e-05, + "loss": 2.529, + "step": 654000 + }, + { + "epoch": 0.5, + "learning_rate": 4.875343242495536e-05, + "loss": 2.5484, + "step": 654500 + }, + { + "epoch": 0.5, + "learning_rate": 4.875247976746012e-05, + "loss": 2.5776, + "step": 655000 + }, + { + "epoch": 0.5, + "learning_rate": 4.8751527109964876e-05, + "loss": 2.5572, + "step": 655500 + }, + { + "epoch": 0.5, + "learning_rate": 4.8750574452469635e-05, + "loss": 2.5701, + "step": 656000 + }, + { + "epoch": 0.5, + "learning_rate": 4.874962179497439e-05, + "loss": 2.5078, + "step": 656500 + }, + { + "epoch": 0.5, + "learning_rate": 4.8748669137479144e-05, + "loss": 2.5622, + "step": 657000 + }, + { + "epoch": 0.5, + "learning_rate": 4.874771647998391e-05, + "loss": 2.5526, + "step": 657500 + }, + { + "epoch": 0.5, + "learning_rate": 4.874676382248866e-05, + "loss": 2.5706, + "step": 658000 + }, + { + "epoch": 0.5, + "learning_rate": 4.874581116499342e-05, + "loss": 2.5637, + "step": 658500 + }, + { + "epoch": 0.5, + "learning_rate": 4.874486041281317e-05, + "loss": 2.5495, + "step": 659000 + }, + { + "epoch": 0.5, + "learning_rate": 4.874390966063292e-05, + "loss": 2.5396, + "step": 659500 + }, + { + "epoch": 0.5, + "learning_rate": 4.8742957003137674e-05, + "loss": 2.5344, + "step": 660000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.5306867278424278, + "eval_loss": 2.4773244857788086, + "eval_runtime": 9425.3328, + "eval_samples_per_second": 29.175, + "eval_steps_per_second": 7.294, + "step": 660000 + }, + { + "epoch": 0.5, + "learning_rate": 4.874200434564243e-05, + "loss": 2.5531, + "step": 660500 + }, + { + "epoch": 0.5, + "learning_rate": 4.874105168814719e-05, + "loss": 2.5221, + "step": 661000 + }, + { + "epoch": 0.5, + "learning_rate": 4.874009903065195e-05, + "loss": 2.59, + "step": 661500 + }, + { + "epoch": 0.5, + "learning_rate": 4.873914637315671e-05, + "loss": 2.567, + "step": 662000 + }, + { + "epoch": 0.5, + "learning_rate": 4.873819562097645e-05, + "loss": 2.5654, + "step": 662500 + }, + { + "epoch": 0.51, + "learning_rate": 4.8737242963481214e-05, + "loss": 2.5486, + "step": 663000 + }, + { + "epoch": 0.51, + "learning_rate": 4.873629030598597e-05, + "loss": 2.5288, + "step": 663500 + }, + { + "epoch": 0.51, + "learning_rate": 4.8735337648490724e-05, + "loss": 2.5396, + "step": 664000 + }, + { + "epoch": 0.51, + "learning_rate": 4.873438499099549e-05, + "loss": 2.5294, + "step": 664500 + }, + { + "epoch": 0.51, + "learning_rate": 4.873343423881524e-05, + "loss": 2.5545, + "step": 665000 + }, + { + "epoch": 0.51, + "learning_rate": 4.873248158131999e-05, + "loss": 2.5236, + "step": 665500 + }, + { + "epoch": 0.51, + "learning_rate": 4.873152892382475e-05, + "loss": 2.524, + "step": 666000 + }, + { + "epoch": 0.51, + "learning_rate": 4.8730576266329505e-05, + "loss": 2.5508, + "step": 666500 + }, + { + "epoch": 0.51, + "learning_rate": 4.872962360883426e-05, + "loss": 2.548, + "step": 667000 + }, + { + "epoch": 0.51, + "learning_rate": 4.8728670951339015e-05, + "loss": 2.552, + "step": 667500 + }, + { + "epoch": 0.51, + "learning_rate": 4.872772019915876e-05, + "loss": 2.5529, + "step": 668000 + }, + { + "epoch": 0.51, + "learning_rate": 4.872676754166353e-05, + "loss": 2.4969, + "step": 668500 + }, + { + "epoch": 0.51, + "learning_rate": 4.8725816789483276e-05, + "loss": 2.5471, + "step": 669000 + }, + { + "epoch": 0.51, + "learning_rate": 4.872486413198803e-05, + "loss": 2.5424, + "step": 669500 + }, + { + "epoch": 0.51, + "learning_rate": 4.8723911474492786e-05, + "loss": 2.5373, + "step": 670000 + }, + { + "epoch": 0.51, + "learning_rate": 4.8722958816997545e-05, + "loss": 2.5443, + "step": 670500 + }, + { + "epoch": 0.51, + "learning_rate": 4.87220061595023e-05, + "loss": 2.5299, + "step": 671000 + }, + { + "epoch": 0.51, + "learning_rate": 4.872105540732205e-05, + "loss": 2.5615, + "step": 671500 + }, + { + "epoch": 0.51, + "learning_rate": 4.872010274982681e-05, + "loss": 2.5374, + "step": 672000 + }, + { + "epoch": 0.51, + "learning_rate": 4.871915009233157e-05, + "loss": 2.5591, + "step": 672500 + }, + { + "epoch": 0.51, + "learning_rate": 4.8718197434836326e-05, + "loss": 2.5736, + "step": 673000 + }, + { + "epoch": 0.51, + "learning_rate": 4.8717244777341084e-05, + "loss": 2.5431, + "step": 673500 + }, + { + "epoch": 0.51, + "learning_rate": 4.871629211984584e-05, + "loss": 2.5586, + "step": 674000 + }, + { + "epoch": 0.51, + "learning_rate": 4.8715339462350594e-05, + "loss": 2.5191, + "step": 674500 + }, + { + "epoch": 0.51, + "learning_rate": 4.871438680485535e-05, + "loss": 2.5578, + "step": 675000 + }, + { + "epoch": 0.51, + "learning_rate": 4.871343605267511e-05, + "loss": 2.5528, + "step": 675500 + }, + { + "epoch": 0.52, + "learning_rate": 4.871248339517986e-05, + "loss": 2.5669, + "step": 676000 + }, + { + "epoch": 0.52, + "learning_rate": 4.871153073768462e-05, + "loss": 2.5569, + "step": 676500 + }, + { + "epoch": 0.52, + "learning_rate": 4.8710578080189375e-05, + "loss": 2.5039, + "step": 677000 + }, + { + "epoch": 0.52, + "learning_rate": 4.8709625422694134e-05, + "loss": 2.5063, + "step": 677500 + }, + { + "epoch": 0.52, + "learning_rate": 4.870867276519889e-05, + "loss": 2.5292, + "step": 678000 + }, + { + "epoch": 0.52, + "learning_rate": 4.870772010770365e-05, + "loss": 2.5737, + "step": 678500 + }, + { + "epoch": 0.52, + "learning_rate": 4.870676745020841e-05, + "loss": 2.5232, + "step": 679000 + }, + { + "epoch": 0.52, + "learning_rate": 4.870581479271316e-05, + "loss": 2.5348, + "step": 679500 + }, + { + "epoch": 0.52, + "learning_rate": 4.870486404053291e-05, + "loss": 2.5561, + "step": 680000 + }, + { + "epoch": 0.52, + "learning_rate": 4.8703911383037673e-05, + "loss": 2.5313, + "step": 680500 + }, + { + "epoch": 0.52, + "learning_rate": 4.870296063085742e-05, + "loss": 2.573, + "step": 681000 + }, + { + "epoch": 0.52, + "learning_rate": 4.870200797336217e-05, + "loss": 2.5602, + "step": 681500 + }, + { + "epoch": 0.52, + "learning_rate": 4.870105531586693e-05, + "loss": 2.513, + "step": 682000 + }, + { + "epoch": 0.52, + "learning_rate": 4.870010265837169e-05, + "loss": 2.5733, + "step": 682500 + }, + { + "epoch": 0.52, + "learning_rate": 4.869915190619144e-05, + "loss": 2.5505, + "step": 683000 + }, + { + "epoch": 0.52, + "learning_rate": 4.8698199248696196e-05, + "loss": 2.5238, + "step": 683500 + }, + { + "epoch": 0.52, + "learning_rate": 4.869724659120095e-05, + "loss": 2.5421, + "step": 684000 + }, + { + "epoch": 0.52, + "learning_rate": 4.869629393370571e-05, + "loss": 2.5464, + "step": 684500 + }, + { + "epoch": 0.52, + "learning_rate": 4.869534318152546e-05, + "loss": 2.5229, + "step": 685000 + }, + { + "epoch": 0.52, + "learning_rate": 4.869439052403021e-05, + "loss": 2.527, + "step": 685500 + }, + { + "epoch": 0.52, + "learning_rate": 4.869343977184996e-05, + "loss": 2.5014, + "step": 686000 + }, + { + "epoch": 0.52, + "learning_rate": 4.8692487114354726e-05, + "loss": 2.5508, + "step": 686500 + }, + { + "epoch": 0.52, + "learning_rate": 4.869153445685948e-05, + "loss": 2.5354, + "step": 687000 + }, + { + "epoch": 0.52, + "learning_rate": 4.8690581799364236e-05, + "loss": 2.5611, + "step": 687500 + }, + { + "epoch": 0.52, + "learning_rate": 4.8689629141868994e-05, + "loss": 2.5014, + "step": 688000 + }, + { + "epoch": 0.52, + "learning_rate": 4.868867648437375e-05, + "loss": 2.5469, + "step": 688500 + }, + { + "epoch": 0.53, + "learning_rate": 4.86877257321935e-05, + "loss": 2.5727, + "step": 689000 + }, + { + "epoch": 0.53, + "learning_rate": 4.868677307469825e-05, + "loss": 2.5306, + "step": 689500 + }, + { + "epoch": 0.53, + "learning_rate": 4.868582041720302e-05, + "loss": 2.5202, + "step": 690000 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.532116129947965, + "eval_loss": 2.4686408042907715, + "eval_runtime": 9410.3379, + "eval_samples_per_second": 29.222, + "eval_steps_per_second": 7.305, + "step": 690000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8684867759707776e-05, + "loss": 2.5555, + "step": 690500 + }, + { + "epoch": 0.53, + "learning_rate": 4.868391700752752e-05, + "loss": 2.5307, + "step": 691000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8682964350032275e-05, + "loss": 2.5264, + "step": 691500 + }, + { + "epoch": 0.53, + "learning_rate": 4.868201169253704e-05, + "loss": 2.5466, + "step": 692000 + }, + { + "epoch": 0.53, + "learning_rate": 4.868105903504179e-05, + "loss": 2.5391, + "step": 692500 + }, + { + "epoch": 0.53, + "learning_rate": 4.868010637754655e-05, + "loss": 2.5694, + "step": 693000 + }, + { + "epoch": 0.53, + "learning_rate": 4.867915372005131e-05, + "loss": 2.5597, + "step": 693500 + }, + { + "epoch": 0.53, + "learning_rate": 4.867820296787106e-05, + "loss": 2.5412, + "step": 694000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8677250310375815e-05, + "loss": 2.5648, + "step": 694500 + }, + { + "epoch": 0.53, + "learning_rate": 4.8676297652880573e-05, + "loss": 2.5592, + "step": 695000 + }, + { + "epoch": 0.53, + "learning_rate": 4.867534499538533e-05, + "loss": 2.5439, + "step": 695500 + }, + { + "epoch": 0.53, + "learning_rate": 4.867439233789008e-05, + "loss": 2.5169, + "step": 696000 + }, + { + "epoch": 0.53, + "learning_rate": 4.867343968039484e-05, + "loss": 2.5515, + "step": 696500 + }, + { + "epoch": 0.53, + "learning_rate": 4.8672488928214597e-05, + "loss": 2.5607, + "step": 697000 + }, + { + "epoch": 0.53, + "learning_rate": 4.867153627071935e-05, + "loss": 2.5531, + "step": 697500 + }, + { + "epoch": 0.53, + "learning_rate": 4.8670583613224106e-05, + "loss": 2.5409, + "step": 698000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8669630955728865e-05, + "loss": 2.5239, + "step": 698500 + }, + { + "epoch": 0.53, + "learning_rate": 4.866867829823362e-05, + "loss": 2.5463, + "step": 699000 + }, + { + "epoch": 0.53, + "learning_rate": 4.866772754605337e-05, + "loss": 2.5302, + "step": 699500 + }, + { + "epoch": 0.53, + "learning_rate": 4.866677488855813e-05, + "loss": 2.5167, + "step": 700000 + }, + { + "epoch": 0.53, + "learning_rate": 4.866582223106289e-05, + "loss": 2.5443, + "step": 700500 + }, + { + "epoch": 0.53, + "learning_rate": 4.8664869573567646e-05, + "loss": 2.5615, + "step": 701000 + }, + { + "epoch": 0.53, + "learning_rate": 4.8663918821387394e-05, + "loss": 2.552, + "step": 701500 + }, + { + "epoch": 0.54, + "learning_rate": 4.8662968069207136e-05, + "loss": 2.5655, + "step": 702000 + }, + { + "epoch": 0.54, + "learning_rate": 4.86620154117119e-05, + "loss": 2.5419, + "step": 702500 + }, + { + "epoch": 0.54, + "learning_rate": 4.866106275421666e-05, + "loss": 2.5475, + "step": 703000 + }, + { + "epoch": 0.54, + "learning_rate": 4.866011009672141e-05, + "loss": 2.5282, + "step": 703500 + }, + { + "epoch": 0.54, + "learning_rate": 4.865915743922617e-05, + "loss": 2.5345, + "step": 704000 + }, + { + "epoch": 0.54, + "learning_rate": 4.865820478173093e-05, + "loss": 2.5373, + "step": 704500 + }, + { + "epoch": 0.54, + "learning_rate": 4.8657252124235686e-05, + "loss": 2.5229, + "step": 705000 + }, + { + "epoch": 0.54, + "learning_rate": 4.865629946674044e-05, + "loss": 2.5327, + "step": 705500 + }, + { + "epoch": 0.54, + "learning_rate": 4.86553468092452e-05, + "loss": 2.526, + "step": 706000 + }, + { + "epoch": 0.54, + "learning_rate": 4.865439605706495e-05, + "loss": 2.5441, + "step": 706500 + }, + { + "epoch": 0.54, + "learning_rate": 4.86534433995697e-05, + "loss": 2.5054, + "step": 707000 + }, + { + "epoch": 0.54, + "learning_rate": 4.865249074207446e-05, + "loss": 2.4997, + "step": 707500 + }, + { + "epoch": 0.54, + "learning_rate": 4.8651538084579225e-05, + "loss": 2.5454, + "step": 708000 + }, + { + "epoch": 0.54, + "learning_rate": 4.865058542708398e-05, + "loss": 2.5189, + "step": 708500 + }, + { + "epoch": 0.54, + "learning_rate": 4.8649632769588735e-05, + "loss": 2.5273, + "step": 709000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8648680112093493e-05, + "loss": 2.4977, + "step": 709500 + }, + { + "epoch": 0.54, + "learning_rate": 4.864772935991324e-05, + "loss": 2.5801, + "step": 710000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8646776702418e-05, + "loss": 2.5558, + "step": 710500 + }, + { + "epoch": 0.54, + "learning_rate": 4.864582404492276e-05, + "loss": 2.5305, + "step": 711000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8644871387427517e-05, + "loss": 2.4888, + "step": 711500 + }, + { + "epoch": 0.54, + "learning_rate": 4.8643920635247265e-05, + "loss": 2.534, + "step": 712000 + }, + { + "epoch": 0.54, + "learning_rate": 4.8642967977752016e-05, + "loss": 2.5377, + "step": 712500 + }, + { + "epoch": 0.54, + "learning_rate": 4.864201532025678e-05, + "loss": 2.5441, + "step": 713000 + }, + { + "epoch": 0.54, + "learning_rate": 4.864106266276153e-05, + "loss": 2.5437, + "step": 713500 + }, + { + "epoch": 0.54, + "learning_rate": 4.864011000526629e-05, + "loss": 2.5334, + "step": 714000 + }, + { + "epoch": 0.54, + "learning_rate": 4.863915734777105e-05, + "loss": 2.5425, + "step": 714500 + }, + { + "epoch": 0.54, + "learning_rate": 4.863820469027581e-05, + "loss": 2.5258, + "step": 715000 + }, + { + "epoch": 0.55, + "learning_rate": 4.8637252032780566e-05, + "loss": 2.5401, + "step": 715500 + }, + { + "epoch": 0.55, + "learning_rate": 4.8636299375285324e-05, + "loss": 2.5349, + "step": 716000 + }, + { + "epoch": 0.55, + "learning_rate": 4.863534862310507e-05, + "loss": 2.5199, + "step": 716500 + }, + { + "epoch": 0.55, + "learning_rate": 4.863439596560983e-05, + "loss": 2.5394, + "step": 717000 + }, + { + "epoch": 0.55, + "learning_rate": 4.863344330811458e-05, + "loss": 2.5368, + "step": 717500 + }, + { + "epoch": 0.55, + "learning_rate": 4.863249065061935e-05, + "loss": 2.5311, + "step": 718000 + }, + { + "epoch": 0.55, + "learning_rate": 4.86315379931241e-05, + "loss": 2.5372, + "step": 718500 + }, + { + "epoch": 0.55, + "learning_rate": 4.863058533562886e-05, + "loss": 2.5266, + "step": 719000 + }, + { + "epoch": 0.55, + "learning_rate": 4.8629634583448606e-05, + "loss": 2.5009, + "step": 719500 + }, + { + "epoch": 0.55, + "learning_rate": 4.8628681925953364e-05, + "loss": 2.5152, + "step": 720000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.5331774896893751, + "eval_loss": 2.461303949356079, + "eval_runtime": 9436.303, + "eval_samples_per_second": 29.141, + "eval_steps_per_second": 7.285, + "step": 720000 + }, + { + "epoch": 0.55, + "learning_rate": 4.862772926845812e-05, + "loss": 2.5644, + "step": 720500 + }, + { + "epoch": 0.55, + "learning_rate": 4.862677661096288e-05, + "loss": 2.5311, + "step": 721000 + }, + { + "epoch": 0.55, + "learning_rate": 4.862582395346764e-05, + "loss": 2.5312, + "step": 721500 + }, + { + "epoch": 0.55, + "learning_rate": 4.862487320128739e-05, + "loss": 2.5506, + "step": 722000 + }, + { + "epoch": 0.55, + "learning_rate": 4.8623920543792145e-05, + "loss": 2.5207, + "step": 722500 + }, + { + "epoch": 0.55, + "learning_rate": 4.86229678862969e-05, + "loss": 2.5311, + "step": 723000 + }, + { + "epoch": 0.55, + "learning_rate": 4.862201713411665e-05, + "loss": 2.5135, + "step": 723500 + }, + { + "epoch": 0.55, + "learning_rate": 4.862106447662141e-05, + "loss": 2.5328, + "step": 724000 + }, + { + "epoch": 0.55, + "learning_rate": 4.862011181912616e-05, + "loss": 2.5216, + "step": 724500 + }, + { + "epoch": 0.55, + "learning_rate": 4.861915916163092e-05, + "loss": 2.5389, + "step": 725000 + }, + { + "epoch": 0.55, + "learning_rate": 4.861820650413568e-05, + "loss": 2.5443, + "step": 725500 + }, + { + "epoch": 0.55, + "learning_rate": 4.8617253846640437e-05, + "loss": 2.5346, + "step": 726000 + }, + { + "epoch": 0.55, + "learning_rate": 4.8616301189145195e-05, + "loss": 2.4951, + "step": 726500 + }, + { + "epoch": 0.55, + "learning_rate": 4.861534853164995e-05, + "loss": 2.5471, + "step": 727000 + }, + { + "epoch": 0.55, + "learning_rate": 4.86143977794697e-05, + "loss": 2.5386, + "step": 727500 + }, + { + "epoch": 0.55, + "learning_rate": 4.861344512197445e-05, + "loss": 2.5442, + "step": 728000 + }, + { + "epoch": 0.56, + "learning_rate": 4.861249246447921e-05, + "loss": 2.5288, + "step": 728500 + }, + { + "epoch": 0.56, + "learning_rate": 4.8611539806983976e-05, + "loss": 2.5318, + "step": 729000 + }, + { + "epoch": 0.56, + "learning_rate": 4.861058905480372e-05, + "loss": 2.5224, + "step": 729500 + }, + { + "epoch": 0.56, + "learning_rate": 4.8609636397308476e-05, + "loss": 2.5255, + "step": 730000 + }, + { + "epoch": 0.56, + "learning_rate": 4.860868373981324e-05, + "loss": 2.5103, + "step": 730500 + }, + { + "epoch": 0.56, + "learning_rate": 4.860773108231799e-05, + "loss": 2.5233, + "step": 731000 + }, + { + "epoch": 0.56, + "learning_rate": 4.860677842482275e-05, + "loss": 2.5143, + "step": 731500 + }, + { + "epoch": 0.56, + "learning_rate": 4.860582576732751e-05, + "loss": 2.4888, + "step": 732000 + }, + { + "epoch": 0.56, + "learning_rate": 4.860487310983227e-05, + "loss": 2.5293, + "step": 732500 + }, + { + "epoch": 0.56, + "learning_rate": 4.8603920452337026e-05, + "loss": 2.5312, + "step": 733000 + }, + { + "epoch": 0.56, + "learning_rate": 4.860296779484178e-05, + "loss": 2.5259, + "step": 733500 + }, + { + "epoch": 0.56, + "learning_rate": 4.860201513734654e-05, + "loss": 2.533, + "step": 734000 + }, + { + "epoch": 0.56, + "learning_rate": 4.860106438516629e-05, + "loss": 2.529, + "step": 734500 + }, + { + "epoch": 0.56, + "learning_rate": 4.860011172767104e-05, + "loss": 2.5312, + "step": 735000 + }, + { + "epoch": 0.56, + "learning_rate": 4.85991590701758e-05, + "loss": 2.5207, + "step": 735500 + }, + { + "epoch": 0.56, + "learning_rate": 4.859820641268056e-05, + "loss": 2.5198, + "step": 736000 + }, + { + "epoch": 0.56, + "learning_rate": 4.859725375518532e-05, + "loss": 2.4997, + "step": 736500 + }, + { + "epoch": 0.56, + "learning_rate": 4.8596303003005065e-05, + "loss": 2.5366, + "step": 737000 + }, + { + "epoch": 0.56, + "learning_rate": 4.8595350345509824e-05, + "loss": 2.5173, + "step": 737500 + }, + { + "epoch": 0.56, + "learning_rate": 4.859439768801458e-05, + "loss": 2.5266, + "step": 738000 + }, + { + "epoch": 0.56, + "learning_rate": 4.8593445030519333e-05, + "loss": 2.5798, + "step": 738500 + }, + { + "epoch": 0.56, + "learning_rate": 4.85924923730241e-05, + "loss": 2.5424, + "step": 739000 + }, + { + "epoch": 0.56, + "learning_rate": 4.859154162084385e-05, + "loss": 2.5759, + "step": 739500 + }, + { + "epoch": 0.56, + "learning_rate": 4.85905889633486e-05, + "loss": 2.5127, + "step": 740000 + }, + { + "epoch": 0.56, + "learning_rate": 4.8589636305853357e-05, + "loss": 2.5299, + "step": 740500 + }, + { + "epoch": 0.56, + "learning_rate": 4.858868364835812e-05, + "loss": 2.5405, + "step": 741000 + }, + { + "epoch": 0.57, + "learning_rate": 4.858773099086287e-05, + "loss": 2.5254, + "step": 741500 + }, + { + "epoch": 0.57, + "learning_rate": 4.858677833336763e-05, + "loss": 2.5503, + "step": 742000 + }, + { + "epoch": 0.57, + "learning_rate": 4.858582567587239e-05, + "loss": 2.5077, + "step": 742500 + }, + { + "epoch": 0.57, + "learning_rate": 4.858487492369214e-05, + "loss": 2.5035, + "step": 743000 + }, + { + "epoch": 0.57, + "learning_rate": 4.8583922266196896e-05, + "loss": 2.5126, + "step": 743500 + }, + { + "epoch": 0.57, + "learning_rate": 4.858296960870165e-05, + "loss": 2.5186, + "step": 744000 + }, + { + "epoch": 0.57, + "learning_rate": 4.858201695120641e-05, + "loss": 2.5173, + "step": 744500 + }, + { + "epoch": 0.57, + "learning_rate": 4.8581064293711164e-05, + "loss": 2.5233, + "step": 745000 + }, + { + "epoch": 0.57, + "learning_rate": 4.858011163621592e-05, + "loss": 2.5409, + "step": 745500 + }, + { + "epoch": 0.57, + "learning_rate": 4.857915897872069e-05, + "loss": 2.5061, + "step": 746000 + }, + { + "epoch": 0.57, + "learning_rate": 4.857820822654043e-05, + "loss": 2.5157, + "step": 746500 + }, + { + "epoch": 0.57, + "learning_rate": 4.857725556904519e-05, + "loss": 2.5046, + "step": 747000 + }, + { + "epoch": 0.57, + "learning_rate": 4.8576302911549946e-05, + "loss": 2.5362, + "step": 747500 + }, + { + "epoch": 0.57, + "learning_rate": 4.8575350254054704e-05, + "loss": 2.529, + "step": 748000 + }, + { + "epoch": 0.57, + "learning_rate": 4.857439759655946e-05, + "loss": 2.5313, + "step": 748500 + }, + { + "epoch": 0.57, + "learning_rate": 4.8573444939064214e-05, + "loss": 2.5468, + "step": 749000 + }, + { + "epoch": 0.57, + "learning_rate": 4.857249228156898e-05, + "loss": 2.5012, + "step": 749500 + }, + { + "epoch": 0.57, + "learning_rate": 4.857154152938873e-05, + "loss": 2.501, + "step": 750000 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.5342951817565138, + "eval_loss": 2.4543516635894775, + "eval_runtime": 9417.671, + "eval_samples_per_second": 29.199, + "eval_steps_per_second": 7.3, + "step": 750000 + }, + { + "epoch": 0.57, + "learning_rate": 4.857058887189348e-05, + "loss": 2.4986, + "step": 750500 + }, + { + "epoch": 0.57, + "learning_rate": 4.856963621439824e-05, + "loss": 2.5288, + "step": 751000 + }, + { + "epoch": 0.57, + "learning_rate": 4.8568683556902995e-05, + "loss": 2.5196, + "step": 751500 + }, + { + "epoch": 0.57, + "learning_rate": 4.8567732804722744e-05, + "loss": 2.4986, + "step": 752000 + }, + { + "epoch": 0.57, + "learning_rate": 4.85667801472275e-05, + "loss": 2.4932, + "step": 752500 + }, + { + "epoch": 0.57, + "learning_rate": 4.856582748973226e-05, + "loss": 2.5125, + "step": 753000 + }, + { + "epoch": 0.57, + "learning_rate": 4.856487483223702e-05, + "loss": 2.5352, + "step": 753500 + }, + { + "epoch": 0.57, + "learning_rate": 4.856392217474178e-05, + "loss": 2.5109, + "step": 754000 + }, + { + "epoch": 0.58, + "learning_rate": 4.856297142256152e-05, + "loss": 2.5321, + "step": 754500 + }, + { + "epoch": 0.58, + "learning_rate": 4.856201876506628e-05, + "loss": 2.5216, + "step": 755000 + }, + { + "epoch": 0.58, + "learning_rate": 4.856106610757104e-05, + "loss": 2.5253, + "step": 755500 + }, + { + "epoch": 0.58, + "learning_rate": 4.856011345007579e-05, + "loss": 2.5162, + "step": 756000 + }, + { + "epoch": 0.58, + "learning_rate": 4.855916079258055e-05, + "loss": 2.5036, + "step": 756500 + }, + { + "epoch": 0.58, + "learning_rate": 4.855820813508531e-05, + "loss": 2.556, + "step": 757000 + }, + { + "epoch": 0.58, + "learning_rate": 4.855725738290506e-05, + "loss": 2.542, + "step": 757500 + }, + { + "epoch": 0.58, + "learning_rate": 4.8556304725409816e-05, + "loss": 2.5479, + "step": 758000 + }, + { + "epoch": 0.58, + "learning_rate": 4.8555352067914575e-05, + "loss": 2.548, + "step": 758500 + }, + { + "epoch": 0.58, + "learning_rate": 4.855440131573432e-05, + "loss": 2.5394, + "step": 759000 + }, + { + "epoch": 0.58, + "learning_rate": 4.855344865823908e-05, + "loss": 2.5406, + "step": 759500 + }, + { + "epoch": 0.58, + "learning_rate": 4.855249600074383e-05, + "loss": 2.5179, + "step": 760000 + }, + { + "epoch": 0.58, + "learning_rate": 4.85515433432486e-05, + "loss": 2.5343, + "step": 760500 + }, + { + "epoch": 0.58, + "learning_rate": 4.855059068575335e-05, + "loss": 2.4971, + "step": 761000 + }, + { + "epoch": 0.58, + "learning_rate": 4.854963802825811e-05, + "loss": 2.5338, + "step": 761500 + }, + { + "epoch": 0.58, + "learning_rate": 4.854868537076287e-05, + "loss": 2.5017, + "step": 762000 + }, + { + "epoch": 0.58, + "learning_rate": 4.8547732713267624e-05, + "loss": 2.5356, + "step": 762500 + }, + { + "epoch": 0.58, + "learning_rate": 4.854678005577238e-05, + "loss": 2.5034, + "step": 763000 + }, + { + "epoch": 0.58, + "learning_rate": 4.854582739827714e-05, + "loss": 2.5402, + "step": 763500 + }, + { + "epoch": 0.58, + "learning_rate": 4.85448747407819e-05, + "loss": 2.5489, + "step": 764000 + }, + { + "epoch": 0.58, + "learning_rate": 4.854392398860165e-05, + "loss": 2.5176, + "step": 764500 + }, + { + "epoch": 0.58, + "learning_rate": 4.85429713311064e-05, + "loss": 2.5373, + "step": 765000 + }, + { + "epoch": 0.58, + "learning_rate": 4.8542018673611164e-05, + "loss": 2.5225, + "step": 765500 + }, + { + "epoch": 0.58, + "learning_rate": 4.854106601611592e-05, + "loss": 2.5459, + "step": 766000 + }, + { + "epoch": 0.58, + "learning_rate": 4.8540113358620674e-05, + "loss": 2.5122, + "step": 766500 + }, + { + "epoch": 0.58, + "learning_rate": 4.853916070112544e-05, + "loss": 2.5329, + "step": 767000 + }, + { + "epoch": 0.58, + "learning_rate": 4.853820804363019e-05, + "loss": 2.5147, + "step": 767500 + }, + { + "epoch": 0.59, + "learning_rate": 4.853725538613495e-05, + "loss": 2.5107, + "step": 768000 + }, + { + "epoch": 0.59, + "learning_rate": 4.85363046339547e-05, + "loss": 2.5306, + "step": 768500 + }, + { + "epoch": 0.59, + "learning_rate": 4.8535351976459455e-05, + "loss": 2.5514, + "step": 769000 + }, + { + "epoch": 0.59, + "learning_rate": 4.853439931896421e-05, + "loss": 2.5209, + "step": 769500 + }, + { + "epoch": 0.59, + "learning_rate": 4.8533446661468965e-05, + "loss": 2.5557, + "step": 770000 + }, + { + "epoch": 0.59, + "learning_rate": 4.853249400397373e-05, + "loss": 2.5025, + "step": 770500 + }, + { + "epoch": 0.59, + "learning_rate": 4.853154325179348e-05, + "loss": 2.5243, + "step": 771000 + }, + { + "epoch": 0.59, + "learning_rate": 4.8530592499613226e-05, + "loss": 2.5324, + "step": 771500 + }, + { + "epoch": 0.59, + "learning_rate": 4.852963984211798e-05, + "loss": 2.5651, + "step": 772000 + }, + { + "epoch": 0.59, + "learning_rate": 4.852868718462274e-05, + "loss": 2.5142, + "step": 772500 + }, + { + "epoch": 0.59, + "learning_rate": 4.852773643244249e-05, + "loss": 2.5296, + "step": 773000 + }, + { + "epoch": 0.59, + "learning_rate": 4.852678377494724e-05, + "loss": 2.5006, + "step": 773500 + }, + { + "epoch": 0.59, + "learning_rate": 4.8525831117452e-05, + "loss": 2.5112, + "step": 774000 + }, + { + "epoch": 0.59, + "learning_rate": 4.852487845995676e-05, + "loss": 2.5575, + "step": 774500 + }, + { + "epoch": 0.59, + "learning_rate": 4.852392580246152e-05, + "loss": 2.5241, + "step": 775000 + }, + { + "epoch": 0.59, + "learning_rate": 4.8522975050281266e-05, + "loss": 2.5181, + "step": 775500 + }, + { + "epoch": 0.59, + "learning_rate": 4.852202239278602e-05, + "loss": 2.4925, + "step": 776000 + }, + { + "epoch": 0.59, + "learning_rate": 4.852106973529078e-05, + "loss": 2.5104, + "step": 776500 + }, + { + "epoch": 0.59, + "learning_rate": 4.8520117077795534e-05, + "loss": 2.5167, + "step": 777000 + }, + { + "epoch": 0.59, + "learning_rate": 4.851916442030029e-05, + "loss": 2.5447, + "step": 777500 + }, + { + "epoch": 0.59, + "learning_rate": 4.851821176280506e-05, + "loss": 2.5358, + "step": 778000 + }, + { + "epoch": 0.59, + "learning_rate": 4.851725910530981e-05, + "loss": 2.5225, + "step": 778500 + }, + { + "epoch": 0.59, + "learning_rate": 4.851630644781457e-05, + "loss": 2.5213, + "step": 779000 + }, + { + "epoch": 0.59, + "learning_rate": 4.8515353790319325e-05, + "loss": 2.5208, + "step": 779500 + }, + { + "epoch": 0.59, + "learning_rate": 4.8514403038139074e-05, + "loss": 2.5117, + "step": 780000 + }, + { + "epoch": 0.59, + "eval_accuracy": 0.5350785554939919, + "eval_loss": 2.4482388496398926, + "eval_runtime": 9410.4392, + "eval_samples_per_second": 29.222, + "eval_steps_per_second": 7.305, + "step": 780000 + }, + { + "epoch": 0.59, + "learning_rate": 4.851345228595882e-05, + "loss": 2.4966, + "step": 780500 + }, + { + "epoch": 0.6, + "learning_rate": 4.851249962846358e-05, + "loss": 2.5239, + "step": 781000 + }, + { + "epoch": 0.6, + "learning_rate": 4.851154697096834e-05, + "loss": 2.5053, + "step": 781500 + }, + { + "epoch": 0.6, + "learning_rate": 4.85105943134731e-05, + "loss": 2.5289, + "step": 782000 + }, + { + "epoch": 0.6, + "learning_rate": 4.850964165597785e-05, + "loss": 2.4993, + "step": 782500 + }, + { + "epoch": 0.6, + "learning_rate": 4.850868899848261e-05, + "loss": 2.4985, + "step": 783000 + }, + { + "epoch": 0.6, + "learning_rate": 4.850773824630236e-05, + "loss": 2.5329, + "step": 783500 + }, + { + "epoch": 0.6, + "learning_rate": 4.850678558880711e-05, + "loss": 2.4752, + "step": 784000 + }, + { + "epoch": 0.6, + "learning_rate": 4.850583293131187e-05, + "loss": 2.5238, + "step": 784500 + }, + { + "epoch": 0.6, + "learning_rate": 4.850488027381663e-05, + "loss": 2.525, + "step": 785000 + }, + { + "epoch": 0.6, + "learning_rate": 4.850392761632139e-05, + "loss": 2.5306, + "step": 785500 + }, + { + "epoch": 0.6, + "learning_rate": 4.8502974958826146e-05, + "loss": 2.5225, + "step": 786000 + }, + { + "epoch": 0.6, + "learning_rate": 4.8502022301330905e-05, + "loss": 2.5329, + "step": 786500 + }, + { + "epoch": 0.6, + "learning_rate": 4.850106964383566e-05, + "loss": 2.5193, + "step": 787000 + }, + { + "epoch": 0.6, + "learning_rate": 4.8500116986340415e-05, + "loss": 2.5076, + "step": 787500 + }, + { + "epoch": 0.6, + "learning_rate": 4.849916432884517e-05, + "loss": 2.5274, + "step": 788000 + }, + { + "epoch": 0.6, + "learning_rate": 4.849821167134994e-05, + "loss": 2.5147, + "step": 788500 + }, + { + "epoch": 0.6, + "learning_rate": 4.849725901385469e-05, + "loss": 2.5143, + "step": 789000 + }, + { + "epoch": 0.6, + "learning_rate": 4.849630826167444e-05, + "loss": 2.52, + "step": 789500 + }, + { + "epoch": 0.6, + "learning_rate": 4.8495355604179196e-05, + "loss": 2.518, + "step": 790000 + }, + { + "epoch": 0.6, + "learning_rate": 4.8494402946683954e-05, + "loss": 2.5346, + "step": 790500 + }, + { + "epoch": 0.6, + "learning_rate": 4.84934521945037e-05, + "loss": 2.509, + "step": 791000 + }, + { + "epoch": 0.6, + "learning_rate": 4.849249953700846e-05, + "loss": 2.5212, + "step": 791500 + }, + { + "epoch": 0.6, + "learning_rate": 4.849154687951322e-05, + "loss": 2.5395, + "step": 792000 + }, + { + "epoch": 0.6, + "learning_rate": 4.849059422201798e-05, + "loss": 2.5189, + "step": 792500 + }, + { + "epoch": 0.6, + "learning_rate": 4.848964156452273e-05, + "loss": 2.5435, + "step": 793000 + }, + { + "epoch": 0.6, + "learning_rate": 4.848869081234248e-05, + "loss": 2.5293, + "step": 793500 + }, + { + "epoch": 0.61, + "learning_rate": 4.848774006016223e-05, + "loss": 2.4901, + "step": 794000 + }, + { + "epoch": 0.61, + "learning_rate": 4.8486787402666984e-05, + "loss": 2.5383, + "step": 794500 + }, + { + "epoch": 0.61, + "learning_rate": 4.848583474517174e-05, + "loss": 2.5224, + "step": 795000 + }, + { + "epoch": 0.61, + "learning_rate": 4.84848820876765e-05, + "loss": 2.5309, + "step": 795500 + }, + { + "epoch": 0.61, + "learning_rate": 4.848392943018126e-05, + "loss": 2.533, + "step": 796000 + }, + { + "epoch": 0.61, + "learning_rate": 4.848297677268602e-05, + "loss": 2.4852, + "step": 796500 + }, + { + "epoch": 0.61, + "learning_rate": 4.848202411519077e-05, + "loss": 2.5018, + "step": 797000 + }, + { + "epoch": 0.61, + "learning_rate": 4.8481071457695533e-05, + "loss": 2.5226, + "step": 797500 + }, + { + "epoch": 0.61, + "learning_rate": 4.848012070551528e-05, + "loss": 2.525, + "step": 798000 + }, + { + "epoch": 0.61, + "learning_rate": 4.847916804802003e-05, + "loss": 2.4942, + "step": 798500 + }, + { + "epoch": 0.61, + "learning_rate": 4.847821729583978e-05, + "loss": 2.511, + "step": 799000 + }, + { + "epoch": 0.61, + "learning_rate": 4.8477264638344547e-05, + "loss": 2.4991, + "step": 799500 + }, + { + "epoch": 0.61, + "learning_rate": 4.84763119808493e-05, + "loss": 2.568, + "step": 800000 + }, + { + "epoch": 0.61, + "learning_rate": 4.8475359323354056e-05, + "loss": 2.5072, + "step": 800500 + }, + { + "epoch": 0.61, + "learning_rate": 4.8474406665858815e-05, + "loss": 2.5354, + "step": 801000 + }, + { + "epoch": 0.61, + "learning_rate": 4.847345400836357e-05, + "loss": 2.5272, + "step": 801500 + }, + { + "epoch": 0.61, + "learning_rate": 4.847250325618332e-05, + "loss": 2.5244, + "step": 802000 + }, + { + "epoch": 0.61, + "learning_rate": 4.847155059868808e-05, + "loss": 2.5137, + "step": 802500 + }, + { + "epoch": 0.61, + "learning_rate": 4.847059794119284e-05, + "loss": 2.5394, + "step": 803000 + }, + { + "epoch": 0.61, + "learning_rate": 4.8469645283697596e-05, + "loss": 2.4671, + "step": 803500 + }, + { + "epoch": 0.61, + "learning_rate": 4.846869453151734e-05, + "loss": 2.4784, + "step": 804000 + }, + { + "epoch": 0.61, + "learning_rate": 4.8467741874022096e-05, + "loss": 2.5371, + "step": 804500 + }, + { + "epoch": 0.61, + "learning_rate": 4.846678921652686e-05, + "loss": 2.5387, + "step": 805000 + }, + { + "epoch": 0.61, + "learning_rate": 4.846583655903161e-05, + "loss": 2.4954, + "step": 805500 + }, + { + "epoch": 0.61, + "learning_rate": 4.846488580685136e-05, + "loss": 2.5118, + "step": 806000 + }, + { + "epoch": 0.61, + "learning_rate": 4.846393314935612e-05, + "loss": 2.5114, + "step": 806500 + }, + { + "epoch": 0.62, + "learning_rate": 4.846298049186088e-05, + "loss": 2.5082, + "step": 807000 + }, + { + "epoch": 0.62, + "learning_rate": 4.8462027834365636e-05, + "loss": 2.5428, + "step": 807500 + }, + { + "epoch": 0.62, + "learning_rate": 4.8461075176870394e-05, + "loss": 2.5115, + "step": 808000 + }, + { + "epoch": 0.62, + "learning_rate": 4.846012251937515e-05, + "loss": 2.5144, + "step": 808500 + }, + { + "epoch": 0.62, + "learning_rate": 4.845916986187991e-05, + "loss": 2.4977, + "step": 809000 + }, + { + "epoch": 0.62, + "learning_rate": 4.845821720438466e-05, + "loss": 2.5214, + "step": 809500 + }, + { + "epoch": 0.62, + "learning_rate": 4.845726454688943e-05, + "loss": 2.533, + "step": 810000 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.5359159402971051, + "eval_loss": 2.441509962081909, + "eval_runtime": 9410.4578, + "eval_samples_per_second": 29.222, + "eval_steps_per_second": 7.305, + "step": 810000 + }, + { + "epoch": 0.62, + "learning_rate": 4.845631188939418e-05, + "loss": 2.5166, + "step": 810500 + }, + { + "epoch": 0.62, + "learning_rate": 4.845535923189894e-05, + "loss": 2.5196, + "step": 811000 + }, + { + "epoch": 0.62, + "learning_rate": 4.8454406574403695e-05, + "loss": 2.5294, + "step": 811500 + }, + { + "epoch": 0.62, + "learning_rate": 4.8453453916908453e-05, + "loss": 2.5061, + "step": 812000 + }, + { + "epoch": 0.62, + "learning_rate": 4.84525031647282e-05, + "loss": 2.4878, + "step": 812500 + }, + { + "epoch": 0.62, + "learning_rate": 4.845155050723296e-05, + "loss": 2.5158, + "step": 813000 + }, + { + "epoch": 0.62, + "learning_rate": 4.845059975505271e-05, + "loss": 2.4952, + "step": 813500 + }, + { + "epoch": 0.62, + "learning_rate": 4.8449647097557467e-05, + "loss": 2.5196, + "step": 814000 + }, + { + "epoch": 0.62, + "learning_rate": 4.844869444006222e-05, + "loss": 2.5308, + "step": 814500 + }, + { + "epoch": 0.62, + "learning_rate": 4.844774178256698e-05, + "loss": 2.53, + "step": 815000 + }, + { + "epoch": 0.62, + "learning_rate": 4.844679103038673e-05, + "loss": 2.4882, + "step": 815500 + }, + { + "epoch": 0.62, + "learning_rate": 4.844583837289148e-05, + "loss": 2.5313, + "step": 816000 + }, + { + "epoch": 0.62, + "learning_rate": 4.844488571539624e-05, + "loss": 2.4912, + "step": 816500 + }, + { + "epoch": 0.62, + "learning_rate": 4.8443933057901006e-05, + "loss": 2.4797, + "step": 817000 + }, + { + "epoch": 0.62, + "learning_rate": 4.844298040040576e-05, + "loss": 2.4845, + "step": 817500 + }, + { + "epoch": 0.62, + "learning_rate": 4.8442027742910516e-05, + "loss": 2.5227, + "step": 818000 + }, + { + "epoch": 0.62, + "learning_rate": 4.8441075085415274e-05, + "loss": 2.5142, + "step": 818500 + }, + { + "epoch": 0.62, + "learning_rate": 4.844012433323502e-05, + "loss": 2.5353, + "step": 819000 + }, + { + "epoch": 0.62, + "learning_rate": 4.843917167573978e-05, + "loss": 2.5149, + "step": 819500 + }, + { + "epoch": 0.62, + "learning_rate": 4.843821901824453e-05, + "loss": 2.4849, + "step": 820000 + }, + { + "epoch": 0.63, + "learning_rate": 4.84372663607493e-05, + "loss": 2.4997, + "step": 820500 + }, + { + "epoch": 0.63, + "learning_rate": 4.843631370325405e-05, + "loss": 2.5269, + "step": 821000 + }, + { + "epoch": 0.63, + "learning_rate": 4.843536104575881e-05, + "loss": 2.5204, + "step": 821500 + }, + { + "epoch": 0.63, + "learning_rate": 4.843440838826357e-05, + "loss": 2.4866, + "step": 822000 + }, + { + "epoch": 0.63, + "learning_rate": 4.8433455730768324e-05, + "loss": 2.5257, + "step": 822500 + }, + { + "epoch": 0.63, + "learning_rate": 4.843250307327308e-05, + "loss": 2.5165, + "step": 823000 + }, + { + "epoch": 0.63, + "learning_rate": 4.843155041577784e-05, + "loss": 2.5304, + "step": 823500 + }, + { + "epoch": 0.63, + "learning_rate": 4.84305977582826e-05, + "loss": 2.5032, + "step": 824000 + }, + { + "epoch": 0.63, + "learning_rate": 4.842964700610235e-05, + "loss": 2.5299, + "step": 824500 + }, + { + "epoch": 0.63, + "learning_rate": 4.84286943486071e-05, + "loss": 2.5513, + "step": 825000 + }, + { + "epoch": 0.63, + "learning_rate": 4.8427741691111864e-05, + "loss": 2.5273, + "step": 825500 + }, + { + "epoch": 0.63, + "learning_rate": 4.8426789033616615e-05, + "loss": 2.4934, + "step": 826000 + }, + { + "epoch": 0.63, + "learning_rate": 4.8425836376121373e-05, + "loss": 2.4948, + "step": 826500 + }, + { + "epoch": 0.63, + "learning_rate": 4.842488371862613e-05, + "loss": 2.4867, + "step": 827000 + }, + { + "epoch": 0.63, + "learning_rate": 4.842393296644588e-05, + "loss": 2.4848, + "step": 827500 + }, + { + "epoch": 0.63, + "learning_rate": 4.842298030895064e-05, + "loss": 2.504, + "step": 828000 + }, + { + "epoch": 0.63, + "learning_rate": 4.8422027651455397e-05, + "loss": 2.52, + "step": 828500 + }, + { + "epoch": 0.63, + "learning_rate": 4.8421074993960155e-05, + "loss": 2.4617, + "step": 829000 + }, + { + "epoch": 0.63, + "learning_rate": 4.842012233646491e-05, + "loss": 2.5031, + "step": 829500 + }, + { + "epoch": 0.63, + "learning_rate": 4.841917158428466e-05, + "loss": 2.5218, + "step": 830000 + }, + { + "epoch": 0.63, + "learning_rate": 4.84182208321044e-05, + "loss": 2.4915, + "step": 830500 + }, + { + "epoch": 0.63, + "learning_rate": 4.841726817460917e-05, + "loss": 2.5277, + "step": 831000 + }, + { + "epoch": 0.63, + "learning_rate": 4.8416317422428916e-05, + "loss": 2.5073, + "step": 831500 + }, + { + "epoch": 0.63, + "learning_rate": 4.841536476493367e-05, + "loss": 2.4946, + "step": 832000 + }, + { + "epoch": 0.63, + "learning_rate": 4.8414412107438426e-05, + "loss": 2.4955, + "step": 832500 + }, + { + "epoch": 0.63, + "learning_rate": 4.841345944994319e-05, + "loss": 2.4991, + "step": 833000 + }, + { + "epoch": 0.64, + "learning_rate": 4.841250679244794e-05, + "loss": 2.5253, + "step": 833500 + }, + { + "epoch": 0.64, + "learning_rate": 4.84115541349527e-05, + "loss": 2.5169, + "step": 834000 + }, + { + "epoch": 0.64, + "learning_rate": 4.841060147745746e-05, + "loss": 2.5177, + "step": 834500 + }, + { + "epoch": 0.64, + "learning_rate": 4.840964881996222e-05, + "loss": 2.502, + "step": 835000 + }, + { + "epoch": 0.64, + "learning_rate": 4.840869616246697e-05, + "loss": 2.5051, + "step": 835500 + }, + { + "epoch": 0.64, + "learning_rate": 4.8407743504971734e-05, + "loss": 2.5146, + "step": 836000 + }, + { + "epoch": 0.64, + "learning_rate": 4.840679084747649e-05, + "loss": 2.5162, + "step": 836500 + }, + { + "epoch": 0.64, + "learning_rate": 4.8405838189981244e-05, + "loss": 2.4826, + "step": 837000 + }, + { + "epoch": 0.64, + "learning_rate": 4.8404885532486e-05, + "loss": 2.5228, + "step": 837500 + }, + { + "epoch": 0.64, + "learning_rate": 4.840393478030576e-05, + "loss": 2.493, + "step": 838000 + }, + { + "epoch": 0.64, + "learning_rate": 4.840298212281051e-05, + "loss": 2.4975, + "step": 838500 + }, + { + "epoch": 0.64, + "learning_rate": 4.840203137063026e-05, + "loss": 2.4956, + "step": 839000 + }, + { + "epoch": 0.64, + "learning_rate": 4.8401078713135015e-05, + "loss": 2.4693, + "step": 839500 + }, + { + "epoch": 0.64, + "learning_rate": 4.8400126055639774e-05, + "loss": 2.5084, + "step": 840000 + }, + { + "epoch": 0.64, + "eval_accuracy": 0.5369741576282202, + "eval_loss": 2.4342472553253174, + "eval_runtime": 9412.8058, + "eval_samples_per_second": 29.214, + "eval_steps_per_second": 7.304, + "step": 840000 + }, + { + "epoch": 0.64, + "learning_rate": 4.839917339814453e-05, + "loss": 2.5141, + "step": 840500 + }, + { + "epoch": 0.64, + "learning_rate": 4.8398220740649283e-05, + "loss": 2.4913, + "step": 841000 + }, + { + "epoch": 0.64, + "learning_rate": 4.839726808315405e-05, + "loss": 2.4952, + "step": 841500 + }, + { + "epoch": 0.64, + "learning_rate": 4.839631542565881e-05, + "loss": 2.506, + "step": 842000 + }, + { + "epoch": 0.64, + "learning_rate": 4.839536276816356e-05, + "loss": 2.5072, + "step": 842500 + }, + { + "epoch": 0.64, + "learning_rate": 4.839441011066832e-05, + "loss": 2.4864, + "step": 843000 + }, + { + "epoch": 0.64, + "learning_rate": 4.8393457453173075e-05, + "loss": 2.5058, + "step": 843500 + }, + { + "epoch": 0.64, + "learning_rate": 4.839250479567783e-05, + "loss": 2.4961, + "step": 844000 + }, + { + "epoch": 0.64, + "learning_rate": 4.839155213818259e-05, + "loss": 2.4822, + "step": 844500 + }, + { + "epoch": 0.64, + "learning_rate": 4.839059948068735e-05, + "loss": 2.5, + "step": 845000 + }, + { + "epoch": 0.64, + "learning_rate": 4.838965063382209e-05, + "loss": 2.5315, + "step": 845500 + }, + { + "epoch": 0.64, + "learning_rate": 4.8388697976326846e-05, + "loss": 2.4885, + "step": 846000 + }, + { + "epoch": 0.65, + "learning_rate": 4.83877453188316e-05, + "loss": 2.5117, + "step": 846500 + }, + { + "epoch": 0.65, + "learning_rate": 4.838679266133636e-05, + "loss": 2.4866, + "step": 847000 + }, + { + "epoch": 0.65, + "learning_rate": 4.8385840003841114e-05, + "loss": 2.506, + "step": 847500 + }, + { + "epoch": 0.65, + "learning_rate": 4.838488734634587e-05, + "loss": 2.5338, + "step": 848000 + }, + { + "epoch": 0.65, + "learning_rate": 4.838393468885064e-05, + "loss": 2.5288, + "step": 848500 + }, + { + "epoch": 0.65, + "learning_rate": 4.838298203135539e-05, + "loss": 2.4938, + "step": 849000 + }, + { + "epoch": 0.65, + "learning_rate": 4.838203127917514e-05, + "loss": 2.5137, + "step": 849500 + }, + { + "epoch": 0.65, + "learning_rate": 4.8381078621679896e-05, + "loss": 2.4738, + "step": 850000 + }, + { + "epoch": 0.65, + "learning_rate": 4.8380125964184654e-05, + "loss": 2.4973, + "step": 850500 + }, + { + "epoch": 0.65, + "learning_rate": 4.837917330668941e-05, + "loss": 2.5173, + "step": 851000 + }, + { + "epoch": 0.65, + "learning_rate": 4.8378220649194164e-05, + "loss": 2.4681, + "step": 851500 + }, + { + "epoch": 0.65, + "learning_rate": 4.837726799169893e-05, + "loss": 2.5334, + "step": 852000 + }, + { + "epoch": 0.65, + "learning_rate": 4.837631723951868e-05, + "loss": 2.5, + "step": 852500 + }, + { + "epoch": 0.65, + "learning_rate": 4.837536458202343e-05, + "loss": 2.526, + "step": 853000 + }, + { + "epoch": 0.65, + "learning_rate": 4.837441192452819e-05, + "loss": 2.5256, + "step": 853500 + }, + { + "epoch": 0.65, + "learning_rate": 4.8373459267032945e-05, + "loss": 2.487, + "step": 854000 + }, + { + "epoch": 0.65, + "learning_rate": 4.8372506609537704e-05, + "loss": 2.4844, + "step": 854500 + }, + { + "epoch": 0.65, + "learning_rate": 4.837155585735745e-05, + "loss": 2.5446, + "step": 855000 + }, + { + "epoch": 0.65, + "learning_rate": 4.83706051051772e-05, + "loss": 2.4951, + "step": 855500 + }, + { + "epoch": 0.65, + "learning_rate": 4.836965244768196e-05, + "loss": 2.511, + "step": 856000 + }, + { + "epoch": 0.65, + "learning_rate": 4.836869979018672e-05, + "loss": 2.4738, + "step": 856500 + }, + { + "epoch": 0.65, + "learning_rate": 4.836774713269147e-05, + "loss": 2.4937, + "step": 857000 + }, + { + "epoch": 0.65, + "learning_rate": 4.836679447519623e-05, + "loss": 2.52, + "step": 857500 + }, + { + "epoch": 0.65, + "learning_rate": 4.836584181770099e-05, + "loss": 2.483, + "step": 858000 + }, + { + "epoch": 0.65, + "learning_rate": 4.836488916020574e-05, + "loss": 2.5091, + "step": 858500 + }, + { + "epoch": 0.65, + "learning_rate": 4.836393650271051e-05, + "loss": 2.4984, + "step": 859000 + }, + { + "epoch": 0.66, + "learning_rate": 4.836298384521526e-05, + "loss": 2.5226, + "step": 859500 + }, + { + "epoch": 0.66, + "learning_rate": 4.836203309303501e-05, + "loss": 2.4624, + "step": 860000 + }, + { + "epoch": 0.66, + "learning_rate": 4.8361082340854756e-05, + "loss": 2.4854, + "step": 860500 + }, + { + "epoch": 0.66, + "learning_rate": 4.8360129683359515e-05, + "loss": 2.5103, + "step": 861000 + }, + { + "epoch": 0.66, + "learning_rate": 4.835917702586427e-05, + "loss": 2.5037, + "step": 861500 + }, + { + "epoch": 0.66, + "learning_rate": 4.835822436836903e-05, + "loss": 2.4906, + "step": 862000 + }, + { + "epoch": 0.66, + "learning_rate": 4.835727171087379e-05, + "loss": 2.5154, + "step": 862500 + }, + { + "epoch": 0.66, + "learning_rate": 4.835632095869354e-05, + "loss": 2.4859, + "step": 863000 + }, + { + "epoch": 0.66, + "learning_rate": 4.8355368301198296e-05, + "loss": 2.5066, + "step": 863500 + }, + { + "epoch": 0.66, + "learning_rate": 4.835441564370305e-05, + "loss": 2.4989, + "step": 864000 + }, + { + "epoch": 0.66, + "learning_rate": 4.835346298620781e-05, + "loss": 2.4893, + "step": 864500 + }, + { + "epoch": 0.66, + "learning_rate": 4.8352510328712564e-05, + "loss": 2.4604, + "step": 865000 + }, + { + "epoch": 0.66, + "learning_rate": 4.835155767121732e-05, + "loss": 2.4778, + "step": 865500 + }, + { + "epoch": 0.66, + "learning_rate": 4.835060501372208e-05, + "loss": 2.4728, + "step": 866000 + }, + { + "epoch": 0.66, + "learning_rate": 4.834965235622684e-05, + "loss": 2.5163, + "step": 866500 + }, + { + "epoch": 0.66, + "learning_rate": 4.83486996987316e-05, + "loss": 2.4842, + "step": 867000 + }, + { + "epoch": 0.66, + "learning_rate": 4.834774704123635e-05, + "loss": 2.4792, + "step": 867500 + }, + { + "epoch": 0.66, + "learning_rate": 4.8346794383741114e-05, + "loss": 2.5151, + "step": 868000 + }, + { + "epoch": 0.66, + "learning_rate": 4.8345841726245865e-05, + "loss": 2.5082, + "step": 868500 + }, + { + "epoch": 0.66, + "learning_rate": 4.8344889068750624e-05, + "loss": 2.4805, + "step": 869000 + }, + { + "epoch": 0.66, + "learning_rate": 4.834393831657037e-05, + "loss": 2.4789, + "step": 869500 + }, + { + "epoch": 0.66, + "learning_rate": 4.834298565907513e-05, + "loss": 2.494, + "step": 870000 + }, + { + "epoch": 0.66, + "eval_accuracy": 0.5376838909000078, + "eval_loss": 2.4287471771240234, + "eval_runtime": 9411.4474, + "eval_samples_per_second": 29.218, + "eval_steps_per_second": 7.305, + "step": 870000 + }, + { + "epoch": 0.66, + "learning_rate": 4.834203300157989e-05, + "loss": 2.5188, + "step": 870500 + }, + { + "epoch": 0.66, + "learning_rate": 4.834108224939964e-05, + "loss": 2.4801, + "step": 871000 + }, + { + "epoch": 0.66, + "learning_rate": 4.8340129591904395e-05, + "loss": 2.4803, + "step": 871500 + }, + { + "epoch": 0.66, + "learning_rate": 4.833917693440915e-05, + "loss": 2.4882, + "step": 872000 + }, + { + "epoch": 0.66, + "learning_rate": 4.833822427691391e-05, + "loss": 2.4573, + "step": 872500 + }, + { + "epoch": 0.67, + "learning_rate": 4.833727352473365e-05, + "loss": 2.4912, + "step": 873000 + }, + { + "epoch": 0.67, + "learning_rate": 4.833632086723842e-05, + "loss": 2.4955, + "step": 873500 + }, + { + "epoch": 0.67, + "learning_rate": 4.8335368209743176e-05, + "loss": 2.5414, + "step": 874000 + }, + { + "epoch": 0.67, + "learning_rate": 4.833441555224793e-05, + "loss": 2.4965, + "step": 874500 + }, + { + "epoch": 0.67, + "learning_rate": 4.8333464800067676e-05, + "loss": 2.4903, + "step": 875000 + }, + { + "epoch": 0.67, + "learning_rate": 4.833251214257244e-05, + "loss": 2.4883, + "step": 875500 + }, + { + "epoch": 0.67, + "learning_rate": 4.833155948507719e-05, + "loss": 2.4873, + "step": 876000 + }, + { + "epoch": 0.67, + "learning_rate": 4.833060873289694e-05, + "loss": 2.4918, + "step": 876500 + }, + { + "epoch": 0.67, + "learning_rate": 4.83296560754017e-05, + "loss": 2.4747, + "step": 877000 + }, + { + "epoch": 0.67, + "learning_rate": 4.832870341790646e-05, + "loss": 2.4503, + "step": 877500 + }, + { + "epoch": 0.67, + "learning_rate": 4.8327750760411216e-05, + "loss": 2.4926, + "step": 878000 + }, + { + "epoch": 0.67, + "learning_rate": 4.8326800008230964e-05, + "loss": 2.4869, + "step": 878500 + }, + { + "epoch": 0.67, + "learning_rate": 4.832584735073572e-05, + "loss": 2.4935, + "step": 879000 + }, + { + "epoch": 0.67, + "learning_rate": 4.832489469324048e-05, + "loss": 2.4595, + "step": 879500 + }, + { + "epoch": 0.67, + "learning_rate": 4.832394203574523e-05, + "loss": 2.5026, + "step": 880000 + }, + { + "epoch": 0.67, + "learning_rate": 4.832298937825e-05, + "loss": 2.4938, + "step": 880500 + }, + { + "epoch": 0.67, + "learning_rate": 4.832203672075475e-05, + "loss": 2.5015, + "step": 881000 + }, + { + "epoch": 0.67, + "learning_rate": 4.83210859685745e-05, + "loss": 2.5084, + "step": 881500 + }, + { + "epoch": 0.67, + "learning_rate": 4.8320133311079255e-05, + "loss": 2.5152, + "step": 882000 + }, + { + "epoch": 0.67, + "learning_rate": 4.8319180653584014e-05, + "loss": 2.477, + "step": 882500 + }, + { + "epoch": 0.67, + "learning_rate": 4.831822799608877e-05, + "loss": 2.4759, + "step": 883000 + }, + { + "epoch": 0.67, + "learning_rate": 4.831727533859353e-05, + "loss": 2.5024, + "step": 883500 + }, + { + "epoch": 0.67, + "learning_rate": 4.831632268109829e-05, + "loss": 2.508, + "step": 884000 + }, + { + "epoch": 0.67, + "learning_rate": 4.831537002360305e-05, + "loss": 2.4888, + "step": 884500 + }, + { + "epoch": 0.67, + "learning_rate": 4.83144173661078e-05, + "loss": 2.4926, + "step": 885000 + }, + { + "epoch": 0.67, + "learning_rate": 4.8313464708612564e-05, + "loss": 2.4664, + "step": 885500 + }, + { + "epoch": 0.68, + "learning_rate": 4.8312512051117315e-05, + "loss": 2.4748, + "step": 886000 + }, + { + "epoch": 0.68, + "learning_rate": 4.831155939362207e-05, + "loss": 2.5276, + "step": 886500 + }, + { + "epoch": 0.68, + "learning_rate": 4.831060673612683e-05, + "loss": 2.4911, + "step": 887000 + }, + { + "epoch": 0.68, + "learning_rate": 4.830965407863159e-05, + "loss": 2.5154, + "step": 887500 + }, + { + "epoch": 0.68, + "learning_rate": 4.830870142113635e-05, + "loss": 2.4843, + "step": 888000 + }, + { + "epoch": 0.68, + "learning_rate": 4.8307750668956096e-05, + "loss": 2.5503, + "step": 888500 + }, + { + "epoch": 0.68, + "learning_rate": 4.8306798011460855e-05, + "loss": 2.4833, + "step": 889000 + }, + { + "epoch": 0.68, + "learning_rate": 4.83058472592806e-05, + "loss": 2.4943, + "step": 889500 + }, + { + "epoch": 0.68, + "learning_rate": 4.830489460178536e-05, + "loss": 2.5046, + "step": 890000 + }, + { + "epoch": 0.68, + "learning_rate": 4.830394194429011e-05, + "loss": 2.5074, + "step": 890500 + }, + { + "epoch": 0.68, + "learning_rate": 4.830298928679488e-05, + "loss": 2.4619, + "step": 891000 + }, + { + "epoch": 0.68, + "learning_rate": 4.830203662929963e-05, + "loss": 2.4918, + "step": 891500 + }, + { + "epoch": 0.68, + "learning_rate": 4.830108397180439e-05, + "loss": 2.491, + "step": 892000 + }, + { + "epoch": 0.68, + "learning_rate": 4.8300131314309146e-05, + "loss": 2.4875, + "step": 892500 + }, + { + "epoch": 0.68, + "learning_rate": 4.8299178656813904e-05, + "loss": 2.4513, + "step": 893000 + }, + { + "epoch": 0.68, + "learning_rate": 4.829822599931866e-05, + "loss": 2.4885, + "step": 893500 + }, + { + "epoch": 0.68, + "learning_rate": 4.829727334182342e-05, + "loss": 2.5036, + "step": 894000 + }, + { + "epoch": 0.68, + "learning_rate": 4.829632068432818e-05, + "loss": 2.5035, + "step": 894500 + }, + { + "epoch": 0.68, + "learning_rate": 4.829536802683293e-05, + "loss": 2.5075, + "step": 895000 + }, + { + "epoch": 0.68, + "learning_rate": 4.829441727465268e-05, + "loss": 2.4598, + "step": 895500 + }, + { + "epoch": 0.68, + "learning_rate": 4.8293464617157444e-05, + "loss": 2.4967, + "step": 896000 + }, + { + "epoch": 0.68, + "learning_rate": 4.8292511959662196e-05, + "loss": 2.5064, + "step": 896500 + }, + { + "epoch": 0.68, + "learning_rate": 4.8291559302166954e-05, + "loss": 2.525, + "step": 897000 + }, + { + "epoch": 0.68, + "learning_rate": 4.829060664467171e-05, + "loss": 2.4545, + "step": 897500 + }, + { + "epoch": 0.68, + "learning_rate": 4.828965398717647e-05, + "loss": 2.516, + "step": 898000 + }, + { + "epoch": 0.68, + "learning_rate": 4.828870323499622e-05, + "loss": 2.4708, + "step": 898500 + }, + { + "epoch": 0.69, + "learning_rate": 4.828775057750098e-05, + "loss": 2.5151, + "step": 899000 + }, + { + "epoch": 0.69, + "learning_rate": 4.8286797920005735e-05, + "loss": 2.4571, + "step": 899500 + }, + { + "epoch": 0.69, + "learning_rate": 4.8285845262510494e-05, + "loss": 2.5175, + "step": 900000 + }, + { + "epoch": 0.69, + "eval_accuracy": 0.5388566249139872, + "eval_loss": 2.4221484661102295, + "eval_runtime": 9409.0191, + "eval_samples_per_second": 29.226, + "eval_steps_per_second": 7.307, + "step": 900000 + }, + { + "epoch": 0.69, + "learning_rate": 4.8284894510330235e-05, + "loss": 2.4905, + "step": 900500 + }, + { + "epoch": 0.69, + "learning_rate": 4.828394185283499e-05, + "loss": 2.4983, + "step": 901000 + }, + { + "epoch": 0.69, + "learning_rate": 4.828298919533976e-05, + "loss": 2.4935, + "step": 901500 + }, + { + "epoch": 0.69, + "learning_rate": 4.828203653784451e-05, + "loss": 2.4928, + "step": 902000 + }, + { + "epoch": 0.69, + "learning_rate": 4.828108388034927e-05, + "loss": 2.4976, + "step": 902500 + }, + { + "epoch": 0.69, + "learning_rate": 4.8280133128169016e-05, + "loss": 2.4794, + "step": 903000 + }, + { + "epoch": 0.69, + "learning_rate": 4.8279180470673775e-05, + "loss": 2.4856, + "step": 903500 + }, + { + "epoch": 0.69, + "learning_rate": 4.827822781317853e-05, + "loss": 2.4948, + "step": 904000 + }, + { + "epoch": 0.69, + "learning_rate": 4.827727515568329e-05, + "loss": 2.511, + "step": 904500 + }, + { + "epoch": 0.69, + "learning_rate": 4.827632249818805e-05, + "loss": 2.4867, + "step": 905000 + }, + { + "epoch": 0.69, + "learning_rate": 4.827536984069281e-05, + "loss": 2.5173, + "step": 905500 + }, + { + "epoch": 0.69, + "learning_rate": 4.827441718319756e-05, + "loss": 2.4996, + "step": 906000 + }, + { + "epoch": 0.69, + "learning_rate": 4.8273464525702324e-05, + "loss": 2.4811, + "step": 906500 + }, + { + "epoch": 0.69, + "learning_rate": 4.827251377352207e-05, + "loss": 2.5069, + "step": 907000 + }, + { + "epoch": 0.69, + "learning_rate": 4.8271561116026824e-05, + "loss": 2.4965, + "step": 907500 + }, + { + "epoch": 0.69, + "learning_rate": 4.827060845853158e-05, + "loss": 2.4813, + "step": 908000 + }, + { + "epoch": 0.69, + "learning_rate": 4.826965580103634e-05, + "loss": 2.4841, + "step": 908500 + }, + { + "epoch": 0.69, + "learning_rate": 4.826870504885609e-05, + "loss": 2.4887, + "step": 909000 + }, + { + "epoch": 0.69, + "learning_rate": 4.826775239136085e-05, + "loss": 2.5073, + "step": 909500 + }, + { + "epoch": 0.69, + "learning_rate": 4.8266799733865606e-05, + "loss": 2.4886, + "step": 910000 + }, + { + "epoch": 0.69, + "learning_rate": 4.8265847076370364e-05, + "loss": 2.5093, + "step": 910500 + }, + { + "epoch": 0.69, + "learning_rate": 4.8264894418875115e-05, + "loss": 2.4743, + "step": 911000 + }, + { + "epoch": 0.69, + "learning_rate": 4.8263943666694864e-05, + "loss": 2.4868, + "step": 911500 + }, + { + "epoch": 0.7, + "learning_rate": 4.826299100919963e-05, + "loss": 2.5044, + "step": 912000 + }, + { + "epoch": 0.7, + "learning_rate": 4.826203835170438e-05, + "loss": 2.4792, + "step": 912500 + }, + { + "epoch": 0.7, + "learning_rate": 4.826108569420914e-05, + "loss": 2.5448, + "step": 913000 + }, + { + "epoch": 0.7, + "learning_rate": 4.8260133036713904e-05, + "loss": 2.5065, + "step": 913500 + }, + { + "epoch": 0.7, + "learning_rate": 4.8259180379218655e-05, + "loss": 2.4822, + "step": 914000 + }, + { + "epoch": 0.7, + "learning_rate": 4.8258227721723414e-05, + "loss": 2.4932, + "step": 914500 + }, + { + "epoch": 0.7, + "learning_rate": 4.825727506422817e-05, + "loss": 2.4814, + "step": 915000 + }, + { + "epoch": 0.7, + "learning_rate": 4.825632240673293e-05, + "loss": 2.4913, + "step": 915500 + }, + { + "epoch": 0.7, + "learning_rate": 4.825536974923768e-05, + "loss": 2.4926, + "step": 916000 + }, + { + "epoch": 0.7, + "learning_rate": 4.825441709174244e-05, + "loss": 2.5007, + "step": 916500 + }, + { + "epoch": 0.7, + "learning_rate": 4.8253466339562195e-05, + "loss": 2.4949, + "step": 917000 + }, + { + "epoch": 0.7, + "learning_rate": 4.8252513682066946e-05, + "loss": 2.4632, + "step": 917500 + }, + { + "epoch": 0.7, + "learning_rate": 4.8251561024571705e-05, + "loss": 2.4737, + "step": 918000 + }, + { + "epoch": 0.7, + "learning_rate": 4.825060836707646e-05, + "loss": 2.5082, + "step": 918500 + }, + { + "epoch": 0.7, + "learning_rate": 4.824965570958122e-05, + "loss": 2.5111, + "step": 919000 + }, + { + "epoch": 0.7, + "learning_rate": 4.824870305208598e-05, + "loss": 2.4833, + "step": 919500 + }, + { + "epoch": 0.7, + "learning_rate": 4.824775039459074e-05, + "loss": 2.4857, + "step": 920000 + }, + { + "epoch": 0.7, + "learning_rate": 4.8246797737095496e-05, + "loss": 2.4953, + "step": 920500 + }, + { + "epoch": 0.7, + "learning_rate": 4.8245846984915244e-05, + "loss": 2.4999, + "step": 921000 + }, + { + "epoch": 0.7, + "learning_rate": 4.8244894327419996e-05, + "loss": 2.4709, + "step": 921500 + }, + { + "epoch": 0.7, + "learning_rate": 4.824394166992476e-05, + "loss": 2.4988, + "step": 922000 + }, + { + "epoch": 0.7, + "learning_rate": 4.824298901242951e-05, + "loss": 2.5042, + "step": 922500 + }, + { + "epoch": 0.7, + "learning_rate": 4.824203826024926e-05, + "loss": 2.508, + "step": 923000 + }, + { + "epoch": 0.7, + "learning_rate": 4.824108560275402e-05, + "loss": 2.5063, + "step": 923500 + }, + { + "epoch": 0.7, + "learning_rate": 4.824013294525878e-05, + "loss": 2.5009, + "step": 924000 + }, + { + "epoch": 0.7, + "learning_rate": 4.8239180287763536e-05, + "loss": 2.4955, + "step": 924500 + }, + { + "epoch": 0.7, + "learning_rate": 4.8238227630268294e-05, + "loss": 2.4691, + "step": 925000 + }, + { + "epoch": 0.71, + "learning_rate": 4.823727497277305e-05, + "loss": 2.5176, + "step": 925500 + }, + { + "epoch": 0.71, + "learning_rate": 4.823632231527781e-05, + "loss": 2.5087, + "step": 926000 + }, + { + "epoch": 0.71, + "learning_rate": 4.823536965778256e-05, + "loss": 2.4921, + "step": 926500 + }, + { + "epoch": 0.71, + "learning_rate": 4.823441890560231e-05, + "loss": 2.5024, + "step": 927000 + }, + { + "epoch": 0.71, + "learning_rate": 4.8233466248107075e-05, + "loss": 2.5081, + "step": 927500 + }, + { + "epoch": 0.71, + "learning_rate": 4.823251359061183e-05, + "loss": 2.4908, + "step": 928000 + }, + { + "epoch": 0.71, + "learning_rate": 4.8231560933116585e-05, + "loss": 2.5146, + "step": 928500 + }, + { + "epoch": 0.71, + "learning_rate": 4.823060827562135e-05, + "loss": 2.4671, + "step": 929000 + }, + { + "epoch": 0.71, + "learning_rate": 4.82296556181261e-05, + "loss": 2.49, + "step": 929500 + }, + { + "epoch": 0.71, + "learning_rate": 4.822870296063086e-05, + "loss": 2.5036, + "step": 930000 + }, + { + "epoch": 0.71, + "eval_accuracy": 0.539393191471851, + "eval_loss": 2.4172141551971436, + "eval_runtime": 9413.1379, + "eval_samples_per_second": 29.213, + "eval_steps_per_second": 7.303, + "step": 930000 + }, + { + "epoch": 0.71, + "learning_rate": 4.822775030313562e-05, + "loss": 2.4828, + "step": 930500 + }, + { + "epoch": 0.71, + "learning_rate": 4.822679955095537e-05, + "loss": 2.5049, + "step": 931000 + }, + { + "epoch": 0.71, + "learning_rate": 4.8225846893460125e-05, + "loss": 2.4641, + "step": 931500 + }, + { + "epoch": 0.71, + "learning_rate": 4.8224894235964876e-05, + "loss": 2.4949, + "step": 932000 + }, + { + "epoch": 0.71, + "learning_rate": 4.822394348378463e-05, + "loss": 2.4619, + "step": 932500 + }, + { + "epoch": 0.71, + "learning_rate": 4.822299082628939e-05, + "loss": 2.4722, + "step": 933000 + }, + { + "epoch": 0.71, + "learning_rate": 4.822203816879414e-05, + "loss": 2.4838, + "step": 933500 + }, + { + "epoch": 0.71, + "learning_rate": 4.82210855112989e-05, + "loss": 2.4456, + "step": 934000 + }, + { + "epoch": 0.71, + "learning_rate": 4.822013285380366e-05, + "loss": 2.4731, + "step": 934500 + }, + { + "epoch": 0.71, + "learning_rate": 4.8219182101623406e-05, + "loss": 2.5111, + "step": 935000 + }, + { + "epoch": 0.71, + "learning_rate": 4.8218229444128164e-05, + "loss": 2.5076, + "step": 935500 + }, + { + "epoch": 0.71, + "learning_rate": 4.821727678663292e-05, + "loss": 2.529, + "step": 936000 + }, + { + "epoch": 0.71, + "learning_rate": 4.821632603445267e-05, + "loss": 2.4612, + "step": 936500 + }, + { + "epoch": 0.71, + "learning_rate": 4.821537337695743e-05, + "loss": 2.4864, + "step": 937000 + }, + { + "epoch": 0.71, + "learning_rate": 4.821442071946218e-05, + "loss": 2.5097, + "step": 937500 + }, + { + "epoch": 0.71, + "learning_rate": 4.8213468061966946e-05, + "loss": 2.4934, + "step": 938000 + }, + { + "epoch": 0.72, + "learning_rate": 4.8212515404471704e-05, + "loss": 2.4869, + "step": 938500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8211562746976456e-05, + "loss": 2.4571, + "step": 939000 + }, + { + "epoch": 0.72, + "learning_rate": 4.821061008948122e-05, + "loss": 2.4786, + "step": 939500 + }, + { + "epoch": 0.72, + "learning_rate": 4.820965743198597e-05, + "loss": 2.4997, + "step": 940000 + }, + { + "epoch": 0.72, + "learning_rate": 4.820870667980572e-05, + "loss": 2.4797, + "step": 940500 + }, + { + "epoch": 0.72, + "learning_rate": 4.820775402231048e-05, + "loss": 2.4914, + "step": 941000 + }, + { + "epoch": 0.72, + "learning_rate": 4.820680136481524e-05, + "loss": 2.4935, + "step": 941500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8205848707319995e-05, + "loss": 2.4896, + "step": 942000 + }, + { + "epoch": 0.72, + "learning_rate": 4.820489604982475e-05, + "loss": 2.4567, + "step": 942500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8203945297644495e-05, + "loss": 2.514, + "step": 943000 + }, + { + "epoch": 0.72, + "learning_rate": 4.820299264014926e-05, + "loss": 2.4889, + "step": 943500 + }, + { + "epoch": 0.72, + "learning_rate": 4.820203998265401e-05, + "loss": 2.4947, + "step": 944000 + }, + { + "epoch": 0.72, + "learning_rate": 4.820108732515877e-05, + "loss": 2.4895, + "step": 944500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8200134667663535e-05, + "loss": 2.4772, + "step": 945000 + }, + { + "epoch": 0.72, + "learning_rate": 4.819918201016829e-05, + "loss": 2.5238, + "step": 945500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8198229352673045e-05, + "loss": 2.4753, + "step": 946000 + }, + { + "epoch": 0.72, + "learning_rate": 4.81972766951778e-05, + "loss": 2.4789, + "step": 946500 + }, + { + "epoch": 0.72, + "learning_rate": 4.819632594299755e-05, + "loss": 2.4676, + "step": 947000 + }, + { + "epoch": 0.72, + "learning_rate": 4.819537328550231e-05, + "loss": 2.4754, + "step": 947500 + }, + { + "epoch": 0.72, + "learning_rate": 4.819442062800706e-05, + "loss": 2.5048, + "step": 948000 + }, + { + "epoch": 0.72, + "learning_rate": 4.8193467970511826e-05, + "loss": 2.471, + "step": 948500 + }, + { + "epoch": 0.72, + "learning_rate": 4.819251531301658e-05, + "loss": 2.4683, + "step": 949000 + }, + { + "epoch": 0.72, + "learning_rate": 4.8191564560836326e-05, + "loss": 2.4646, + "step": 949500 + }, + { + "epoch": 0.72, + "learning_rate": 4.8190611903341084e-05, + "loss": 2.4823, + "step": 950000 + }, + { + "epoch": 0.72, + "learning_rate": 4.818965924584584e-05, + "loss": 2.4908, + "step": 950500 + }, + { + "epoch": 0.72, + "learning_rate": 4.81887065883506e-05, + "loss": 2.4582, + "step": 951000 + }, + { + "epoch": 0.73, + "learning_rate": 4.818775583617035e-05, + "loss": 2.5086, + "step": 951500 + }, + { + "epoch": 0.73, + "learning_rate": 4.818680317867511e-05, + "loss": 2.4951, + "step": 952000 + }, + { + "epoch": 0.73, + "learning_rate": 4.8185852426494856e-05, + "loss": 2.4763, + "step": 952500 + }, + { + "epoch": 0.73, + "learning_rate": 4.8184899768999614e-05, + "loss": 2.4668, + "step": 953000 + }, + { + "epoch": 0.73, + "learning_rate": 4.8183947111504366e-05, + "loss": 2.5116, + "step": 953500 + }, + { + "epoch": 0.73, + "learning_rate": 4.818299445400913e-05, + "loss": 2.4724, + "step": 954000 + }, + { + "epoch": 0.73, + "learning_rate": 4.818204179651389e-05, + "loss": 2.469, + "step": 954500 + }, + { + "epoch": 0.73, + "learning_rate": 4.818108913901864e-05, + "loss": 2.4679, + "step": 955000 + }, + { + "epoch": 0.73, + "learning_rate": 4.818013838683839e-05, + "loss": 2.4678, + "step": 955500 + }, + { + "epoch": 0.73, + "learning_rate": 4.8179185729343154e-05, + "loss": 2.4845, + "step": 956000 + }, + { + "epoch": 0.73, + "learning_rate": 4.8178233071847905e-05, + "loss": 2.4754, + "step": 956500 + }, + { + "epoch": 0.73, + "learning_rate": 4.8177280414352664e-05, + "loss": 2.4891, + "step": 957000 + }, + { + "epoch": 0.73, + "learning_rate": 4.817632775685742e-05, + "loss": 2.4855, + "step": 957500 + }, + { + "epoch": 0.73, + "learning_rate": 4.817537700467717e-05, + "loss": 2.4583, + "step": 958000 + }, + { + "epoch": 0.73, + "learning_rate": 4.817442434718193e-05, + "loss": 2.4874, + "step": 958500 + }, + { + "epoch": 0.73, + "learning_rate": 4.817347168968668e-05, + "loss": 2.5114, + "step": 959000 + }, + { + "epoch": 0.73, + "learning_rate": 4.8172519032191445e-05, + "loss": 2.4895, + "step": 959500 + }, + { + "epoch": 0.73, + "learning_rate": 4.817156828001119e-05, + "loss": 2.4737, + "step": 960000 + }, + { + "epoch": 0.73, + "eval_accuracy": 0.5402253401093937, + "eval_loss": 2.412201166152954, + "eval_runtime": 9426.0905, + "eval_samples_per_second": 29.173, + "eval_steps_per_second": 7.293, + "step": 960000 + }, + { + "epoch": 0.73, + "learning_rate": 4.8170615622515945e-05, + "loss": 2.4677, + "step": 960500 + }, + { + "epoch": 0.73, + "learning_rate": 4.81696629650207e-05, + "loss": 2.4539, + "step": 961000 + }, + { + "epoch": 0.73, + "learning_rate": 4.816871030752546e-05, + "loss": 2.4774, + "step": 961500 + }, + { + "epoch": 0.73, + "learning_rate": 4.816775765003022e-05, + "loss": 2.4727, + "step": 962000 + }, + { + "epoch": 0.73, + "learning_rate": 4.816680499253498e-05, + "loss": 2.499, + "step": 962500 + }, + { + "epoch": 0.73, + "learning_rate": 4.8165852335039736e-05, + "loss": 2.4723, + "step": 963000 + }, + { + "epoch": 0.73, + "learning_rate": 4.8164899677544495e-05, + "loss": 2.4551, + "step": 963500 + }, + { + "epoch": 0.73, + "learning_rate": 4.8163947020049246e-05, + "loss": 2.4807, + "step": 964000 + }, + { + "epoch": 0.74, + "learning_rate": 4.8162996267869e-05, + "loss": 2.5056, + "step": 964500 + }, + { + "epoch": 0.74, + "learning_rate": 4.816204361037376e-05, + "loss": 2.5135, + "step": 965000 + }, + { + "epoch": 0.74, + "learning_rate": 4.816109285819351e-05, + "loss": 2.4659, + "step": 965500 + }, + { + "epoch": 0.74, + "learning_rate": 4.816014210601325e-05, + "loss": 2.4747, + "step": 966000 + }, + { + "epoch": 0.74, + "learning_rate": 4.815918944851801e-05, + "loss": 2.4545, + "step": 966500 + }, + { + "epoch": 0.74, + "learning_rate": 4.815823679102277e-05, + "loss": 2.5005, + "step": 967000 + }, + { + "epoch": 0.74, + "learning_rate": 4.8157284133527524e-05, + "loss": 2.4939, + "step": 967500 + }, + { + "epoch": 0.74, + "learning_rate": 4.815633147603228e-05, + "loss": 2.4551, + "step": 968000 + }, + { + "epoch": 0.74, + "learning_rate": 4.815537881853704e-05, + "loss": 2.4803, + "step": 968500 + }, + { + "epoch": 0.74, + "learning_rate": 4.81544261610418e-05, + "loss": 2.5045, + "step": 969000 + }, + { + "epoch": 0.74, + "learning_rate": 4.815347350354655e-05, + "loss": 2.5134, + "step": 969500 + }, + { + "epoch": 0.74, + "learning_rate": 4.8152520846051316e-05, + "loss": 2.4397, + "step": 970000 + }, + { + "epoch": 0.74, + "learning_rate": 4.8151570093871064e-05, + "loss": 2.476, + "step": 970500 + }, + { + "epoch": 0.74, + "learning_rate": 4.8150617436375815e-05, + "loss": 2.4476, + "step": 971000 + }, + { + "epoch": 0.74, + "learning_rate": 4.8149664778880574e-05, + "loss": 2.4668, + "step": 971500 + }, + { + "epoch": 0.74, + "learning_rate": 4.814871402670033e-05, + "loss": 2.499, + "step": 972000 + }, + { + "epoch": 0.74, + "learning_rate": 4.814776136920508e-05, + "loss": 2.4625, + "step": 972500 + }, + { + "epoch": 0.74, + "learning_rate": 4.814680871170984e-05, + "loss": 2.489, + "step": 973000 + }, + { + "epoch": 0.74, + "learning_rate": 4.81458560542146e-05, + "loss": 2.4892, + "step": 973500 + }, + { + "epoch": 0.74, + "learning_rate": 4.8144903396719355e-05, + "loss": 2.5068, + "step": 974000 + }, + { + "epoch": 0.74, + "learning_rate": 4.814395073922411e-05, + "loss": 2.4924, + "step": 974500 + }, + { + "epoch": 0.74, + "learning_rate": 4.814299808172887e-05, + "loss": 2.4525, + "step": 975000 + }, + { + "epoch": 0.74, + "learning_rate": 4.814204542423363e-05, + "loss": 2.4971, + "step": 975500 + }, + { + "epoch": 0.74, + "learning_rate": 4.814109467205338e-05, + "loss": 2.4405, + "step": 976000 + }, + { + "epoch": 0.74, + "learning_rate": 4.814014201455813e-05, + "loss": 2.4834, + "step": 976500 + }, + { + "epoch": 0.74, + "learning_rate": 4.8139189357062895e-05, + "loss": 2.4788, + "step": 977000 + }, + { + "epoch": 0.74, + "learning_rate": 4.8138236699567646e-05, + "loss": 2.4718, + "step": 977500 + }, + { + "epoch": 0.75, + "learning_rate": 4.8137284042072405e-05, + "loss": 2.499, + "step": 978000 + }, + { + "epoch": 0.75, + "learning_rate": 4.813633328989215e-05, + "loss": 2.4602, + "step": 978500 + }, + { + "epoch": 0.75, + "learning_rate": 4.813538063239691e-05, + "loss": 2.4698, + "step": 979000 + }, + { + "epoch": 0.75, + "learning_rate": 4.813442797490167e-05, + "loss": 2.4682, + "step": 979500 + }, + { + "epoch": 0.75, + "learning_rate": 4.813347531740643e-05, + "loss": 2.492, + "step": 980000 + }, + { + "epoch": 0.75, + "learning_rate": 4.8132522659911186e-05, + "loss": 2.4744, + "step": 980500 + }, + { + "epoch": 0.75, + "learning_rate": 4.8131571907730934e-05, + "loss": 2.5006, + "step": 981000 + }, + { + "epoch": 0.75, + "learning_rate": 4.813061925023569e-05, + "loss": 2.5044, + "step": 981500 + }, + { + "epoch": 0.75, + "learning_rate": 4.8129666592740444e-05, + "loss": 2.4985, + "step": 982000 + }, + { + "epoch": 0.75, + "learning_rate": 4.812871393524521e-05, + "loss": 2.4751, + "step": 982500 + }, + { + "epoch": 0.75, + "learning_rate": 4.812776127774996e-05, + "loss": 2.4424, + "step": 983000 + }, + { + "epoch": 0.75, + "learning_rate": 4.812681052556971e-05, + "loss": 2.4881, + "step": 983500 + }, + { + "epoch": 0.75, + "learning_rate": 4.812585786807447e-05, + "loss": 2.4833, + "step": 984000 + }, + { + "epoch": 0.75, + "learning_rate": 4.8124907115894216e-05, + "loss": 2.483, + "step": 984500 + }, + { + "epoch": 0.75, + "learning_rate": 4.8123954458398974e-05, + "loss": 2.4597, + "step": 985000 + }, + { + "epoch": 0.75, + "learning_rate": 4.812300180090373e-05, + "loss": 2.4662, + "step": 985500 + }, + { + "epoch": 0.75, + "learning_rate": 4.812204914340849e-05, + "loss": 2.4616, + "step": 986000 + }, + { + "epoch": 0.75, + "learning_rate": 4.812109648591325e-05, + "loss": 2.5064, + "step": 986500 + }, + { + "epoch": 0.75, + "learning_rate": 4.8120143828418e-05, + "loss": 2.4773, + "step": 987000 + }, + { + "epoch": 0.75, + "learning_rate": 4.811919117092276e-05, + "loss": 2.4767, + "step": 987500 + }, + { + "epoch": 0.75, + "learning_rate": 4.8118238513427524e-05, + "loss": 2.4788, + "step": 988000 + }, + { + "epoch": 0.75, + "learning_rate": 4.8117285855932275e-05, + "loss": 2.4874, + "step": 988500 + }, + { + "epoch": 0.75, + "learning_rate": 4.811633319843703e-05, + "loss": 2.4651, + "step": 989000 + }, + { + "epoch": 0.75, + "learning_rate": 4.811538054094179e-05, + "loss": 2.4646, + "step": 989500 + }, + { + "epoch": 0.75, + "learning_rate": 4.811442978876154e-05, + "loss": 2.4655, + "step": 990000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5409409316302379, + "eval_loss": 2.4072861671447754, + "eval_runtime": 9433.0912, + "eval_samples_per_second": 29.151, + "eval_steps_per_second": 7.288, + "step": 990000 + }, + { + "epoch": 0.75, + "learning_rate": 4.81134771312663e-05, + "loss": 2.4483, + "step": 990500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8112524473771057e-05, + "loss": 2.4964, + "step": 991000 + }, + { + "epoch": 0.76, + "learning_rate": 4.8111571816275815e-05, + "loss": 2.4624, + "step": 991500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8110619158780566e-05, + "loss": 2.4773, + "step": 992000 + }, + { + "epoch": 0.76, + "learning_rate": 4.8109666501285325e-05, + "loss": 2.4647, + "step": 992500 + }, + { + "epoch": 0.76, + "learning_rate": 4.810871384379009e-05, + "loss": 2.4689, + "step": 993000 + }, + { + "epoch": 0.76, + "learning_rate": 4.810776118629484e-05, + "loss": 2.4664, + "step": 993500 + }, + { + "epoch": 0.76, + "learning_rate": 4.81068085287996e-05, + "loss": 2.4742, + "step": 994000 + }, + { + "epoch": 0.76, + "learning_rate": 4.810585968193434e-05, + "loss": 2.4475, + "step": 994500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8104907024439096e-05, + "loss": 2.4938, + "step": 995000 + }, + { + "epoch": 0.76, + "learning_rate": 4.8103954366943854e-05, + "loss": 2.5089, + "step": 995500 + }, + { + "epoch": 0.76, + "learning_rate": 4.81030036147636e-05, + "loss": 2.4848, + "step": 996000 + }, + { + "epoch": 0.76, + "learning_rate": 4.810205095726836e-05, + "loss": 2.4562, + "step": 996500 + }, + { + "epoch": 0.76, + "learning_rate": 4.810109829977312e-05, + "loss": 2.4675, + "step": 997000 + }, + { + "epoch": 0.76, + "learning_rate": 4.810014564227788e-05, + "loss": 2.4573, + "step": 997500 + }, + { + "epoch": 0.76, + "learning_rate": 4.809919298478263e-05, + "loss": 2.4664, + "step": 998000 + }, + { + "epoch": 0.76, + "learning_rate": 4.8098240327287394e-05, + "loss": 2.4773, + "step": 998500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8097287669792146e-05, + "loss": 2.4876, + "step": 999000 + }, + { + "epoch": 0.76, + "learning_rate": 4.8096335012296904e-05, + "loss": 2.4869, + "step": 999500 + }, + { + "epoch": 0.76, + "learning_rate": 4.809538235480166e-05, + "loss": 2.443, + "step": 1000000 + }, + { + "epoch": 0.76, + "learning_rate": 4.809442969730642e-05, + "loss": 2.4626, + "step": 1000500 + }, + { + "epoch": 0.76, + "learning_rate": 4.809347703981118e-05, + "loss": 2.4709, + "step": 1001000 + }, + { + "epoch": 0.76, + "learning_rate": 4.809252438231594e-05, + "loss": 2.4591, + "step": 1001500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8091573630135685e-05, + "loss": 2.5046, + "step": 1002000 + }, + { + "epoch": 0.76, + "learning_rate": 4.8090620972640444e-05, + "loss": 2.5069, + "step": 1002500 + }, + { + "epoch": 0.76, + "learning_rate": 4.8089668315145195e-05, + "loss": 2.4912, + "step": 1003000 + }, + { + "epoch": 0.76, + "learning_rate": 4.808871756296494e-05, + "loss": 2.471, + "step": 1003500 + }, + { + "epoch": 0.77, + "learning_rate": 4.808776490546971e-05, + "loss": 2.4829, + "step": 1004000 + }, + { + "epoch": 0.77, + "learning_rate": 4.808681224797446e-05, + "loss": 2.4365, + "step": 1004500 + }, + { + "epoch": 0.77, + "learning_rate": 4.808585959047922e-05, + "loss": 2.4508, + "step": 1005000 + }, + { + "epoch": 0.77, + "learning_rate": 4.8084906932983976e-05, + "loss": 2.4546, + "step": 1005500 + }, + { + "epoch": 0.77, + "learning_rate": 4.8083956180803725e-05, + "loss": 2.4639, + "step": 1006000 + }, + { + "epoch": 0.77, + "learning_rate": 4.808300352330848e-05, + "loss": 2.478, + "step": 1006500 + }, + { + "epoch": 0.77, + "learning_rate": 4.808205086581324e-05, + "loss": 2.4844, + "step": 1007000 + }, + { + "epoch": 0.77, + "learning_rate": 4.8081098208318e-05, + "loss": 2.499, + "step": 1007500 + }, + { + "epoch": 0.77, + "learning_rate": 4.808014555082275e-05, + "loss": 2.4942, + "step": 1008000 + }, + { + "epoch": 0.77, + "learning_rate": 4.80791947986425e-05, + "loss": 2.4731, + "step": 1008500 + }, + { + "epoch": 0.77, + "learning_rate": 4.8078242141147264e-05, + "loss": 2.4879, + "step": 1009000 + }, + { + "epoch": 0.77, + "learning_rate": 4.8077289483652016e-05, + "loss": 2.4472, + "step": 1009500 + }, + { + "epoch": 0.77, + "learning_rate": 4.8076336826156774e-05, + "loss": 2.4394, + "step": 1010000 + }, + { + "epoch": 0.77, + "learning_rate": 4.807538607397652e-05, + "loss": 2.4794, + "step": 1010500 + }, + { + "epoch": 0.77, + "learning_rate": 4.807443341648128e-05, + "loss": 2.4854, + "step": 1011000 + }, + { + "epoch": 0.77, + "learning_rate": 4.807348075898604e-05, + "loss": 2.4698, + "step": 1011500 + }, + { + "epoch": 0.77, + "learning_rate": 4.80725281014908e-05, + "loss": 2.4573, + "step": 1012000 + }, + { + "epoch": 0.77, + "learning_rate": 4.8071575443995556e-05, + "loss": 2.5029, + "step": 1012500 + }, + { + "epoch": 0.77, + "learning_rate": 4.8070622786500314e-05, + "loss": 2.4642, + "step": 1013000 + }, + { + "epoch": 0.77, + "learning_rate": 4.8069670129005066e-05, + "loss": 2.4686, + "step": 1013500 + }, + { + "epoch": 0.77, + "learning_rate": 4.8068719376824814e-05, + "loss": 2.4545, + "step": 1014000 + }, + { + "epoch": 0.77, + "learning_rate": 4.806776671932958e-05, + "loss": 2.458, + "step": 1014500 + }, + { + "epoch": 0.77, + "learning_rate": 4.806681596714933e-05, + "loss": 2.4629, + "step": 1015000 + }, + { + "epoch": 0.77, + "learning_rate": 4.806586521496907e-05, + "loss": 2.4895, + "step": 1015500 + }, + { + "epoch": 0.77, + "learning_rate": 4.806491255747383e-05, + "loss": 2.4846, + "step": 1016000 + }, + { + "epoch": 0.77, + "learning_rate": 4.806395989997859e-05, + "loss": 2.486, + "step": 1016500 + }, + { + "epoch": 0.78, + "learning_rate": 4.8063007242483344e-05, + "loss": 2.5048, + "step": 1017000 + }, + { + "epoch": 0.78, + "learning_rate": 4.80620545849881e-05, + "loss": 2.4903, + "step": 1017500 + }, + { + "epoch": 0.78, + "learning_rate": 4.806110192749286e-05, + "loss": 2.4722, + "step": 1018000 + }, + { + "epoch": 0.78, + "learning_rate": 4.806014926999762e-05, + "loss": 2.4848, + "step": 1018500 + }, + { + "epoch": 0.78, + "learning_rate": 4.805919661250237e-05, + "loss": 2.4748, + "step": 1019000 + }, + { + "epoch": 0.78, + "learning_rate": 4.805824586032212e-05, + "loss": 2.4389, + "step": 1019500 + }, + { + "epoch": 0.78, + "learning_rate": 4.805729320282688e-05, + "loss": 2.4693, + "step": 1020000 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.5417482253664431, + "eval_loss": 2.403618812561035, + "eval_runtime": 9433.7308, + "eval_samples_per_second": 29.149, + "eval_steps_per_second": 7.287, + "step": 1020000 + }, + { + "epoch": 0.78, + "learning_rate": 4.8056340545331635e-05, + "loss": 2.4557, + "step": 1020500 + }, + { + "epoch": 0.78, + "learning_rate": 4.805538788783639e-05, + "loss": 2.4343, + "step": 1021000 + }, + { + "epoch": 0.78, + "learning_rate": 4.805443523034116e-05, + "loss": 2.4373, + "step": 1021500 + }, + { + "epoch": 0.78, + "learning_rate": 4.805348257284591e-05, + "loss": 2.4647, + "step": 1022000 + }, + { + "epoch": 0.78, + "learning_rate": 4.805252991535067e-05, + "loss": 2.4548, + "step": 1022500 + }, + { + "epoch": 0.78, + "learning_rate": 4.8051577257855426e-05, + "loss": 2.446, + "step": 1023000 + }, + { + "epoch": 0.78, + "learning_rate": 4.8050624600360184e-05, + "loss": 2.5157, + "step": 1023500 + }, + { + "epoch": 0.78, + "learning_rate": 4.804967194286494e-05, + "loss": 2.4779, + "step": 1024000 + }, + { + "epoch": 0.78, + "learning_rate": 4.80487192853697e-05, + "loss": 2.4501, + "step": 1024500 + }, + { + "epoch": 0.78, + "learning_rate": 4.804776662787446e-05, + "loss": 2.4923, + "step": 1025000 + }, + { + "epoch": 0.78, + "learning_rate": 4.804681397037921e-05, + "loss": 2.4818, + "step": 1025500 + }, + { + "epoch": 0.78, + "learning_rate": 4.804586131288397e-05, + "loss": 2.4651, + "step": 1026000 + }, + { + "epoch": 0.78, + "learning_rate": 4.804490865538873e-05, + "loss": 2.4953, + "step": 1026500 + }, + { + "epoch": 0.78, + "learning_rate": 4.8043955997893486e-05, + "loss": 2.453, + "step": 1027000 + }, + { + "epoch": 0.78, + "learning_rate": 4.8043003340398244e-05, + "loss": 2.4559, + "step": 1027500 + }, + { + "epoch": 0.78, + "learning_rate": 4.804205258821799e-05, + "loss": 2.4869, + "step": 1028000 + }, + { + "epoch": 0.78, + "learning_rate": 4.804109993072275e-05, + "loss": 2.4966, + "step": 1028500 + }, + { + "epoch": 0.78, + "learning_rate": 4.804014727322751e-05, + "loss": 2.4683, + "step": 1029000 + }, + { + "epoch": 0.78, + "learning_rate": 4.803919652104725e-05, + "loss": 2.4765, + "step": 1029500 + }, + { + "epoch": 0.78, + "learning_rate": 4.8038243863552015e-05, + "loss": 2.5122, + "step": 1030000 + }, + { + "epoch": 0.79, + "learning_rate": 4.8037291206056774e-05, + "loss": 2.4904, + "step": 1030500 + }, + { + "epoch": 0.79, + "learning_rate": 4.8036338548561525e-05, + "loss": 2.5092, + "step": 1031000 + }, + { + "epoch": 0.79, + "learning_rate": 4.8035385891066284e-05, + "loss": 2.4733, + "step": 1031500 + }, + { + "epoch": 0.79, + "learning_rate": 4.803443323357104e-05, + "loss": 2.4707, + "step": 1032000 + }, + { + "epoch": 0.79, + "learning_rate": 4.803348248139079e-05, + "loss": 2.4936, + "step": 1032500 + }, + { + "epoch": 0.79, + "learning_rate": 4.803252982389555e-05, + "loss": 2.4895, + "step": 1033000 + }, + { + "epoch": 0.79, + "learning_rate": 4.803157716640031e-05, + "loss": 2.4742, + "step": 1033500 + }, + { + "epoch": 0.79, + "learning_rate": 4.8030624508905065e-05, + "loss": 2.4679, + "step": 1034000 + }, + { + "epoch": 0.79, + "learning_rate": 4.8029671851409816e-05, + "loss": 2.4937, + "step": 1034500 + }, + { + "epoch": 0.79, + "learning_rate": 4.8028721099229565e-05, + "loss": 2.4666, + "step": 1035000 + }, + { + "epoch": 0.79, + "learning_rate": 4.802776844173433e-05, + "loss": 2.4896, + "step": 1035500 + }, + { + "epoch": 0.79, + "learning_rate": 4.802681578423908e-05, + "loss": 2.4912, + "step": 1036000 + }, + { + "epoch": 0.79, + "learning_rate": 4.802586312674384e-05, + "loss": 2.4853, + "step": 1036500 + }, + { + "epoch": 0.79, + "learning_rate": 4.8024910469248605e-05, + "loss": 2.4839, + "step": 1037000 + }, + { + "epoch": 0.79, + "learning_rate": 4.8023959717068346e-05, + "loss": 2.4711, + "step": 1037500 + }, + { + "epoch": 0.79, + "learning_rate": 4.8023007059573104e-05, + "loss": 2.4715, + "step": 1038000 + }, + { + "epoch": 0.79, + "learning_rate": 4.802205440207786e-05, + "loss": 2.4911, + "step": 1038500 + }, + { + "epoch": 0.79, + "learning_rate": 4.802110174458262e-05, + "loss": 2.4726, + "step": 1039000 + }, + { + "epoch": 0.79, + "learning_rate": 4.802014908708738e-05, + "loss": 2.4622, + "step": 1039500 + }, + { + "epoch": 0.79, + "learning_rate": 4.801919642959213e-05, + "loss": 2.4334, + "step": 1040000 + }, + { + "epoch": 0.79, + "learning_rate": 4.8018245677411886e-05, + "loss": 2.4867, + "step": 1040500 + }, + { + "epoch": 0.79, + "learning_rate": 4.8017293019916644e-05, + "loss": 2.4656, + "step": 1041000 + }, + { + "epoch": 0.79, + "learning_rate": 4.8016340362421396e-05, + "loss": 2.4652, + "step": 1041500 + }, + { + "epoch": 0.79, + "learning_rate": 4.8015387704926154e-05, + "loss": 2.4633, + "step": 1042000 + }, + { + "epoch": 0.79, + "learning_rate": 4.801443504743091e-05, + "loss": 2.4289, + "step": 1042500 + }, + { + "epoch": 0.79, + "learning_rate": 4.801348429525066e-05, + "loss": 2.5151, + "step": 1043000 + }, + { + "epoch": 0.8, + "learning_rate": 4.801253163775542e-05, + "loss": 2.478, + "step": 1043500 + }, + { + "epoch": 0.8, + "learning_rate": 4.801157898026018e-05, + "loss": 2.4469, + "step": 1044000 + }, + { + "epoch": 0.8, + "learning_rate": 4.8010628228079925e-05, + "loss": 2.4742, + "step": 1044500 + }, + { + "epoch": 0.8, + "learning_rate": 4.8009675570584684e-05, + "loss": 2.4527, + "step": 1045000 + }, + { + "epoch": 0.8, + "learning_rate": 4.8008722913089435e-05, + "loss": 2.4618, + "step": 1045500 + }, + { + "epoch": 0.8, + "learning_rate": 4.80077702555942e-05, + "loss": 2.4466, + "step": 1046000 + }, + { + "epoch": 0.8, + "learning_rate": 4.800681759809896e-05, + "loss": 2.4846, + "step": 1046500 + }, + { + "epoch": 0.8, + "learning_rate": 4.800586494060371e-05, + "loss": 2.4428, + "step": 1047000 + }, + { + "epoch": 0.8, + "learning_rate": 4.8004912283108475e-05, + "loss": 2.4663, + "step": 1047500 + }, + { + "epoch": 0.8, + "learning_rate": 4.800395962561323e-05, + "loss": 2.4806, + "step": 1048000 + }, + { + "epoch": 0.8, + "learning_rate": 4.8003006968117985e-05, + "loss": 2.4414, + "step": 1048500 + }, + { + "epoch": 0.8, + "learning_rate": 4.800205621593773e-05, + "loss": 2.4551, + "step": 1049000 + }, + { + "epoch": 0.8, + "learning_rate": 4.800110355844249e-05, + "loss": 2.459, + "step": 1049500 + }, + { + "epoch": 0.8, + "learning_rate": 4.800015090094725e-05, + "loss": 2.4836, + "step": 1050000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.5422612451756714, + "eval_loss": 2.3978164196014404, + "eval_runtime": 9409.8053, + "eval_samples_per_second": 29.224, + "eval_steps_per_second": 7.306, + "step": 1050000 + }, + { + "epoch": 0.8, + "learning_rate": 4.7999198243452e-05, + "loss": 2.4703, + "step": 1050500 + }, + { + "epoch": 0.8, + "learning_rate": 4.799824749127175e-05, + "loss": 2.4687, + "step": 1051000 + }, + { + "epoch": 0.8, + "learning_rate": 4.7997294833776515e-05, + "loss": 2.4701, + "step": 1051500 + }, + { + "epoch": 0.8, + "learning_rate": 4.7996342176281266e-05, + "loss": 2.4913, + "step": 1052000 + }, + { + "epoch": 0.8, + "learning_rate": 4.7995389518786024e-05, + "loss": 2.4866, + "step": 1052500 + }, + { + "epoch": 0.8, + "learning_rate": 4.799443686129079e-05, + "loss": 2.4522, + "step": 1053000 + }, + { + "epoch": 0.8, + "learning_rate": 4.799348420379554e-05, + "loss": 2.458, + "step": 1053500 + }, + { + "epoch": 0.8, + "learning_rate": 4.799253345161529e-05, + "loss": 2.4775, + "step": 1054000 + }, + { + "epoch": 0.8, + "learning_rate": 4.799158079412005e-05, + "loss": 2.4731, + "step": 1054500 + }, + { + "epoch": 0.8, + "learning_rate": 4.7990628136624806e-05, + "loss": 2.4765, + "step": 1055000 + }, + { + "epoch": 0.8, + "learning_rate": 4.7989675479129564e-05, + "loss": 2.4676, + "step": 1055500 + }, + { + "epoch": 0.8, + "learning_rate": 4.7988722821634316e-05, + "loss": 2.4659, + "step": 1056000 + }, + { + "epoch": 0.81, + "learning_rate": 4.798777016413908e-05, + "loss": 2.4868, + "step": 1056500 + }, + { + "epoch": 0.81, + "learning_rate": 4.798681750664383e-05, + "loss": 2.4644, + "step": 1057000 + }, + { + "epoch": 0.81, + "learning_rate": 4.798586484914859e-05, + "loss": 2.4655, + "step": 1057500 + }, + { + "epoch": 0.81, + "learning_rate": 4.798491409696834e-05, + "loss": 2.4758, + "step": 1058000 + }, + { + "epoch": 0.81, + "learning_rate": 4.798396525010308e-05, + "loss": 2.4683, + "step": 1058500 + }, + { + "epoch": 0.81, + "learning_rate": 4.798301259260784e-05, + "loss": 2.4612, + "step": 1059000 + }, + { + "epoch": 0.81, + "learning_rate": 4.7982059935112594e-05, + "loss": 2.4633, + "step": 1059500 + }, + { + "epoch": 0.81, + "learning_rate": 4.798110727761735e-05, + "loss": 2.4523, + "step": 1060000 + }, + { + "epoch": 0.81, + "learning_rate": 4.798015462012211e-05, + "loss": 2.4712, + "step": 1060500 + }, + { + "epoch": 0.81, + "learning_rate": 4.797920196262687e-05, + "loss": 2.5003, + "step": 1061000 + }, + { + "epoch": 0.81, + "learning_rate": 4.797824930513162e-05, + "loss": 2.4579, + "step": 1061500 + }, + { + "epoch": 0.81, + "learning_rate": 4.7977296647636385e-05, + "loss": 2.4768, + "step": 1062000 + }, + { + "epoch": 0.81, + "learning_rate": 4.7976343990141143e-05, + "loss": 2.4944, + "step": 1062500 + }, + { + "epoch": 0.81, + "learning_rate": 4.7975391332645895e-05, + "loss": 2.4808, + "step": 1063000 + }, + { + "epoch": 0.81, + "learning_rate": 4.797444058046564e-05, + "loss": 2.4921, + "step": 1063500 + }, + { + "epoch": 0.81, + "learning_rate": 4.79734898282854e-05, + "loss": 2.4625, + "step": 1064000 + }, + { + "epoch": 0.81, + "learning_rate": 4.797253717079015e-05, + "loss": 2.4526, + "step": 1064500 + }, + { + "epoch": 0.81, + "learning_rate": 4.797158451329491e-05, + "loss": 2.4814, + "step": 1065000 + }, + { + "epoch": 0.81, + "learning_rate": 4.7970631855799666e-05, + "loss": 2.4245, + "step": 1065500 + }, + { + "epoch": 0.81, + "learning_rate": 4.7969679198304425e-05, + "loss": 2.4921, + "step": 1066000 + }, + { + "epoch": 0.81, + "learning_rate": 4.796872654080918e-05, + "loss": 2.4531, + "step": 1066500 + }, + { + "epoch": 0.81, + "learning_rate": 4.796777388331394e-05, + "loss": 2.4505, + "step": 1067000 + }, + { + "epoch": 0.81, + "learning_rate": 4.796682313113369e-05, + "loss": 2.4607, + "step": 1067500 + }, + { + "epoch": 0.81, + "learning_rate": 4.796587047363845e-05, + "loss": 2.4782, + "step": 1068000 + }, + { + "epoch": 0.81, + "learning_rate": 4.79649178161432e-05, + "loss": 2.4652, + "step": 1068500 + }, + { + "epoch": 0.81, + "learning_rate": 4.7963965158647964e-05, + "loss": 2.4647, + "step": 1069000 + }, + { + "epoch": 0.82, + "learning_rate": 4.7963012501152716e-05, + "loss": 2.4741, + "step": 1069500 + }, + { + "epoch": 0.82, + "learning_rate": 4.7962059843657474e-05, + "loss": 2.4717, + "step": 1070000 + }, + { + "epoch": 0.82, + "learning_rate": 4.796110718616223e-05, + "loss": 2.4616, + "step": 1070500 + }, + { + "epoch": 0.82, + "learning_rate": 4.796015452866699e-05, + "loss": 2.4862, + "step": 1071000 + }, + { + "epoch": 0.82, + "learning_rate": 4.795920377648674e-05, + "loss": 2.4684, + "step": 1071500 + }, + { + "epoch": 0.82, + "learning_rate": 4.79582511189915e-05, + "loss": 2.4619, + "step": 1072000 + }, + { + "epoch": 0.82, + "learning_rate": 4.7957298461496256e-05, + "loss": 2.4633, + "step": 1072500 + }, + { + "epoch": 0.82, + "learning_rate": 4.7956345804001014e-05, + "loss": 2.4753, + "step": 1073000 + }, + { + "epoch": 0.82, + "learning_rate": 4.7955393146505765e-05, + "loss": 2.4488, + "step": 1073500 + }, + { + "epoch": 0.82, + "learning_rate": 4.7954440489010524e-05, + "loss": 2.4815, + "step": 1074000 + }, + { + "epoch": 0.82, + "learning_rate": 4.795348783151528e-05, + "loss": 2.4365, + "step": 1074500 + }, + { + "epoch": 0.82, + "learning_rate": 4.795253707933503e-05, + "loss": 2.4861, + "step": 1075000 + }, + { + "epoch": 0.82, + "learning_rate": 4.795158442183979e-05, + "loss": 2.4586, + "step": 1075500 + }, + { + "epoch": 0.82, + "learning_rate": 4.795063176434455e-05, + "loss": 2.4764, + "step": 1076000 + }, + { + "epoch": 0.82, + "learning_rate": 4.7949679106849305e-05, + "loss": 2.4341, + "step": 1076500 + }, + { + "epoch": 0.82, + "learning_rate": 4.7948726449354063e-05, + "loss": 2.4677, + "step": 1077000 + }, + { + "epoch": 0.82, + "learning_rate": 4.7947775697173805e-05, + "loss": 2.4554, + "step": 1077500 + }, + { + "epoch": 0.82, + "learning_rate": 4.794682303967857e-05, + "loss": 2.4678, + "step": 1078000 + }, + { + "epoch": 0.82, + "learning_rate": 4.794587038218333e-05, + "loss": 2.4601, + "step": 1078500 + }, + { + "epoch": 0.82, + "learning_rate": 4.794491772468808e-05, + "loss": 2.4675, + "step": 1079000 + }, + { + "epoch": 0.82, + "learning_rate": 4.7943965067192845e-05, + "loss": 2.4539, + "step": 1079500 + }, + { + "epoch": 0.82, + "learning_rate": 4.7943012409697596e-05, + "loss": 2.4806, + "step": 1080000 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.5427739343919616, + "eval_loss": 2.394293785095215, + "eval_runtime": 9414.9166, + "eval_samples_per_second": 29.208, + "eval_steps_per_second": 7.302, + "step": 1080000 + }, + { + "epoch": 0.82, + "learning_rate": 4.7942059752202355e-05, + "loss": 2.4776, + "step": 1080500 + }, + { + "epoch": 0.82, + "learning_rate": 4.794110709470711e-05, + "loss": 2.4622, + "step": 1081000 + }, + { + "epoch": 0.82, + "learning_rate": 4.794015443721187e-05, + "loss": 2.4604, + "step": 1081500 + }, + { + "epoch": 0.82, + "learning_rate": 4.793920177971663e-05, + "loss": 2.4715, + "step": 1082000 + }, + { + "epoch": 0.83, + "learning_rate": 4.793825102753638e-05, + "loss": 2.4878, + "step": 1082500 + }, + { + "epoch": 0.83, + "learning_rate": 4.7937298370041136e-05, + "loss": 2.4794, + "step": 1083000 + }, + { + "epoch": 0.83, + "learning_rate": 4.7936345712545894e-05, + "loss": 2.4649, + "step": 1083500 + }, + { + "epoch": 0.83, + "learning_rate": 4.7935393055050646e-05, + "loss": 2.4654, + "step": 1084000 + }, + { + "epoch": 0.83, + "learning_rate": 4.793444039755541e-05, + "loss": 2.4496, + "step": 1084500 + }, + { + "epoch": 0.83, + "learning_rate": 4.793348964537516e-05, + "loss": 2.4605, + "step": 1085000 + }, + { + "epoch": 0.83, + "learning_rate": 4.793253698787991e-05, + "loss": 2.4658, + "step": 1085500 + }, + { + "epoch": 0.83, + "learning_rate": 4.793158433038467e-05, + "loss": 2.464, + "step": 1086000 + }, + { + "epoch": 0.83, + "learning_rate": 4.793063167288943e-05, + "loss": 2.4869, + "step": 1086500 + }, + { + "epoch": 0.83, + "learning_rate": 4.7929680920709176e-05, + "loss": 2.4393, + "step": 1087000 + }, + { + "epoch": 0.83, + "learning_rate": 4.7928728263213934e-05, + "loss": 2.4718, + "step": 1087500 + }, + { + "epoch": 0.83, + "learning_rate": 4.792777560571869e-05, + "loss": 2.4957, + "step": 1088000 + }, + { + "epoch": 0.83, + "learning_rate": 4.792682294822345e-05, + "loss": 2.4557, + "step": 1088500 + }, + { + "epoch": 0.83, + "learning_rate": 4.792587029072821e-05, + "loss": 2.4624, + "step": 1089000 + }, + { + "epoch": 0.83, + "learning_rate": 4.792491953854795e-05, + "loss": 2.4763, + "step": 1089500 + }, + { + "epoch": 0.83, + "learning_rate": 4.7923966881052715e-05, + "loss": 2.5094, + "step": 1090000 + }, + { + "epoch": 0.83, + "learning_rate": 4.7923014223557474e-05, + "loss": 2.4537, + "step": 1090500 + }, + { + "epoch": 0.83, + "learning_rate": 4.7922061566062225e-05, + "loss": 2.4459, + "step": 1091000 + }, + { + "epoch": 0.83, + "learning_rate": 4.792110890856698e-05, + "loss": 2.4685, + "step": 1091500 + }, + { + "epoch": 0.83, + "learning_rate": 4.792015625107174e-05, + "loss": 2.4108, + "step": 1092000 + }, + { + "epoch": 0.83, + "learning_rate": 4.79192035935765e-05, + "loss": 2.4233, + "step": 1092500 + }, + { + "epoch": 0.83, + "learning_rate": 4.791825093608125e-05, + "loss": 2.422, + "step": 1093000 + }, + { + "epoch": 0.83, + "learning_rate": 4.7917300183901007e-05, + "loss": 2.4647, + "step": 1093500 + }, + { + "epoch": 0.83, + "learning_rate": 4.7916347526405765e-05, + "loss": 2.4625, + "step": 1094000 + }, + { + "epoch": 0.83, + "learning_rate": 4.7915394868910516e-05, + "loss": 2.4868, + "step": 1094500 + }, + { + "epoch": 0.83, + "learning_rate": 4.7914442211415275e-05, + "loss": 2.431, + "step": 1095000 + }, + { + "epoch": 0.83, + "learning_rate": 4.791349145923503e-05, + "loss": 2.5103, + "step": 1095500 + }, + { + "epoch": 0.84, + "learning_rate": 4.791254070705478e-05, + "loss": 2.4664, + "step": 1096000 + }, + { + "epoch": 0.84, + "learning_rate": 4.791158804955953e-05, + "loss": 2.484, + "step": 1096500 + }, + { + "epoch": 0.84, + "learning_rate": 4.791063539206429e-05, + "loss": 2.4987, + "step": 1097000 + }, + { + "epoch": 0.84, + "learning_rate": 4.7909682734569046e-05, + "loss": 2.4712, + "step": 1097500 + }, + { + "epoch": 0.84, + "learning_rate": 4.7908730077073804e-05, + "loss": 2.461, + "step": 1098000 + }, + { + "epoch": 0.84, + "learning_rate": 4.790777932489355e-05, + "loss": 2.449, + "step": 1098500 + }, + { + "epoch": 0.84, + "learning_rate": 4.790682666739831e-05, + "loss": 2.445, + "step": 1099000 + }, + { + "epoch": 0.84, + "learning_rate": 4.790587400990307e-05, + "loss": 2.4625, + "step": 1099500 + }, + { + "epoch": 0.84, + "learning_rate": 4.790492135240783e-05, + "loss": 2.4803, + "step": 1100000 + }, + { + "epoch": 0.84, + "learning_rate": 4.790396869491258e-05, + "loss": 2.4724, + "step": 1100500 + }, + { + "epoch": 0.84, + "learning_rate": 4.7903017942732334e-05, + "loss": 2.4759, + "step": 1101000 + }, + { + "epoch": 0.84, + "learning_rate": 4.790206528523709e-05, + "loss": 2.4644, + "step": 1101500 + }, + { + "epoch": 0.84, + "learning_rate": 4.7901112627741844e-05, + "loss": 2.4567, + "step": 1102000 + }, + { + "epoch": 0.84, + "learning_rate": 4.79001599702466e-05, + "loss": 2.43, + "step": 1102500 + }, + { + "epoch": 0.84, + "learning_rate": 4.789920731275136e-05, + "loss": 2.4679, + "step": 1103000 + }, + { + "epoch": 0.84, + "learning_rate": 4.789825465525612e-05, + "loss": 2.4397, + "step": 1103500 + }, + { + "epoch": 0.84, + "learning_rate": 4.789730199776088e-05, + "loss": 2.458, + "step": 1104000 + }, + { + "epoch": 0.84, + "learning_rate": 4.7896349340265635e-05, + "loss": 2.4941, + "step": 1104500 + }, + { + "epoch": 0.84, + "learning_rate": 4.7895396682770394e-05, + "loss": 2.4389, + "step": 1105000 + }, + { + "epoch": 0.84, + "learning_rate": 4.7894445930590135e-05, + "loss": 2.469, + "step": 1105500 + }, + { + "epoch": 0.84, + "learning_rate": 4.78934932730949e-05, + "loss": 2.4282, + "step": 1106000 + }, + { + "epoch": 0.84, + "learning_rate": 4.789254061559966e-05, + "loss": 2.4757, + "step": 1106500 + }, + { + "epoch": 0.84, + "learning_rate": 4.789158795810441e-05, + "loss": 2.4651, + "step": 1107000 + }, + { + "epoch": 0.84, + "learning_rate": 4.789063720592416e-05, + "loss": 2.4845, + "step": 1107500 + }, + { + "epoch": 0.84, + "learning_rate": 4.788968454842892e-05, + "loss": 2.4633, + "step": 1108000 + }, + { + "epoch": 0.84, + "learning_rate": 4.7888731890933675e-05, + "loss": 2.4726, + "step": 1108500 + }, + { + "epoch": 0.85, + "learning_rate": 4.788777923343843e-05, + "loss": 2.4672, + "step": 1109000 + }, + { + "epoch": 0.85, + "learning_rate": 4.788682657594319e-05, + "loss": 2.4586, + "step": 1109500 + }, + { + "epoch": 0.85, + "learning_rate": 4.788587582376294e-05, + "loss": 2.4515, + "step": 1110000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.5432032964139937, + "eval_loss": 2.391061544418335, + "eval_runtime": 9406.8304, + "eval_samples_per_second": 29.233, + "eval_steps_per_second": 7.308, + "step": 1110000 + }, + { + "epoch": 0.85, + "learning_rate": 4.78849231662677e-05, + "loss": 2.4367, + "step": 1110500 + }, + { + "epoch": 0.85, + "learning_rate": 4.788397050877245e-05, + "loss": 2.4391, + "step": 1111000 + }, + { + "epoch": 0.85, + "learning_rate": 4.7883017851277215e-05, + "loss": 2.4522, + "step": 1111500 + }, + { + "epoch": 0.85, + "learning_rate": 4.7882065193781966e-05, + "loss": 2.4586, + "step": 1112000 + }, + { + "epoch": 0.85, + "learning_rate": 4.7881114441601714e-05, + "loss": 2.4483, + "step": 1112500 + }, + { + "epoch": 0.85, + "learning_rate": 4.788016178410647e-05, + "loss": 2.4518, + "step": 1113000 + }, + { + "epoch": 0.85, + "learning_rate": 4.787921103192623e-05, + "loss": 2.4534, + "step": 1113500 + }, + { + "epoch": 0.85, + "learning_rate": 4.787825837443098e-05, + "loss": 2.4442, + "step": 1114000 + }, + { + "epoch": 0.85, + "learning_rate": 4.787730571693574e-05, + "loss": 2.4764, + "step": 1114500 + }, + { + "epoch": 0.85, + "learning_rate": 4.7876353059440496e-05, + "loss": 2.4609, + "step": 1115000 + }, + { + "epoch": 0.85, + "learning_rate": 4.7875400401945254e-05, + "loss": 2.4787, + "step": 1115500 + }, + { + "epoch": 0.85, + "learning_rate": 4.787444774445001e-05, + "loss": 2.4552, + "step": 1116000 + }, + { + "epoch": 0.85, + "learning_rate": 4.7873496992269754e-05, + "loss": 2.4662, + "step": 1116500 + }, + { + "epoch": 0.85, + "learning_rate": 4.787254433477452e-05, + "loss": 2.457, + "step": 1117000 + }, + { + "epoch": 0.85, + "learning_rate": 4.787159167727928e-05, + "loss": 2.4292, + "step": 1117500 + }, + { + "epoch": 0.85, + "learning_rate": 4.787063901978403e-05, + "loss": 2.4751, + "step": 1118000 + }, + { + "epoch": 0.85, + "learning_rate": 4.786968636228879e-05, + "loss": 2.4481, + "step": 1118500 + }, + { + "epoch": 0.85, + "learning_rate": 4.786873561010854e-05, + "loss": 2.4504, + "step": 1119000 + }, + { + "epoch": 0.85, + "learning_rate": 4.7867782952613294e-05, + "loss": 2.4468, + "step": 1119500 + }, + { + "epoch": 0.85, + "learning_rate": 4.786683029511805e-05, + "loss": 2.4582, + "step": 1120000 + }, + { + "epoch": 0.85, + "learning_rate": 4.786587763762281e-05, + "loss": 2.4614, + "step": 1120500 + }, + { + "epoch": 0.85, + "learning_rate": 4.786492498012757e-05, + "loss": 2.4775, + "step": 1121000 + }, + { + "epoch": 0.85, + "learning_rate": 4.786397232263232e-05, + "loss": 2.4417, + "step": 1121500 + }, + { + "epoch": 0.86, + "learning_rate": 4.7863019665137085e-05, + "loss": 2.4511, + "step": 1122000 + }, + { + "epoch": 0.86, + "learning_rate": 4.786206700764184e-05, + "loss": 2.462, + "step": 1122500 + }, + { + "epoch": 0.86, + "learning_rate": 4.7861116255461585e-05, + "loss": 2.4773, + "step": 1123000 + }, + { + "epoch": 0.86, + "learning_rate": 4.786016550328133e-05, + "loss": 2.4427, + "step": 1123500 + }, + { + "epoch": 0.86, + "learning_rate": 4.785921284578609e-05, + "loss": 2.4562, + "step": 1124000 + }, + { + "epoch": 0.86, + "learning_rate": 4.785826018829085e-05, + "loss": 2.4459, + "step": 1124500 + }, + { + "epoch": 0.86, + "learning_rate": 4.785730753079561e-05, + "loss": 2.4546, + "step": 1125000 + }, + { + "epoch": 0.86, + "learning_rate": 4.7856354873300366e-05, + "loss": 2.4492, + "step": 1125500 + }, + { + "epoch": 0.86, + "learning_rate": 4.7855402215805124e-05, + "loss": 2.4348, + "step": 1126000 + }, + { + "epoch": 0.86, + "learning_rate": 4.785444955830988e-05, + "loss": 2.4667, + "step": 1126500 + }, + { + "epoch": 0.86, + "learning_rate": 4.7853496900814634e-05, + "loss": 2.4562, + "step": 1127000 + }, + { + "epoch": 0.86, + "learning_rate": 4.78525442433194e-05, + "loss": 2.4696, + "step": 1127500 + }, + { + "epoch": 0.86, + "learning_rate": 4.785159158582415e-05, + "loss": 2.4712, + "step": 1128000 + }, + { + "epoch": 0.86, + "learning_rate": 4.78506408336439e-05, + "loss": 2.4574, + "step": 1128500 + }, + { + "epoch": 0.86, + "learning_rate": 4.784968817614866e-05, + "loss": 2.4461, + "step": 1129000 + }, + { + "epoch": 0.86, + "learning_rate": 4.7848735518653416e-05, + "loss": 2.4691, + "step": 1129500 + }, + { + "epoch": 0.86, + "learning_rate": 4.7847782861158174e-05, + "loss": 2.4558, + "step": 1130000 + }, + { + "epoch": 0.86, + "learning_rate": 4.784683020366293e-05, + "loss": 2.4493, + "step": 1130500 + }, + { + "epoch": 0.86, + "learning_rate": 4.784587754616769e-05, + "loss": 2.4758, + "step": 1131000 + }, + { + "epoch": 0.86, + "learning_rate": 4.784492679398744e-05, + "loss": 2.474, + "step": 1131500 + }, + { + "epoch": 0.86, + "learning_rate": 4.78439741364922e-05, + "loss": 2.4538, + "step": 1132000 + }, + { + "epoch": 0.86, + "learning_rate": 4.7843021478996955e-05, + "loss": 2.4499, + "step": 1132500 + }, + { + "epoch": 0.86, + "learning_rate": 4.7842068821501714e-05, + "loss": 2.4515, + "step": 1133000 + }, + { + "epoch": 0.86, + "learning_rate": 4.7841116164006465e-05, + "loss": 2.4718, + "step": 1133500 + }, + { + "epoch": 0.86, + "learning_rate": 4.7840165411826214e-05, + "loss": 2.4298, + "step": 1134000 + }, + { + "epoch": 0.86, + "learning_rate": 4.783921275433098e-05, + "loss": 2.47, + "step": 1134500 + }, + { + "epoch": 0.87, + "learning_rate": 4.783826009683573e-05, + "loss": 2.4324, + "step": 1135000 + }, + { + "epoch": 0.87, + "learning_rate": 4.783730743934049e-05, + "loss": 2.4586, + "step": 1135500 + }, + { + "epoch": 0.87, + "learning_rate": 4.783635478184525e-05, + "loss": 2.4645, + "step": 1136000 + }, + { + "epoch": 0.87, + "learning_rate": 4.7835402124350005e-05, + "loss": 2.4315, + "step": 1136500 + }, + { + "epoch": 0.87, + "learning_rate": 4.783444946685476e-05, + "loss": 2.4378, + "step": 1137000 + }, + { + "epoch": 0.87, + "learning_rate": 4.7833496809359515e-05, + "loss": 2.4169, + "step": 1137500 + }, + { + "epoch": 0.87, + "learning_rate": 4.783254415186428e-05, + "loss": 2.4285, + "step": 1138000 + }, + { + "epoch": 0.87, + "learning_rate": 4.783159530499902e-05, + "loss": 2.4878, + "step": 1138500 + }, + { + "epoch": 0.87, + "learning_rate": 4.783064264750377e-05, + "loss": 2.4743, + "step": 1139000 + }, + { + "epoch": 0.87, + "learning_rate": 4.782968999000853e-05, + "loss": 2.4721, + "step": 1139500 + }, + { + "epoch": 0.87, + "learning_rate": 4.782873733251329e-05, + "loss": 2.4267, + "step": 1140000 + }, + { + "epoch": 0.87, + "eval_accuracy": 0.5439521534049061, + "eval_loss": 2.385629415512085, + "eval_runtime": 9413.2002, + "eval_samples_per_second": 29.213, + "eval_steps_per_second": 7.303, + "step": 1140000 + }, + { + "epoch": 0.87, + "learning_rate": 4.7827784675018044e-05, + "loss": 2.4325, + "step": 1140500 + }, + { + "epoch": 0.87, + "learning_rate": 4.78268320175228e-05, + "loss": 2.4674, + "step": 1141000 + }, + { + "epoch": 0.87, + "learning_rate": 4.782587936002756e-05, + "loss": 2.4581, + "step": 1141500 + }, + { + "epoch": 0.87, + "learning_rate": 4.782492670253232e-05, + "loss": 2.4251, + "step": 1142000 + }, + { + "epoch": 0.87, + "learning_rate": 4.782397595035207e-05, + "loss": 2.4928, + "step": 1142500 + }, + { + "epoch": 0.87, + "learning_rate": 4.782302329285682e-05, + "loss": 2.4455, + "step": 1143000 + }, + { + "epoch": 0.87, + "learning_rate": 4.7822070635361584e-05, + "loss": 2.4517, + "step": 1143500 + }, + { + "epoch": 0.87, + "learning_rate": 4.7821117977866336e-05, + "loss": 2.4544, + "step": 1144000 + }, + { + "epoch": 0.87, + "learning_rate": 4.7820165320371094e-05, + "loss": 2.4457, + "step": 1144500 + }, + { + "epoch": 0.87, + "learning_rate": 4.781921456819084e-05, + "loss": 2.4503, + "step": 1145000 + }, + { + "epoch": 0.87, + "learning_rate": 4.78182638160106e-05, + "loss": 2.4293, + "step": 1145500 + }, + { + "epoch": 0.87, + "learning_rate": 4.781731115851535e-05, + "loss": 2.4274, + "step": 1146000 + }, + { + "epoch": 0.87, + "learning_rate": 4.781635850102011e-05, + "loss": 2.4641, + "step": 1146500 + }, + { + "epoch": 0.87, + "learning_rate": 4.7815407748839855e-05, + "loss": 2.4524, + "step": 1147000 + }, + { + "epoch": 0.87, + "learning_rate": 4.7814455091344614e-05, + "loss": 2.4499, + "step": 1147500 + }, + { + "epoch": 0.87, + "learning_rate": 4.781350243384937e-05, + "loss": 2.4756, + "step": 1148000 + }, + { + "epoch": 0.88, + "learning_rate": 4.7812549776354123e-05, + "loss": 2.4268, + "step": 1148500 + }, + { + "epoch": 0.88, + "learning_rate": 4.781159711885889e-05, + "loss": 2.4259, + "step": 1149000 + }, + { + "epoch": 0.88, + "learning_rate": 4.781064446136365e-05, + "loss": 2.4636, + "step": 1149500 + }, + { + "epoch": 0.88, + "learning_rate": 4.78096918038684e-05, + "loss": 2.4621, + "step": 1150000 + }, + { + "epoch": 0.88, + "learning_rate": 4.7808739146373163e-05, + "loss": 2.4458, + "step": 1150500 + }, + { + "epoch": 0.88, + "learning_rate": 4.7807786488877915e-05, + "loss": 2.4375, + "step": 1151000 + }, + { + "epoch": 0.88, + "learning_rate": 4.780683573669766e-05, + "loss": 2.4406, + "step": 1151500 + }, + { + "epoch": 0.88, + "learning_rate": 4.780588307920242e-05, + "loss": 2.4774, + "step": 1152000 + }, + { + "epoch": 0.88, + "learning_rate": 4.780493042170718e-05, + "loss": 2.4625, + "step": 1152500 + }, + { + "epoch": 0.88, + "learning_rate": 4.780397776421194e-05, + "loss": 2.4336, + "step": 1153000 + }, + { + "epoch": 0.88, + "learning_rate": 4.780302510671669e-05, + "loss": 2.4659, + "step": 1153500 + }, + { + "epoch": 0.88, + "learning_rate": 4.7802072449221455e-05, + "loss": 2.483, + "step": 1154000 + }, + { + "epoch": 0.88, + "learning_rate": 4.780111979172621e-05, + "loss": 2.4625, + "step": 1154500 + }, + { + "epoch": 0.88, + "learning_rate": 4.7800167134230964e-05, + "loss": 2.4835, + "step": 1155000 + }, + { + "epoch": 0.88, + "learning_rate": 4.779921638205071e-05, + "loss": 2.4456, + "step": 1155500 + }, + { + "epoch": 0.88, + "learning_rate": 4.779826372455548e-05, + "loss": 2.4641, + "step": 1156000 + }, + { + "epoch": 0.88, + "learning_rate": 4.779731106706023e-05, + "loss": 2.4305, + "step": 1156500 + }, + { + "epoch": 0.88, + "learning_rate": 4.779635840956499e-05, + "loss": 2.4385, + "step": 1157000 + }, + { + "epoch": 0.88, + "learning_rate": 4.7795405752069746e-05, + "loss": 2.4703, + "step": 1157500 + }, + { + "epoch": 0.88, + "learning_rate": 4.7794454999889494e-05, + "loss": 2.4794, + "step": 1158000 + }, + { + "epoch": 0.88, + "learning_rate": 4.779350234239425e-05, + "loss": 2.4139, + "step": 1158500 + }, + { + "epoch": 0.88, + "learning_rate": 4.7792549684899004e-05, + "loss": 2.4586, + "step": 1159000 + }, + { + "epoch": 0.88, + "learning_rate": 4.779159702740377e-05, + "loss": 2.4661, + "step": 1159500 + }, + { + "epoch": 0.88, + "learning_rate": 4.779064436990852e-05, + "loss": 2.4846, + "step": 1160000 + }, + { + "epoch": 0.88, + "learning_rate": 4.778969361772827e-05, + "loss": 2.4414, + "step": 1160500 + }, + { + "epoch": 0.88, + "learning_rate": 4.778874286554802e-05, + "loss": 2.4364, + "step": 1161000 + }, + { + "epoch": 0.89, + "learning_rate": 4.778779020805278e-05, + "loss": 2.4949, + "step": 1161500 + }, + { + "epoch": 0.89, + "learning_rate": 4.7786837550557534e-05, + "loss": 2.46, + "step": 1162000 + }, + { + "epoch": 0.89, + "learning_rate": 4.778588489306229e-05, + "loss": 2.465, + "step": 1162500 + }, + { + "epoch": 0.89, + "learning_rate": 4.778493223556705e-05, + "loss": 2.4527, + "step": 1163000 + }, + { + "epoch": 0.89, + "learning_rate": 4.77839814833868e-05, + "loss": 2.4326, + "step": 1163500 + }, + { + "epoch": 0.89, + "learning_rate": 4.778302882589156e-05, + "loss": 2.4477, + "step": 1164000 + }, + { + "epoch": 0.89, + "learning_rate": 4.778207616839631e-05, + "loss": 2.46, + "step": 1164500 + }, + { + "epoch": 0.89, + "learning_rate": 4.7781123510901073e-05, + "loss": 2.4465, + "step": 1165000 + }, + { + "epoch": 0.89, + "learning_rate": 4.778017085340583e-05, + "loss": 2.4404, + "step": 1165500 + }, + { + "epoch": 0.89, + "learning_rate": 4.777921819591058e-05, + "loss": 2.452, + "step": 1166000 + }, + { + "epoch": 0.89, + "learning_rate": 4.777826553841535e-05, + "loss": 2.4442, + "step": 1166500 + }, + { + "epoch": 0.89, + "learning_rate": 4.77773128809201e-05, + "loss": 2.4324, + "step": 1167000 + }, + { + "epoch": 0.89, + "learning_rate": 4.777636022342486e-05, + "loss": 2.4679, + "step": 1167500 + }, + { + "epoch": 0.89, + "learning_rate": 4.7775409471244606e-05, + "loss": 2.4462, + "step": 1168000 + }, + { + "epoch": 0.89, + "learning_rate": 4.7774456813749365e-05, + "loss": 2.4241, + "step": 1168500 + }, + { + "epoch": 0.89, + "learning_rate": 4.777350606156911e-05, + "loss": 2.4454, + "step": 1169000 + }, + { + "epoch": 0.89, + "learning_rate": 4.777255340407387e-05, + "loss": 2.4423, + "step": 1169500 + }, + { + "epoch": 0.89, + "learning_rate": 4.777160074657863e-05, + "loss": 2.3964, + "step": 1170000 + }, + { + "epoch": 0.89, + "eval_accuracy": 0.5446484994400751, + "eval_loss": 2.3814730644226074, + "eval_runtime": 9420.5632, + "eval_samples_per_second": 29.19, + "eval_steps_per_second": 7.298, + "step": 1170000 + }, + { + "epoch": 0.89, + "learning_rate": 4.777064808908339e-05, + "loss": 2.4406, + "step": 1170500 + }, + { + "epoch": 0.89, + "learning_rate": 4.776969543158814e-05, + "loss": 2.448, + "step": 1171000 + }, + { + "epoch": 0.89, + "learning_rate": 4.77687427740929e-05, + "loss": 2.4208, + "step": 1171500 + }, + { + "epoch": 0.89, + "learning_rate": 4.776779011659766e-05, + "loss": 2.4359, + "step": 1172000 + }, + { + "epoch": 0.89, + "learning_rate": 4.7766839364417404e-05, + "loss": 2.4596, + "step": 1172500 + }, + { + "epoch": 0.89, + "learning_rate": 4.776588670692216e-05, + "loss": 2.4564, + "step": 1173000 + }, + { + "epoch": 0.89, + "learning_rate": 4.776493404942692e-05, + "loss": 2.4353, + "step": 1173500 + }, + { + "epoch": 0.89, + "learning_rate": 4.776398139193168e-05, + "loss": 2.4617, + "step": 1174000 + }, + { + "epoch": 0.9, + "learning_rate": 4.776302873443644e-05, + "loss": 2.476, + "step": 1174500 + }, + { + "epoch": 0.9, + "learning_rate": 4.7762076076941196e-05, + "loss": 2.4436, + "step": 1175000 + }, + { + "epoch": 0.9, + "learning_rate": 4.7761123419445954e-05, + "loss": 2.4418, + "step": 1175500 + }, + { + "epoch": 0.9, + "learning_rate": 4.776017076195071e-05, + "loss": 2.4541, + "step": 1176000 + }, + { + "epoch": 0.9, + "learning_rate": 4.7759220009770454e-05, + "loss": 2.4424, + "step": 1176500 + }, + { + "epoch": 0.9, + "learning_rate": 4.775826735227522e-05, + "loss": 2.5097, + "step": 1177000 + }, + { + "epoch": 0.9, + "learning_rate": 4.775731469477998e-05, + "loss": 2.4514, + "step": 1177500 + }, + { + "epoch": 0.9, + "learning_rate": 4.775636394259972e-05, + "loss": 2.457, + "step": 1178000 + }, + { + "epoch": 0.9, + "learning_rate": 4.775541128510448e-05, + "loss": 2.4624, + "step": 1178500 + }, + { + "epoch": 0.9, + "learning_rate": 4.7754458627609235e-05, + "loss": 2.4465, + "step": 1179000 + }, + { + "epoch": 0.9, + "learning_rate": 4.7753505970113993e-05, + "loss": 2.4631, + "step": 1179500 + }, + { + "epoch": 0.9, + "learning_rate": 4.775255521793374e-05, + "loss": 2.454, + "step": 1180000 + }, + { + "epoch": 0.9, + "learning_rate": 4.775160256043849e-05, + "loss": 2.4103, + "step": 1180500 + }, + { + "epoch": 0.9, + "learning_rate": 4.775064990294326e-05, + "loss": 2.4276, + "step": 1181000 + }, + { + "epoch": 0.9, + "learning_rate": 4.7749697245448017e-05, + "loss": 2.459, + "step": 1181500 + }, + { + "epoch": 0.9, + "learning_rate": 4.774874458795277e-05, + "loss": 2.4622, + "step": 1182000 + }, + { + "epoch": 0.9, + "learning_rate": 4.774779193045753e-05, + "loss": 2.4372, + "step": 1182500 + }, + { + "epoch": 0.9, + "learning_rate": 4.7746839272962285e-05, + "loss": 2.4385, + "step": 1183000 + }, + { + "epoch": 0.9, + "learning_rate": 4.774588661546704e-05, + "loss": 2.4579, + "step": 1183500 + }, + { + "epoch": 0.9, + "learning_rate": 4.774493586328679e-05, + "loss": 2.405, + "step": 1184000 + }, + { + "epoch": 0.9, + "learning_rate": 4.774398511110654e-05, + "loss": 2.4625, + "step": 1184500 + }, + { + "epoch": 0.9, + "learning_rate": 4.774303435892629e-05, + "loss": 2.4594, + "step": 1185000 + }, + { + "epoch": 0.9, + "learning_rate": 4.7742081701431046e-05, + "loss": 2.4533, + "step": 1185500 + }, + { + "epoch": 0.9, + "learning_rate": 4.7741129043935804e-05, + "loss": 2.4499, + "step": 1186000 + }, + { + "epoch": 0.9, + "learning_rate": 4.774017638644056e-05, + "loss": 2.435, + "step": 1186500 + }, + { + "epoch": 0.9, + "learning_rate": 4.773922372894532e-05, + "loss": 2.4371, + "step": 1187000 + }, + { + "epoch": 0.91, + "learning_rate": 4.773827107145007e-05, + "loss": 2.4572, + "step": 1187500 + }, + { + "epoch": 0.91, + "learning_rate": 4.773731841395484e-05, + "loss": 2.4763, + "step": 1188000 + }, + { + "epoch": 0.91, + "learning_rate": 4.773636575645959e-05, + "loss": 2.4357, + "step": 1188500 + }, + { + "epoch": 0.91, + "learning_rate": 4.773541309896435e-05, + "loss": 2.4211, + "step": 1189000 + }, + { + "epoch": 0.91, + "learning_rate": 4.7734460441469106e-05, + "loss": 2.4481, + "step": 1189500 + }, + { + "epoch": 0.91, + "learning_rate": 4.7733511594603844e-05, + "loss": 2.4566, + "step": 1190000 + }, + { + "epoch": 0.91, + "learning_rate": 4.77325589371086e-05, + "loss": 2.4505, + "step": 1190500 + }, + { + "epoch": 0.91, + "learning_rate": 4.773160627961336e-05, + "loss": 2.4457, + "step": 1191000 + }, + { + "epoch": 0.91, + "learning_rate": 4.773065362211812e-05, + "loss": 2.4757, + "step": 1191500 + }, + { + "epoch": 0.91, + "learning_rate": 4.772970096462288e-05, + "loss": 2.4399, + "step": 1192000 + }, + { + "epoch": 0.91, + "learning_rate": 4.7728748307127635e-05, + "loss": 2.4767, + "step": 1192500 + }, + { + "epoch": 0.91, + "learning_rate": 4.772779564963239e-05, + "loss": 2.4568, + "step": 1193000 + }, + { + "epoch": 0.91, + "learning_rate": 4.772684299213715e-05, + "loss": 2.4412, + "step": 1193500 + }, + { + "epoch": 0.91, + "learning_rate": 4.77258903346419e-05, + "loss": 2.4723, + "step": 1194000 + }, + { + "epoch": 0.91, + "learning_rate": 4.772493767714666e-05, + "loss": 2.4569, + "step": 1194500 + }, + { + "epoch": 0.91, + "learning_rate": 4.772398501965143e-05, + "loss": 2.4732, + "step": 1195000 + }, + { + "epoch": 0.91, + "learning_rate": 4.772303236215618e-05, + "loss": 2.4534, + "step": 1195500 + }, + { + "epoch": 0.91, + "learning_rate": 4.7722079704660937e-05, + "loss": 2.453, + "step": 1196000 + }, + { + "epoch": 0.91, + "learning_rate": 4.7721128952480685e-05, + "loss": 2.4507, + "step": 1196500 + }, + { + "epoch": 0.91, + "learning_rate": 4.772017629498544e-05, + "loss": 2.4421, + "step": 1197000 + }, + { + "epoch": 0.91, + "learning_rate": 4.77192236374902e-05, + "loss": 2.4792, + "step": 1197500 + }, + { + "epoch": 0.91, + "learning_rate": 4.771827288530994e-05, + "loss": 2.433, + "step": 1198000 + }, + { + "epoch": 0.91, + "learning_rate": 4.771732022781471e-05, + "loss": 2.4545, + "step": 1198500 + }, + { + "epoch": 0.91, + "learning_rate": 4.7716367570319466e-05, + "loss": 2.4525, + "step": 1199000 + }, + { + "epoch": 0.91, + "learning_rate": 4.771541681813921e-05, + "loss": 2.4323, + "step": 1199500 + }, + { + "epoch": 0.91, + "learning_rate": 4.7714464160643966e-05, + "loss": 2.445, + "step": 1200000 + }, + { + "epoch": 0.91, + "eval_accuracy": 0.5452781652350712, + "eval_loss": 2.3779778480529785, + "eval_runtime": 9418.4291, + "eval_samples_per_second": 29.197, + "eval_steps_per_second": 7.299, + "step": 1200000 + }, + { + "epoch": 0.91, + "learning_rate": 4.771351150314873e-05, + "loss": 2.4534, + "step": 1200500 + }, + { + "epoch": 0.92, + "learning_rate": 4.771255884565348e-05, + "loss": 2.4407, + "step": 1201000 + }, + { + "epoch": 0.92, + "learning_rate": 4.771160618815824e-05, + "loss": 2.4623, + "step": 1201500 + }, + { + "epoch": 0.92, + "learning_rate": 4.7710653530663e-05, + "loss": 2.4527, + "step": 1202000 + }, + { + "epoch": 0.92, + "learning_rate": 4.770970087316776e-05, + "loss": 2.4325, + "step": 1202500 + }, + { + "epoch": 0.92, + "learning_rate": 4.7708748215672516e-05, + "loss": 2.4457, + "step": 1203000 + }, + { + "epoch": 0.92, + "learning_rate": 4.770779555817727e-05, + "loss": 2.4407, + "step": 1203500 + }, + { + "epoch": 0.92, + "learning_rate": 4.770684290068203e-05, + "loss": 2.4487, + "step": 1204000 + }, + { + "epoch": 0.92, + "learning_rate": 4.7705890243186784e-05, + "loss": 2.415, + "step": 1204500 + }, + { + "epoch": 0.92, + "learning_rate": 4.770493758569154e-05, + "loss": 2.4298, + "step": 1205000 + }, + { + "epoch": 0.92, + "learning_rate": 4.77039849281963e-05, + "loss": 2.4653, + "step": 1205500 + }, + { + "epoch": 0.92, + "learning_rate": 4.770303417601605e-05, + "loss": 2.4568, + "step": 1206000 + }, + { + "epoch": 0.92, + "learning_rate": 4.770208151852081e-05, + "loss": 2.4529, + "step": 1206500 + }, + { + "epoch": 0.92, + "learning_rate": 4.7701130766340555e-05, + "loss": 2.4402, + "step": 1207000 + }, + { + "epoch": 0.92, + "learning_rate": 4.7700178108845314e-05, + "loss": 2.4254, + "step": 1207500 + }, + { + "epoch": 0.92, + "learning_rate": 4.769922545135007e-05, + "loss": 2.4466, + "step": 1208000 + }, + { + "epoch": 0.92, + "learning_rate": 4.769827279385482e-05, + "loss": 2.438, + "step": 1208500 + }, + { + "epoch": 0.92, + "learning_rate": 4.769732013635959e-05, + "loss": 2.455, + "step": 1209000 + }, + { + "epoch": 0.92, + "learning_rate": 4.769636938417934e-05, + "loss": 2.4445, + "step": 1209500 + }, + { + "epoch": 0.92, + "learning_rate": 4.769541672668409e-05, + "loss": 2.4321, + "step": 1210000 + }, + { + "epoch": 0.92, + "learning_rate": 4.7694464069188846e-05, + "loss": 2.4361, + "step": 1210500 + }, + { + "epoch": 0.92, + "learning_rate": 4.769351141169361e-05, + "loss": 2.4632, + "step": 1211000 + }, + { + "epoch": 0.92, + "learning_rate": 4.769255875419836e-05, + "loss": 2.4711, + "step": 1211500 + }, + { + "epoch": 0.92, + "learning_rate": 4.769160800201811e-05, + "loss": 2.4287, + "step": 1212000 + }, + { + "epoch": 0.92, + "learning_rate": 4.769065534452287e-05, + "loss": 2.4288, + "step": 1212500 + }, + { + "epoch": 0.92, + "learning_rate": 4.768970268702763e-05, + "loss": 2.4304, + "step": 1213000 + }, + { + "epoch": 0.92, + "learning_rate": 4.7688750029532386e-05, + "loss": 2.4764, + "step": 1213500 + }, + { + "epoch": 0.93, + "learning_rate": 4.768779737203714e-05, + "loss": 2.4436, + "step": 1214000 + }, + { + "epoch": 0.93, + "learning_rate": 4.76868447145419e-05, + "loss": 2.4726, + "step": 1214500 + }, + { + "epoch": 0.93, + "learning_rate": 4.7685892057046654e-05, + "loss": 2.4391, + "step": 1215000 + }, + { + "epoch": 0.93, + "learning_rate": 4.76849413048664e-05, + "loss": 2.4477, + "step": 1215500 + }, + { + "epoch": 0.93, + "learning_rate": 4.768398864737116e-05, + "loss": 2.4675, + "step": 1216000 + }, + { + "epoch": 0.93, + "learning_rate": 4.768303598987592e-05, + "loss": 2.4406, + "step": 1216500 + }, + { + "epoch": 0.93, + "learning_rate": 4.768208523769567e-05, + "loss": 2.4212, + "step": 1217000 + }, + { + "epoch": 0.93, + "learning_rate": 4.7681132580200426e-05, + "loss": 2.4576, + "step": 1217500 + }, + { + "epoch": 0.93, + "learning_rate": 4.7680179922705184e-05, + "loss": 2.4724, + "step": 1218000 + }, + { + "epoch": 0.93, + "learning_rate": 4.767922726520994e-05, + "loss": 2.4229, + "step": 1218500 + }, + { + "epoch": 0.93, + "learning_rate": 4.76782746077147e-05, + "loss": 2.448, + "step": 1219000 + }, + { + "epoch": 0.93, + "learning_rate": 4.767732195021946e-05, + "loss": 2.4539, + "step": 1219500 + }, + { + "epoch": 0.93, + "learning_rate": 4.767636929272422e-05, + "loss": 2.4208, + "step": 1220000 + }, + { + "epoch": 0.93, + "learning_rate": 4.767541663522897e-05, + "loss": 2.4288, + "step": 1220500 + }, + { + "epoch": 0.93, + "learning_rate": 4.767446397773373e-05, + "loss": 2.4625, + "step": 1221000 + }, + { + "epoch": 0.93, + "learning_rate": 4.7673511320238485e-05, + "loss": 2.4565, + "step": 1221500 + }, + { + "epoch": 0.93, + "learning_rate": 4.7672558662743244e-05, + "loss": 2.454, + "step": 1222000 + }, + { + "epoch": 0.93, + "learning_rate": 4.7671606005248e-05, + "loss": 2.4292, + "step": 1222500 + }, + { + "epoch": 0.93, + "learning_rate": 4.767065334775276e-05, + "loss": 2.4394, + "step": 1223000 + }, + { + "epoch": 0.93, + "learning_rate": 4.766970259557251e-05, + "loss": 2.4593, + "step": 1223500 + }, + { + "epoch": 0.93, + "learning_rate": 4.766874993807727e-05, + "loss": 2.4652, + "step": 1224000 + }, + { + "epoch": 0.93, + "learning_rate": 4.7667797280582025e-05, + "loss": 2.4719, + "step": 1224500 + }, + { + "epoch": 0.93, + "learning_rate": 4.766684462308678e-05, + "loss": 2.4309, + "step": 1225000 + }, + { + "epoch": 0.93, + "learning_rate": 4.766589387090653e-05, + "loss": 2.4518, + "step": 1225500 + }, + { + "epoch": 0.93, + "learning_rate": 4.766494121341128e-05, + "loss": 2.4464, + "step": 1226000 + }, + { + "epoch": 0.93, + "learning_rate": 4.766398855591605e-05, + "loss": 2.4406, + "step": 1226500 + }, + { + "epoch": 0.94, + "learning_rate": 4.76630358984208e-05, + "loss": 2.4466, + "step": 1227000 + }, + { + "epoch": 0.94, + "learning_rate": 4.766208514624055e-05, + "loss": 2.4317, + "step": 1227500 + }, + { + "epoch": 0.94, + "learning_rate": 4.7661132488745306e-05, + "loss": 2.4657, + "step": 1228000 + }, + { + "epoch": 0.94, + "learning_rate": 4.7660179831250064e-05, + "loss": 2.4223, + "step": 1228500 + }, + { + "epoch": 0.94, + "learning_rate": 4.765922717375482e-05, + "loss": 2.4664, + "step": 1229000 + }, + { + "epoch": 0.94, + "learning_rate": 4.7658274516259574e-05, + "loss": 2.4872, + "step": 1229500 + }, + { + "epoch": 0.94, + "learning_rate": 4.765732376407932e-05, + "loss": 2.426, + "step": 1230000 + }, + { + "epoch": 0.94, + "eval_accuracy": 0.545539682023057, + "eval_loss": 2.3749537467956543, + "eval_runtime": 9421.9591, + "eval_samples_per_second": 29.186, + "eval_steps_per_second": 7.296, + "step": 1230000 + }, + { + "epoch": 0.94, + "learning_rate": 4.765637110658409e-05, + "loss": 2.4645, + "step": 1230500 + }, + { + "epoch": 0.94, + "learning_rate": 4.765541844908884e-05, + "loss": 2.4384, + "step": 1231000 + }, + { + "epoch": 0.94, + "learning_rate": 4.76544657915936e-05, + "loss": 2.4337, + "step": 1231500 + }, + { + "epoch": 0.94, + "learning_rate": 4.765351313409836e-05, + "loss": 2.4517, + "step": 1232000 + }, + { + "epoch": 0.94, + "learning_rate": 4.7652560476603114e-05, + "loss": 2.44, + "step": 1232500 + }, + { + "epoch": 0.94, + "learning_rate": 4.765160781910787e-05, + "loss": 2.4386, + "step": 1233000 + }, + { + "epoch": 0.94, + "learning_rate": 4.765065706692762e-05, + "loss": 2.4378, + "step": 1233500 + }, + { + "epoch": 0.94, + "learning_rate": 4.764970440943238e-05, + "loss": 2.4887, + "step": 1234000 + }, + { + "epoch": 0.94, + "learning_rate": 4.764875175193714e-05, + "loss": 2.4659, + "step": 1234500 + }, + { + "epoch": 0.94, + "learning_rate": 4.764779909444189e-05, + "loss": 2.424, + "step": 1235000 + }, + { + "epoch": 0.94, + "learning_rate": 4.7646848342261644e-05, + "loss": 2.4355, + "step": 1235500 + }, + { + "epoch": 0.94, + "learning_rate": 4.76458956847664e-05, + "loss": 2.4244, + "step": 1236000 + }, + { + "epoch": 0.94, + "learning_rate": 4.7644943027271154e-05, + "loss": 2.3749, + "step": 1236500 + }, + { + "epoch": 0.94, + "learning_rate": 4.764399036977591e-05, + "loss": 2.4463, + "step": 1237000 + }, + { + "epoch": 0.94, + "learning_rate": 4.764303771228067e-05, + "loss": 2.4514, + "step": 1237500 + }, + { + "epoch": 0.94, + "learning_rate": 4.764208505478543e-05, + "loss": 2.4449, + "step": 1238000 + }, + { + "epoch": 0.94, + "learning_rate": 4.764113430260518e-05, + "loss": 2.4579, + "step": 1238500 + }, + { + "epoch": 0.94, + "learning_rate": 4.7640181645109935e-05, + "loss": 2.4395, + "step": 1239000 + }, + { + "epoch": 0.94, + "learning_rate": 4.763922898761469e-05, + "loss": 2.4286, + "step": 1239500 + }, + { + "epoch": 0.95, + "learning_rate": 4.763827633011945e-05, + "loss": 2.4426, + "step": 1240000 + }, + { + "epoch": 0.95, + "learning_rate": 4.763732367262421e-05, + "loss": 2.4494, + "step": 1240500 + }, + { + "epoch": 0.95, + "learning_rate": 4.763637101512897e-05, + "loss": 2.4401, + "step": 1241000 + }, + { + "epoch": 0.95, + "learning_rate": 4.763541835763372e-05, + "loss": 2.4457, + "step": 1241500 + }, + { + "epoch": 0.95, + "learning_rate": 4.763446570013848e-05, + "loss": 2.4725, + "step": 1242000 + }, + { + "epoch": 0.95, + "learning_rate": 4.763351494795823e-05, + "loss": 2.4496, + "step": 1242500 + }, + { + "epoch": 0.95, + "learning_rate": 4.763256419577798e-05, + "loss": 2.4385, + "step": 1243000 + }, + { + "epoch": 0.95, + "learning_rate": 4.763161153828273e-05, + "loss": 2.4381, + "step": 1243500 + }, + { + "epoch": 0.95, + "learning_rate": 4.763065888078749e-05, + "loss": 2.4714, + "step": 1244000 + }, + { + "epoch": 0.95, + "learning_rate": 4.762970622329225e-05, + "loss": 2.4348, + "step": 1244500 + }, + { + "epoch": 0.95, + "learning_rate": 4.762875356579701e-05, + "loss": 2.4444, + "step": 1245000 + }, + { + "epoch": 0.95, + "learning_rate": 4.7627804718931746e-05, + "loss": 2.4591, + "step": 1245500 + }, + { + "epoch": 0.95, + "learning_rate": 4.7626852061436504e-05, + "loss": 2.4604, + "step": 1246000 + }, + { + "epoch": 0.95, + "learning_rate": 4.762589940394126e-05, + "loss": 2.4265, + "step": 1246500 + }, + { + "epoch": 0.95, + "learning_rate": 4.762494674644602e-05, + "loss": 2.4375, + "step": 1247000 + }, + { + "epoch": 0.95, + "learning_rate": 4.762399408895077e-05, + "loss": 2.4498, + "step": 1247500 + }, + { + "epoch": 0.95, + "learning_rate": 4.762304143145554e-05, + "loss": 2.4491, + "step": 1248000 + }, + { + "epoch": 0.95, + "learning_rate": 4.762208877396029e-05, + "loss": 2.4241, + "step": 1248500 + }, + { + "epoch": 0.95, + "learning_rate": 4.762113611646505e-05, + "loss": 2.4576, + "step": 1249000 + }, + { + "epoch": 0.95, + "learning_rate": 4.7620183458969805e-05, + "loss": 2.4299, + "step": 1249500 + }, + { + "epoch": 0.95, + "learning_rate": 4.7619230801474564e-05, + "loss": 2.4504, + "step": 1250000 + }, + { + "epoch": 0.95, + "learning_rate": 4.761827814397932e-05, + "loss": 2.477, + "step": 1250500 + }, + { + "epoch": 0.95, + "learning_rate": 4.7617325486484074e-05, + "loss": 2.4614, + "step": 1251000 + }, + { + "epoch": 0.95, + "learning_rate": 4.761637282898884e-05, + "loss": 2.4511, + "step": 1251500 + }, + { + "epoch": 0.95, + "learning_rate": 4.761542207680859e-05, + "loss": 2.4488, + "step": 1252000 + }, + { + "epoch": 0.95, + "learning_rate": 4.7614471324628335e-05, + "loss": 2.4652, + "step": 1252500 + }, + { + "epoch": 0.95, + "learning_rate": 4.761351866713309e-05, + "loss": 2.445, + "step": 1253000 + }, + { + "epoch": 0.96, + "learning_rate": 4.761256600963785e-05, + "loss": 2.45, + "step": 1253500 + }, + { + "epoch": 0.96, + "learning_rate": 4.76116133521426e-05, + "loss": 2.4297, + "step": 1254000 + }, + { + "epoch": 0.96, + "learning_rate": 4.761066069464736e-05, + "loss": 2.4438, + "step": 1254500 + }, + { + "epoch": 0.96, + "learning_rate": 4.760970994246711e-05, + "loss": 2.4604, + "step": 1255000 + }, + { + "epoch": 0.96, + "learning_rate": 4.760875728497187e-05, + "loss": 2.4621, + "step": 1255500 + }, + { + "epoch": 0.96, + "learning_rate": 4.7607804627476626e-05, + "loss": 2.4478, + "step": 1256000 + }, + { + "epoch": 0.96, + "learning_rate": 4.760685196998138e-05, + "loss": 2.4338, + "step": 1256500 + }, + { + "epoch": 0.96, + "learning_rate": 4.760589931248614e-05, + "loss": 2.4517, + "step": 1257000 + }, + { + "epoch": 0.96, + "learning_rate": 4.760494856030589e-05, + "loss": 2.4592, + "step": 1257500 + }, + { + "epoch": 0.96, + "learning_rate": 4.760399590281064e-05, + "loss": 2.4118, + "step": 1258000 + }, + { + "epoch": 0.96, + "learning_rate": 4.76030432453154e-05, + "loss": 2.4349, + "step": 1258500 + }, + { + "epoch": 0.96, + "learning_rate": 4.7602090587820166e-05, + "loss": 2.443, + "step": 1259000 + }, + { + "epoch": 0.96, + "learning_rate": 4.760113793032492e-05, + "loss": 2.4299, + "step": 1259500 + }, + { + "epoch": 0.96, + "learning_rate": 4.7600185272829676e-05, + "loss": 2.4209, + "step": 1260000 + }, + { + "epoch": 0.96, + "eval_accuracy": 0.5460106632076834, + "eval_loss": 2.3717141151428223, + "eval_runtime": 9429.812, + "eval_samples_per_second": 29.162, + "eval_steps_per_second": 7.29, + "step": 1260000 + }, + { + "epoch": 0.96, + "learning_rate": 4.7599232615334434e-05, + "loss": 2.4372, + "step": 1260500 + }, + { + "epoch": 0.96, + "learning_rate": 4.759827995783919e-05, + "loss": 2.4361, + "step": 1261000 + }, + { + "epoch": 0.96, + "learning_rate": 4.759732730034395e-05, + "loss": 2.4553, + "step": 1261500 + }, + { + "epoch": 0.96, + "learning_rate": 4.75963765481637e-05, + "loss": 2.4358, + "step": 1262000 + }, + { + "epoch": 0.96, + "learning_rate": 4.759542389066846e-05, + "loss": 2.4493, + "step": 1262500 + }, + { + "epoch": 0.96, + "learning_rate": 4.7594473138488206e-05, + "loss": 2.4218, + "step": 1263000 + }, + { + "epoch": 0.96, + "learning_rate": 4.759352048099296e-05, + "loss": 2.4555, + "step": 1263500 + }, + { + "epoch": 0.96, + "learning_rate": 4.759256782349772e-05, + "loss": 2.462, + "step": 1264000 + }, + { + "epoch": 0.96, + "learning_rate": 4.7591615166002474e-05, + "loss": 2.4469, + "step": 1264500 + }, + { + "epoch": 0.96, + "learning_rate": 4.759066250850723e-05, + "loss": 2.453, + "step": 1265000 + }, + { + "epoch": 0.96, + "learning_rate": 4.758970985101199e-05, + "loss": 2.4316, + "step": 1265500 + }, + { + "epoch": 0.96, + "learning_rate": 4.758875719351675e-05, + "loss": 2.4283, + "step": 1266000 + }, + { + "epoch": 0.97, + "learning_rate": 4.75878064413365e-05, + "loss": 2.4142, + "step": 1266500 + }, + { + "epoch": 0.97, + "learning_rate": 4.7586853783841255e-05, + "loss": 2.4259, + "step": 1267000 + }, + { + "epoch": 0.97, + "learning_rate": 4.7585901126346013e-05, + "loss": 2.4472, + "step": 1267500 + }, + { + "epoch": 0.97, + "learning_rate": 4.758494846885077e-05, + "loss": 2.4415, + "step": 1268000 + }, + { + "epoch": 0.97, + "learning_rate": 4.758399771667052e-05, + "loss": 2.4481, + "step": 1268500 + }, + { + "epoch": 0.97, + "learning_rate": 4.758304505917527e-05, + "loss": 2.4556, + "step": 1269000 + }, + { + "epoch": 0.97, + "learning_rate": 4.7582092401680037e-05, + "loss": 2.4314, + "step": 1269500 + }, + { + "epoch": 0.97, + "learning_rate": 4.758113974418479e-05, + "loss": 2.4641, + "step": 1270000 + }, + { + "epoch": 0.97, + "learning_rate": 4.7580187086689546e-05, + "loss": 2.4439, + "step": 1270500 + }, + { + "epoch": 0.97, + "learning_rate": 4.757923442919431e-05, + "loss": 2.4514, + "step": 1271000 + }, + { + "epoch": 0.97, + "learning_rate": 4.757828177169906e-05, + "loss": 2.4243, + "step": 1271500 + }, + { + "epoch": 0.97, + "learning_rate": 4.757732911420382e-05, + "loss": 2.4347, + "step": 1272000 + }, + { + "epoch": 0.97, + "learning_rate": 4.757637836202357e-05, + "loss": 2.448, + "step": 1272500 + }, + { + "epoch": 0.97, + "learning_rate": 4.757542570452833e-05, + "loss": 2.4335, + "step": 1273000 + }, + { + "epoch": 0.97, + "learning_rate": 4.7574474952348076e-05, + "loss": 2.4376, + "step": 1273500 + }, + { + "epoch": 0.97, + "learning_rate": 4.757352229485283e-05, + "loss": 2.4371, + "step": 1274000 + }, + { + "epoch": 0.97, + "learning_rate": 4.7572569637357586e-05, + "loss": 2.4221, + "step": 1274500 + }, + { + "epoch": 0.97, + "learning_rate": 4.757161697986235e-05, + "loss": 2.429, + "step": 1275000 + }, + { + "epoch": 0.97, + "learning_rate": 4.75706643223671e-05, + "loss": 2.4395, + "step": 1275500 + }, + { + "epoch": 0.97, + "learning_rate": 4.756971166487186e-05, + "loss": 2.4272, + "step": 1276000 + }, + { + "epoch": 0.97, + "learning_rate": 4.756876091269161e-05, + "loss": 2.4779, + "step": 1276500 + }, + { + "epoch": 0.97, + "learning_rate": 4.756780825519637e-05, + "loss": 2.4485, + "step": 1277000 + }, + { + "epoch": 0.97, + "learning_rate": 4.7566855597701126e-05, + "loss": 2.4643, + "step": 1277500 + }, + { + "epoch": 0.97, + "learning_rate": 4.7565902940205884e-05, + "loss": 2.424, + "step": 1278000 + }, + { + "epoch": 0.97, + "learning_rate": 4.756495028271064e-05, + "loss": 2.4251, + "step": 1278500 + }, + { + "epoch": 0.97, + "learning_rate": 4.756399953053039e-05, + "loss": 2.4298, + "step": 1279000 + }, + { + "epoch": 0.98, + "learning_rate": 4.756304687303514e-05, + "loss": 2.4454, + "step": 1279500 + }, + { + "epoch": 0.98, + "learning_rate": 4.756209421553991e-05, + "loss": 2.4731, + "step": 1280000 + }, + { + "epoch": 0.98, + "learning_rate": 4.7561141558044665e-05, + "loss": 2.3861, + "step": 1280500 + }, + { + "epoch": 0.98, + "learning_rate": 4.756018890054942e-05, + "loss": 2.4305, + "step": 1281000 + }, + { + "epoch": 0.98, + "learning_rate": 4.7559236243054175e-05, + "loss": 2.4326, + "step": 1281500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7558283585558933e-05, + "loss": 2.4333, + "step": 1282000 + }, + { + "epoch": 0.98, + "learning_rate": 4.755733092806369e-05, + "loss": 2.4663, + "step": 1282500 + }, + { + "epoch": 0.98, + "learning_rate": 4.755637827056845e-05, + "loss": 2.4284, + "step": 1283000 + }, + { + "epoch": 0.98, + "learning_rate": 4.75554275183882e-05, + "loss": 2.4206, + "step": 1283500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7554474860892957e-05, + "loss": 2.4531, + "step": 1284000 + }, + { + "epoch": 0.98, + "learning_rate": 4.755352220339771e-05, + "loss": 2.4535, + "step": 1284500 + }, + { + "epoch": 0.98, + "learning_rate": 4.755256954590247e-05, + "loss": 2.4424, + "step": 1285000 + }, + { + "epoch": 0.98, + "learning_rate": 4.755161688840723e-05, + "loss": 2.4311, + "step": 1285500 + }, + { + "epoch": 0.98, + "learning_rate": 4.755066423091198e-05, + "loss": 2.443, + "step": 1286000 + }, + { + "epoch": 0.98, + "learning_rate": 4.754971157341674e-05, + "loss": 2.4266, + "step": 1286500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7548760821236496e-05, + "loss": 2.447, + "step": 1287000 + }, + { + "epoch": 0.98, + "learning_rate": 4.754780816374125e-05, + "loss": 2.4502, + "step": 1287500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7546855506246006e-05, + "loss": 2.4308, + "step": 1288000 + }, + { + "epoch": 0.98, + "learning_rate": 4.7545902848750764e-05, + "loss": 2.438, + "step": 1288500 + }, + { + "epoch": 0.98, + "learning_rate": 4.754495019125552e-05, + "loss": 2.4328, + "step": 1289000 + }, + { + "epoch": 0.98, + "learning_rate": 4.754399943907527e-05, + "loss": 2.4774, + "step": 1289500 + }, + { + "epoch": 0.98, + "learning_rate": 4.754304868689502e-05, + "loss": 2.4336, + "step": 1290000 + }, + { + "epoch": 0.98, + "eval_accuracy": 0.5466836224585419, + "eval_loss": 2.367841958999634, + "eval_runtime": 9409.8624, + "eval_samples_per_second": 29.223, + "eval_steps_per_second": 7.306, + "step": 1290000 + }, + { + "epoch": 0.98, + "learning_rate": 4.754209602939978e-05, + "loss": 2.4576, + "step": 1290500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7541143371904536e-05, + "loss": 2.4288, + "step": 1291000 + }, + { + "epoch": 0.98, + "learning_rate": 4.754019071440929e-05, + "loss": 2.4196, + "step": 1291500 + }, + { + "epoch": 0.98, + "learning_rate": 4.7539238056914046e-05, + "loss": 2.4701, + "step": 1292000 + }, + { + "epoch": 0.99, + "learning_rate": 4.7538285399418804e-05, + "loss": 2.3882, + "step": 1292500 + }, + { + "epoch": 0.99, + "learning_rate": 4.753733274192356e-05, + "loss": 2.4307, + "step": 1293000 + }, + { + "epoch": 0.99, + "learning_rate": 4.753638008442832e-05, + "loss": 2.4462, + "step": 1293500 + }, + { + "epoch": 0.99, + "learning_rate": 4.753542933224807e-05, + "loss": 2.4597, + "step": 1294000 + }, + { + "epoch": 0.99, + "learning_rate": 4.753447667475283e-05, + "loss": 2.4105, + "step": 1294500 + }, + { + "epoch": 0.99, + "learning_rate": 4.7533524017257585e-05, + "loss": 2.4247, + "step": 1295000 + }, + { + "epoch": 0.99, + "learning_rate": 4.753257135976234e-05, + "loss": 2.4193, + "step": 1295500 + }, + { + "epoch": 0.99, + "learning_rate": 4.75316187022671e-05, + "loss": 2.4141, + "step": 1296000 + }, + { + "epoch": 0.99, + "learning_rate": 4.753066604477185e-05, + "loss": 2.4735, + "step": 1296500 + }, + { + "epoch": 0.99, + "learning_rate": 4.752971338727661e-05, + "loss": 2.4149, + "step": 1297000 + }, + { + "epoch": 0.99, + "learning_rate": 4.752876072978137e-05, + "loss": 2.4019, + "step": 1297500 + }, + { + "epoch": 0.99, + "learning_rate": 4.752780997760112e-05, + "loss": 2.4309, + "step": 1298000 + }, + { + "epoch": 0.99, + "learning_rate": 4.7526857320105877e-05, + "loss": 2.4284, + "step": 1298500 + }, + { + "epoch": 0.99, + "learning_rate": 4.7525904662610635e-05, + "loss": 2.4322, + "step": 1299000 + }, + { + "epoch": 0.99, + "learning_rate": 4.752495200511539e-05, + "loss": 2.4123, + "step": 1299500 + }, + { + "epoch": 0.99, + "learning_rate": 4.752400125293514e-05, + "loss": 2.4198, + "step": 1300000 + }, + { + "epoch": 0.99, + "learning_rate": 4.752304859543989e-05, + "loss": 2.4349, + "step": 1300500 + }, + { + "epoch": 0.99, + "learning_rate": 4.752209593794466e-05, + "loss": 2.4353, + "step": 1301000 + }, + { + "epoch": 0.99, + "learning_rate": 4.7521143280449416e-05, + "loss": 2.44, + "step": 1301500 + }, + { + "epoch": 0.99, + "learning_rate": 4.752019252826916e-05, + "loss": 2.4681, + "step": 1302000 + }, + { + "epoch": 0.99, + "learning_rate": 4.7519241776088906e-05, + "loss": 2.4758, + "step": 1302500 + }, + { + "epoch": 0.99, + "learning_rate": 4.7518289118593664e-05, + "loss": 2.4167, + "step": 1303000 + }, + { + "epoch": 0.99, + "learning_rate": 4.751733646109842e-05, + "loss": 2.43, + "step": 1303500 + }, + { + "epoch": 0.99, + "learning_rate": 4.751638380360318e-05, + "loss": 2.4617, + "step": 1304000 + }, + { + "epoch": 0.99, + "learning_rate": 4.751543114610794e-05, + "loss": 2.38, + "step": 1304500 + }, + { + "epoch": 0.99, + "learning_rate": 4.75144784886127e-05, + "loss": 2.4408, + "step": 1305000 + }, + { + "epoch": 0.99, + "learning_rate": 4.7513527736432446e-05, + "loss": 2.4388, + "step": 1305500 + }, + { + "epoch": 1.0, + "learning_rate": 4.7512575078937204e-05, + "loss": 2.4548, + "step": 1306000 + }, + { + "epoch": 1.0, + "learning_rate": 4.751162242144196e-05, + "loss": 2.4251, + "step": 1306500 + }, + { + "epoch": 1.0, + "learning_rate": 4.751066976394672e-05, + "loss": 2.4523, + "step": 1307000 + }, + { + "epoch": 1.0, + "learning_rate": 4.750971710645147e-05, + "loss": 2.4168, + "step": 1307500 + }, + { + "epoch": 1.0, + "learning_rate": 4.750876444895623e-05, + "loss": 2.4403, + "step": 1308000 + }, + { + "epoch": 1.0, + "learning_rate": 4.750781179146099e-05, + "loss": 2.4531, + "step": 1308500 + }, + { + "epoch": 1.0, + "learning_rate": 4.750686103928074e-05, + "loss": 2.4373, + "step": 1309000 + }, + { + "epoch": 1.0, + "learning_rate": 4.7505908381785495e-05, + "loss": 2.4364, + "step": 1309500 + }, + { + "epoch": 1.0, + "learning_rate": 4.7504955724290254e-05, + "loss": 2.4515, + "step": 1310000 + }, + { + "epoch": 1.0, + "learning_rate": 4.750400306679501e-05, + "loss": 2.446, + "step": 1310500 + }, + { + "epoch": 1.0, + "learning_rate": 4.750305040929977e-05, + "loss": 2.4509, + "step": 1311000 + }, + { + "epoch": 1.0, + "learning_rate": 4.750209775180453e-05, + "loss": 2.4453, + "step": 1311500 + }, + { + "epoch": 1.0, + "learning_rate": 4.750114509430929e-05, + "loss": 2.4448, + "step": 1312000 + } + ], + "max_steps": 26242380, + "num_train_epochs": 20, + "total_flos": 9.748521040622911e+18, + "trial_name": null, + "trial_params": null +}