{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4752526370140886, "global_step": 20000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1e-05, "loss": 178.9465, "step": 50 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 164.9707, "step": 100 }, { "epoch": 0.01, "learning_rate": 3e-05, "loss": 142.2782, "step": 150 }, { "epoch": 0.01, "learning_rate": 4e-05, "loss": 121.5122, "step": 200 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 91.8622, "step": 250 }, { "epoch": 0.02, "learning_rate": 6e-05, "loss": 82.2062, "step": 300 }, { "epoch": 0.03, "learning_rate": 7e-05, "loss": 72.6893, "step": 350 }, { "epoch": 0.03, "learning_rate": 8e-05, "loss": 71.8709, "step": 400 }, { "epoch": 0.03, "learning_rate": 9e-05, "loss": 69.9995, "step": 450 }, { "epoch": 0.04, "learning_rate": 0.0001, "loss": 70.6458, "step": 500 }, { "epoch": 0.04, "learning_rate": 9.994977448744865e-05, "loss": 73.9929, "step": 550 }, { "epoch": 0.04, "learning_rate": 9.989954897489729e-05, "loss": 66.52, "step": 600 }, { "epoch": 0.05, "learning_rate": 9.984932346234594e-05, "loss": 65.8947, "step": 650 }, { "epoch": 0.05, "learning_rate": 9.979909794979458e-05, "loss": 62.5809, "step": 700 }, { "epoch": 0.06, "learning_rate": 9.974887243724323e-05, "loss": 61.212, "step": 750 }, { "epoch": 0.06, "learning_rate": 9.969864692469187e-05, "loss": 68.2408, "step": 800 }, { "epoch": 0.06, "learning_rate": 9.964842141214051e-05, "loss": 61.5308, "step": 850 }, { "epoch": 0.07, "learning_rate": 9.959819589958916e-05, "loss": 58.9116, "step": 900 }, { "epoch": 0.07, "learning_rate": 9.95479703870378e-05, "loss": 60.0702, "step": 950 }, { "epoch": 0.07, "learning_rate": 9.949774487448646e-05, "loss": 57.6135, "step": 1000 }, { "epoch": 0.08, "learning_rate": 9.944751936193509e-05, "loss": 50.9231, "step": 1050 }, { "epoch": 0.08, "learning_rate": 9.939729384938373e-05, "loss": 51.187, "step": 1100 }, { "epoch": 0.08, "learning_rate": 9.934706833683238e-05, "loss": 52.1127, "step": 1150 }, { "epoch": 0.09, "learning_rate": 9.929684282428102e-05, "loss": 47.4608, "step": 1200 }, { "epoch": 0.09, "learning_rate": 9.924661731172968e-05, "loss": 51.6108, "step": 1250 }, { "epoch": 0.1, "learning_rate": 9.919639179917831e-05, "loss": 46.5874, "step": 1300 }, { "epoch": 0.1, "learning_rate": 9.914616628662697e-05, "loss": 41.4706, "step": 1350 }, { "epoch": 0.1, "learning_rate": 9.90959407740756e-05, "loss": 43.7544, "step": 1400 }, { "epoch": 0.11, "learning_rate": 9.904571526152426e-05, "loss": 44.6039, "step": 1450 }, { "epoch": 0.11, "learning_rate": 9.899548974897289e-05, "loss": 41.4384, "step": 1500 }, { "epoch": 0.11, "learning_rate": 9.894526423642154e-05, "loss": 42.8289, "step": 1550 }, { "epoch": 0.12, "learning_rate": 9.889503872387019e-05, "loss": 39.9726, "step": 1600 }, { "epoch": 0.12, "learning_rate": 9.884481321131882e-05, "loss": 43.9533, "step": 1650 }, { "epoch": 0.13, "learning_rate": 9.879458769876748e-05, "loss": 38.7605, "step": 1700 }, { "epoch": 0.13, "learning_rate": 9.87443621862161e-05, "loss": 39.5425, "step": 1750 }, { "epoch": 0.13, "learning_rate": 9.869413667366476e-05, "loss": 37.588, "step": 1800 }, { "epoch": 0.14, "learning_rate": 9.86439111611134e-05, "loss": 39.7744, "step": 1850 }, { "epoch": 0.14, "learning_rate": 9.859368564856205e-05, "loss": 38.2154, "step": 1900 }, { "epoch": 0.14, "learning_rate": 9.85434601360107e-05, "loss": 35.0806, "step": 1950 }, { "epoch": 0.15, "learning_rate": 9.849323462345934e-05, "loss": 39.061, "step": 2000 }, { "epoch": 0.15, "learning_rate": 9.844300911090798e-05, "loss": 35.1544, "step": 2050 }, { "epoch": 0.15, "learning_rate": 9.839278359835663e-05, "loss": 38.123, "step": 2100 }, { "epoch": 0.16, "learning_rate": 9.834255808580527e-05, "loss": 33.1144, "step": 2150 }, { "epoch": 0.16, "learning_rate": 9.829233257325392e-05, "loss": 34.3476, "step": 2200 }, { "epoch": 0.17, "learning_rate": 9.824210706070256e-05, "loss": 29.5665, "step": 2250 }, { "epoch": 0.17, "learning_rate": 9.81918815481512e-05, "loss": 35.8756, "step": 2300 }, { "epoch": 0.17, "learning_rate": 9.814165603559985e-05, "loss": 37.2579, "step": 2350 }, { "epoch": 0.18, "learning_rate": 9.809143052304849e-05, "loss": 33.6245, "step": 2400 }, { "epoch": 0.18, "learning_rate": 9.804120501049714e-05, "loss": 35.6543, "step": 2450 }, { "epoch": 0.18, "learning_rate": 9.799097949794578e-05, "loss": 36.7847, "step": 2500 }, { "epoch": 0.19, "learning_rate": 9.794075398539442e-05, "loss": 33.463, "step": 2550 }, { "epoch": 0.19, "learning_rate": 9.789052847284307e-05, "loss": 32.2215, "step": 2600 }, { "epoch": 0.2, "learning_rate": 9.784030296029171e-05, "loss": 33.4301, "step": 2650 }, { "epoch": 0.2, "learning_rate": 9.779007744774036e-05, "loss": 29.9579, "step": 2700 }, { "epoch": 0.2, "learning_rate": 9.773985193518901e-05, "loss": 31.9141, "step": 2750 }, { "epoch": 0.21, "learning_rate": 9.768962642263764e-05, "loss": 33.2049, "step": 2800 }, { "epoch": 0.21, "learning_rate": 9.763940091008629e-05, "loss": 32.8774, "step": 2850 }, { "epoch": 0.21, "learning_rate": 9.758917539753493e-05, "loss": 29.0858, "step": 2900 }, { "epoch": 0.22, "learning_rate": 9.753894988498358e-05, "loss": 30.1145, "step": 2950 }, { "epoch": 0.22, "learning_rate": 9.748872437243222e-05, "loss": 27.6986, "step": 3000 }, { "epoch": 0.22, "learning_rate": 9.743849885988087e-05, "loss": 31.7807, "step": 3050 }, { "epoch": 0.23, "learning_rate": 9.738827334732952e-05, "loss": 30.5108, "step": 3100 }, { "epoch": 0.23, "learning_rate": 9.733804783477815e-05, "loss": 31.0909, "step": 3150 }, { "epoch": 0.24, "learning_rate": 9.728782232222681e-05, "loss": 27.9057, "step": 3200 }, { "epoch": 0.24, "learning_rate": 9.723759680967544e-05, "loss": 29.7323, "step": 3250 }, { "epoch": 0.24, "learning_rate": 9.71873712971241e-05, "loss": 29.7527, "step": 3300 }, { "epoch": 0.25, "learning_rate": 9.713714578457273e-05, "loss": 29.1442, "step": 3350 }, { "epoch": 0.25, "learning_rate": 9.708692027202137e-05, "loss": 30.8906, "step": 3400 }, { "epoch": 0.25, "learning_rate": 9.703669475947003e-05, "loss": 26.8419, "step": 3450 }, { "epoch": 0.26, "learning_rate": 9.698646924691866e-05, "loss": 29.2181, "step": 3500 }, { "epoch": 0.26, "learning_rate": 9.693624373436732e-05, "loss": 27.6549, "step": 3550 }, { "epoch": 0.27, "learning_rate": 9.688601822181595e-05, "loss": 34.0701, "step": 3600 }, { "epoch": 0.27, "learning_rate": 9.683579270926461e-05, "loss": 24.7487, "step": 3650 }, { "epoch": 0.27, "learning_rate": 9.678556719671325e-05, "loss": 30.0266, "step": 3700 }, { "epoch": 0.28, "learning_rate": 9.67353416841619e-05, "loss": 25.5011, "step": 3750 }, { "epoch": 0.28, "learning_rate": 9.668511617161054e-05, "loss": 26.1437, "step": 3800 }, { "epoch": 0.28, "learning_rate": 9.663489065905918e-05, "loss": 23.2303, "step": 3850 }, { "epoch": 0.29, "learning_rate": 9.658466514650783e-05, "loss": 26.357, "step": 3900 }, { "epoch": 0.29, "learning_rate": 9.653443963395646e-05, "loss": 27.2201, "step": 3950 }, { "epoch": 0.3, "learning_rate": 9.648421412140512e-05, "loss": 25.5695, "step": 4000 }, { "epoch": 0.3, "learning_rate": 9.643398860885376e-05, "loss": 24.8346, "step": 4050 }, { "epoch": 0.3, "learning_rate": 9.63837630963024e-05, "loss": 22.3957, "step": 4100 }, { "epoch": 0.31, "learning_rate": 9.633353758375105e-05, "loss": 24.9532, "step": 4150 }, { "epoch": 0.31, "learning_rate": 9.628331207119969e-05, "loss": 23.1574, "step": 4200 }, { "epoch": 0.31, "learning_rate": 9.623308655864834e-05, "loss": 23.7018, "step": 4250 }, { "epoch": 0.32, "learning_rate": 9.618286104609698e-05, "loss": 25.1433, "step": 4300 }, { "epoch": 0.32, "learning_rate": 9.613263553354562e-05, "loss": 25.0571, "step": 4350 }, { "epoch": 0.32, "learning_rate": 9.608241002099427e-05, "loss": 24.2231, "step": 4400 }, { "epoch": 0.33, "learning_rate": 9.603218450844291e-05, "loss": 23.0983, "step": 4450 }, { "epoch": 0.33, "learning_rate": 9.598195899589156e-05, "loss": 25.0078, "step": 4500 }, { "epoch": 0.34, "learning_rate": 9.59317334833402e-05, "loss": 20.6933, "step": 4550 }, { "epoch": 0.34, "learning_rate": 9.588150797078884e-05, "loss": 23.6196, "step": 4600 }, { "epoch": 0.34, "learning_rate": 9.583128245823749e-05, "loss": 25.2331, "step": 4650 }, { "epoch": 0.35, "learning_rate": 9.578105694568613e-05, "loss": 24.7932, "step": 4700 }, { "epoch": 0.35, "learning_rate": 9.573083143313478e-05, "loss": 24.3586, "step": 4750 }, { "epoch": 0.35, "learning_rate": 9.568060592058342e-05, "loss": 22.7161, "step": 4800 }, { "epoch": 0.36, "learning_rate": 9.563038040803208e-05, "loss": 22.4188, "step": 4850 }, { "epoch": 0.36, "learning_rate": 9.558015489548071e-05, "loss": 21.6516, "step": 4900 }, { "epoch": 0.37, "learning_rate": 9.552992938292937e-05, "loss": 21.78, "step": 4950 }, { "epoch": 0.37, "learning_rate": 9.5479703870378e-05, "loss": 21.0172, "step": 5000 }, { "epoch": 0.37, "learning_rate": 9.542947835782665e-05, "loss": 22.4624, "step": 5050 }, { "epoch": 0.38, "learning_rate": 9.537925284527528e-05, "loss": 23.6615, "step": 5100 }, { "epoch": 0.38, "learning_rate": 9.532902733272393e-05, "loss": 21.8091, "step": 5150 }, { "epoch": 0.38, "learning_rate": 9.527880182017259e-05, "loss": 21.4173, "step": 5200 }, { "epoch": 0.39, "learning_rate": 9.522857630762122e-05, "loss": 20.5415, "step": 5250 }, { "epoch": 0.39, "learning_rate": 9.517835079506987e-05, "loss": 21.0639, "step": 5300 }, { "epoch": 0.39, "learning_rate": 9.51281252825185e-05, "loss": 21.6078, "step": 5350 }, { "epoch": 0.4, "learning_rate": 9.507789976996716e-05, "loss": 19.4142, "step": 5400 }, { "epoch": 0.4, "learning_rate": 9.50276742574158e-05, "loss": 20.2504, "step": 5450 }, { "epoch": 0.41, "learning_rate": 9.497744874486445e-05, "loss": 23.8683, "step": 5500 }, { "epoch": 0.41, "learning_rate": 9.49272232323131e-05, "loss": 19.7559, "step": 5550 }, { "epoch": 0.41, "learning_rate": 9.487699771976174e-05, "loss": 21.1743, "step": 5600 }, { "epoch": 0.42, "learning_rate": 9.482677220721038e-05, "loss": 21.1908, "step": 5650 }, { "epoch": 0.42, "learning_rate": 9.477654669465901e-05, "loss": 20.9591, "step": 5700 }, { "epoch": 0.42, "learning_rate": 9.472632118210767e-05, "loss": 20.9036, "step": 5750 }, { "epoch": 0.43, "learning_rate": 9.46760956695563e-05, "loss": 22.249, "step": 5800 }, { "epoch": 0.43, "learning_rate": 9.462587015700496e-05, "loss": 19.1093, "step": 5850 }, { "epoch": 0.44, "learning_rate": 9.45756446444536e-05, "loss": 21.2714, "step": 5900 }, { "epoch": 0.44, "learning_rate": 9.452541913190225e-05, "loss": 21.3794, "step": 5950 }, { "epoch": 0.44, "learning_rate": 9.447519361935089e-05, "loss": 20.0326, "step": 6000 }, { "epoch": 0.45, "learning_rate": 9.442496810679954e-05, "loss": 19.8004, "step": 6050 }, { "epoch": 0.45, "learning_rate": 9.437474259424818e-05, "loss": 19.0229, "step": 6100 }, { "epoch": 0.45, "learning_rate": 9.432451708169682e-05, "loss": 17.6587, "step": 6150 }, { "epoch": 0.46, "learning_rate": 9.427429156914547e-05, "loss": 21.9247, "step": 6200 }, { "epoch": 0.46, "learning_rate": 9.422406605659411e-05, "loss": 19.743, "step": 6250 }, { "epoch": 0.46, "learning_rate": 9.417384054404276e-05, "loss": 22.9746, "step": 6300 }, { "epoch": 0.47, "learning_rate": 9.41236150314914e-05, "loss": 19.6693, "step": 6350 }, { "epoch": 0.47, "learning_rate": 9.407338951894004e-05, "loss": 19.1141, "step": 6400 }, { "epoch": 0.48, "learning_rate": 9.402316400638869e-05, "loss": 18.3847, "step": 6450 }, { "epoch": 0.48, "learning_rate": 9.397293849383733e-05, "loss": 18.9357, "step": 6500 }, { "epoch": 0.48, "learning_rate": 9.392271298128598e-05, "loss": 18.9316, "step": 6550 }, { "epoch": 0.49, "learning_rate": 9.387248746873462e-05, "loss": 20.9141, "step": 6600 }, { "epoch": 0.49, "learning_rate": 9.382226195618326e-05, "loss": 18.7472, "step": 6650 }, { "epoch": 0.49, "learning_rate": 9.377203644363192e-05, "loss": 18.8577, "step": 6700 }, { "epoch": 0.5, "learning_rate": 9.372181093108055e-05, "loss": 17.8061, "step": 6750 }, { "epoch": 0.5, "learning_rate": 9.36715854185292e-05, "loss": 19.4687, "step": 6800 }, { "epoch": 0.51, "learning_rate": 9.362135990597784e-05, "loss": 19.5103, "step": 6850 }, { "epoch": 0.51, "learning_rate": 9.357113439342648e-05, "loss": 18.5319, "step": 6900 }, { "epoch": 0.51, "learning_rate": 9.352090888087514e-05, "loss": 20.16, "step": 6950 }, { "epoch": 0.52, "learning_rate": 9.347068336832377e-05, "loss": 18.1913, "step": 7000 }, { "epoch": 0.52, "learning_rate": 9.342045785577243e-05, "loss": 21.341, "step": 7050 }, { "epoch": 0.52, "learning_rate": 9.337023234322106e-05, "loss": 16.7701, "step": 7100 }, { "epoch": 0.53, "learning_rate": 9.332000683066972e-05, "loss": 18.045, "step": 7150 }, { "epoch": 0.53, "learning_rate": 9.326978131811835e-05, "loss": 16.0393, "step": 7200 }, { "epoch": 0.53, "learning_rate": 9.3219555805567e-05, "loss": 17.4833, "step": 7250 }, { "epoch": 0.54, "learning_rate": 9.316933029301565e-05, "loss": 17.3978, "step": 7300 }, { "epoch": 0.54, "learning_rate": 9.31191047804643e-05, "loss": 18.2649, "step": 7350 }, { "epoch": 0.55, "learning_rate": 9.306887926791294e-05, "loss": 16.3891, "step": 7400 }, { "epoch": 0.55, "learning_rate": 9.301865375536157e-05, "loss": 21.4399, "step": 7450 }, { "epoch": 0.55, "learning_rate": 9.296842824281023e-05, "loss": 16.3082, "step": 7500 }, { "epoch": 0.56, "learning_rate": 9.291820273025886e-05, "loss": 14.8713, "step": 7550 }, { "epoch": 0.56, "learning_rate": 9.286797721770751e-05, "loss": 16.3099, "step": 7600 }, { "epoch": 0.56, "learning_rate": 9.281775170515616e-05, "loss": 17.8771, "step": 7650 }, { "epoch": 0.57, "learning_rate": 9.27675261926048e-05, "loss": 17.1421, "step": 7700 }, { "epoch": 0.57, "learning_rate": 9.271730068005345e-05, "loss": 16.6478, "step": 7750 }, { "epoch": 0.58, "learning_rate": 9.266707516750209e-05, "loss": 15.3247, "step": 7800 }, { "epoch": 0.58, "learning_rate": 9.261684965495073e-05, "loss": 17.6577, "step": 7850 }, { "epoch": 0.58, "learning_rate": 9.256662414239938e-05, "loss": 18.8549, "step": 7900 }, { "epoch": 0.59, "learning_rate": 9.251639862984802e-05, "loss": 17.4187, "step": 7950 }, { "epoch": 0.59, "learning_rate": 9.246617311729667e-05, "loss": 15.6643, "step": 8000 }, { "epoch": 0.59, "learning_rate": 9.241594760474531e-05, "loss": 17.1987, "step": 8050 }, { "epoch": 0.6, "learning_rate": 9.236572209219396e-05, "loss": 18.1712, "step": 8100 }, { "epoch": 0.6, "learning_rate": 9.23154965796426e-05, "loss": 15.8015, "step": 8150 }, { "epoch": 0.6, "learning_rate": 9.226527106709124e-05, "loss": 19.064, "step": 8200 }, { "epoch": 0.61, "learning_rate": 9.221504555453989e-05, "loss": 18.2748, "step": 8250 }, { "epoch": 0.61, "learning_rate": 9.216482004198853e-05, "loss": 15.0679, "step": 8300 }, { "epoch": 0.62, "learning_rate": 9.211459452943718e-05, "loss": 17.995, "step": 8350 }, { "epoch": 0.62, "learning_rate": 9.206436901688582e-05, "loss": 17.467, "step": 8400 }, { "epoch": 0.62, "learning_rate": 9.201414350433448e-05, "loss": 18.6665, "step": 8450 }, { "epoch": 0.63, "learning_rate": 9.196391799178311e-05, "loss": 17.2848, "step": 8500 }, { "epoch": 0.63, "learning_rate": 9.191369247923175e-05, "loss": 14.4767, "step": 8550 }, { "epoch": 0.63, "learning_rate": 9.18634669666804e-05, "loss": 17.5444, "step": 8600 }, { "epoch": 0.64, "learning_rate": 9.181324145412904e-05, "loss": 14.4661, "step": 8650 }, { "epoch": 0.64, "learning_rate": 9.176301594157768e-05, "loss": 16.3339, "step": 8700 }, { "epoch": 0.65, "learning_rate": 9.171279042902633e-05, "loss": 17.5122, "step": 8750 }, { "epoch": 0.65, "learning_rate": 9.166256491647499e-05, "loss": 16.7631, "step": 8800 }, { "epoch": 0.65, "learning_rate": 9.161233940392362e-05, "loss": 16.5193, "step": 8850 }, { "epoch": 0.66, "learning_rate": 9.156211389137227e-05, "loss": 17.8364, "step": 8900 }, { "epoch": 0.66, "learning_rate": 9.15118883788209e-05, "loss": 16.2916, "step": 8950 }, { "epoch": 0.66, "learning_rate": 9.146166286626956e-05, "loss": 14.1719, "step": 9000 }, { "epoch": 0.67, "learning_rate": 9.141143735371819e-05, "loss": 18.2987, "step": 9050 }, { "epoch": 0.67, "learning_rate": 9.136121184116684e-05, "loss": 17.4248, "step": 9100 }, { "epoch": 0.67, "learning_rate": 9.13109863286155e-05, "loss": 16.1862, "step": 9150 }, { "epoch": 0.68, "learning_rate": 9.126076081606412e-05, "loss": 16.3134, "step": 9200 }, { "epoch": 0.68, "learning_rate": 9.121053530351278e-05, "loss": 14.9158, "step": 9250 }, { "epoch": 0.69, "learning_rate": 9.116030979096141e-05, "loss": 15.2504, "step": 9300 }, { "epoch": 0.69, "learning_rate": 9.111008427841007e-05, "loss": 14.1967, "step": 9350 }, { "epoch": 0.69, "learning_rate": 9.105985876585871e-05, "loss": 17.3165, "step": 9400 }, { "epoch": 0.7, "learning_rate": 9.100963325330736e-05, "loss": 14.5912, "step": 9450 }, { "epoch": 0.7, "learning_rate": 9.0959407740756e-05, "loss": 17.5593, "step": 9500 }, { "epoch": 0.7, "learning_rate": 9.090918222820465e-05, "loss": 16.3421, "step": 9550 }, { "epoch": 0.71, "learning_rate": 9.085895671565329e-05, "loss": 16.2821, "step": 9600 }, { "epoch": 0.71, "learning_rate": 9.080873120310192e-05, "loss": 16.4985, "step": 9650 }, { "epoch": 0.72, "learning_rate": 9.075850569055058e-05, "loss": 16.1138, "step": 9700 }, { "epoch": 0.72, "learning_rate": 9.070828017799922e-05, "loss": 16.3997, "step": 9750 }, { "epoch": 0.72, "learning_rate": 9.065805466544787e-05, "loss": 15.518, "step": 9800 }, { "epoch": 0.73, "learning_rate": 9.060782915289651e-05, "loss": 13.8424, "step": 9850 }, { "epoch": 0.73, "learning_rate": 9.055760364034515e-05, "loss": 15.0784, "step": 9900 }, { "epoch": 0.73, "learning_rate": 9.05073781277938e-05, "loss": 14.0163, "step": 9950 }, { "epoch": 0.74, "learning_rate": 9.045715261524244e-05, "loss": 16.7863, "step": 10000 }, { "epoch": 0.74, "learning_rate": 9.040692710269109e-05, "loss": 13.6715, "step": 10050 }, { "epoch": 0.75, "learning_rate": 9.035670159013973e-05, "loss": 15.1071, "step": 10100 }, { "epoch": 0.75, "learning_rate": 9.030647607758837e-05, "loss": 14.2658, "step": 10150 }, { "epoch": 0.75, "learning_rate": 9.025625056503703e-05, "loss": 15.1115, "step": 10200 }, { "epoch": 0.76, "learning_rate": 9.020602505248566e-05, "loss": 14.028, "step": 10250 }, { "epoch": 0.76, "learning_rate": 9.015579953993431e-05, "loss": 13.3066, "step": 10300 }, { "epoch": 0.76, "learning_rate": 9.010557402738295e-05, "loss": 14.1185, "step": 10350 }, { "epoch": 0.77, "learning_rate": 9.00553485148316e-05, "loss": 14.061, "step": 10400 }, { "epoch": 0.77, "learning_rate": 9.000512300228024e-05, "loss": 15.2439, "step": 10450 }, { "epoch": 0.77, "learning_rate": 8.995489748972888e-05, "loss": 13.3617, "step": 10500 }, { "epoch": 0.78, "learning_rate": 8.990467197717754e-05, "loss": 14.5514, "step": 10550 }, { "epoch": 0.78, "learning_rate": 8.985444646462617e-05, "loss": 15.2426, "step": 10600 }, { "epoch": 0.79, "learning_rate": 8.980422095207483e-05, "loss": 16.6418, "step": 10650 }, { "epoch": 0.79, "learning_rate": 8.975399543952346e-05, "loss": 13.3146, "step": 10700 }, { "epoch": 0.79, "learning_rate": 8.970376992697212e-05, "loss": 14.9333, "step": 10750 }, { "epoch": 0.8, "learning_rate": 8.965354441442075e-05, "loss": 14.4502, "step": 10800 }, { "epoch": 0.8, "learning_rate": 8.960331890186939e-05, "loss": 14.7886, "step": 10850 }, { "epoch": 0.8, "learning_rate": 8.955309338931805e-05, "loss": 15.0266, "step": 10900 }, { "epoch": 0.81, "learning_rate": 8.950286787676668e-05, "loss": 14.543, "step": 10950 }, { "epoch": 0.81, "learning_rate": 8.945264236421534e-05, "loss": 15.8078, "step": 11000 }, { "epoch": 0.82, "learning_rate": 8.940241685166397e-05, "loss": 13.6052, "step": 11050 }, { "epoch": 0.82, "learning_rate": 8.935219133911263e-05, "loss": 14.2995, "step": 11100 }, { "epoch": 0.82, "learning_rate": 8.930196582656126e-05, "loss": 15.732, "step": 11150 }, { "epoch": 0.83, "learning_rate": 8.925174031400991e-05, "loss": 14.0573, "step": 11200 }, { "epoch": 0.83, "learning_rate": 8.920151480145856e-05, "loss": 17.5941, "step": 11250 }, { "epoch": 0.83, "learning_rate": 8.91512892889072e-05, "loss": 14.7829, "step": 11300 }, { "epoch": 0.84, "learning_rate": 8.910106377635585e-05, "loss": 14.6669, "step": 11350 }, { "epoch": 0.84, "learning_rate": 8.905083826380448e-05, "loss": 14.3315, "step": 11400 }, { "epoch": 0.84, "learning_rate": 8.900061275125313e-05, "loss": 14.2639, "step": 11450 }, { "epoch": 0.85, "learning_rate": 8.895038723870176e-05, "loss": 14.3226, "step": 11500 }, { "epoch": 0.85, "learning_rate": 8.890016172615042e-05, "loss": 14.4975, "step": 11550 }, { "epoch": 0.86, "learning_rate": 8.884993621359907e-05, "loss": 14.8436, "step": 11600 }, { "epoch": 0.86, "learning_rate": 8.879971070104771e-05, "loss": 13.8481, "step": 11650 }, { "epoch": 0.86, "learning_rate": 8.874948518849635e-05, "loss": 12.8151, "step": 11700 }, { "epoch": 0.87, "learning_rate": 8.8699259675945e-05, "loss": 13.1659, "step": 11750 }, { "epoch": 0.87, "learning_rate": 8.864903416339364e-05, "loss": 15.0919, "step": 11800 }, { "epoch": 0.87, "learning_rate": 8.859880865084229e-05, "loss": 14.4382, "step": 11850 }, { "epoch": 0.88, "learning_rate": 8.854858313829093e-05, "loss": 14.0989, "step": 11900 }, { "epoch": 0.88, "learning_rate": 8.849835762573957e-05, "loss": 14.5763, "step": 11950 }, { "epoch": 0.89, "learning_rate": 8.844813211318822e-05, "loss": 13.4144, "step": 12000 }, { "epoch": 0.89, "learning_rate": 8.839790660063686e-05, "loss": 15.6018, "step": 12050 }, { "epoch": 0.89, "learning_rate": 8.83476810880855e-05, "loss": 14.7849, "step": 12100 }, { "epoch": 0.9, "learning_rate": 8.829745557553415e-05, "loss": 14.441, "step": 12150 }, { "epoch": 0.9, "learning_rate": 8.82472300629828e-05, "loss": 14.2135, "step": 12200 }, { "epoch": 0.9, "learning_rate": 8.819700455043144e-05, "loss": 17.1245, "step": 12250 }, { "epoch": 0.91, "learning_rate": 8.814677903788008e-05, "loss": 14.6629, "step": 12300 }, { "epoch": 0.91, "learning_rate": 8.809655352532873e-05, "loss": 16.6715, "step": 12350 }, { "epoch": 0.91, "learning_rate": 8.804632801277738e-05, "loss": 13.0133, "step": 12400 }, { "epoch": 0.92, "learning_rate": 8.799610250022601e-05, "loss": 14.1551, "step": 12450 }, { "epoch": 0.92, "learning_rate": 8.794587698767466e-05, "loss": 14.019, "step": 12500 }, { "epoch": 0.93, "learning_rate": 8.78956514751233e-05, "loss": 14.4279, "step": 12550 }, { "epoch": 0.93, "learning_rate": 8.784542596257195e-05, "loss": 12.5293, "step": 12600 }, { "epoch": 0.93, "learning_rate": 8.77952004500206e-05, "loss": 15.0403, "step": 12650 }, { "epoch": 0.94, "learning_rate": 8.774497493746924e-05, "loss": 13.8193, "step": 12700 }, { "epoch": 0.94, "learning_rate": 8.769474942491789e-05, "loss": 13.1564, "step": 12750 }, { "epoch": 0.94, "learning_rate": 8.764452391236652e-05, "loss": 14.6415, "step": 12800 }, { "epoch": 0.95, "learning_rate": 8.759429839981518e-05, "loss": 12.2339, "step": 12850 }, { "epoch": 0.95, "learning_rate": 8.754407288726381e-05, "loss": 12.1604, "step": 12900 }, { "epoch": 0.96, "learning_rate": 8.749384737471247e-05, "loss": 15.4939, "step": 12950 }, { "epoch": 0.96, "learning_rate": 8.744362186216111e-05, "loss": 13.9713, "step": 13000 }, { "epoch": 0.96, "learning_rate": 8.739339634960976e-05, "loss": 14.0986, "step": 13050 }, { "epoch": 0.97, "learning_rate": 8.73431708370584e-05, "loss": 13.6334, "step": 13100 }, { "epoch": 0.97, "learning_rate": 8.729294532450703e-05, "loss": 13.5201, "step": 13150 }, { "epoch": 0.97, "learning_rate": 8.724271981195569e-05, "loss": 14.3793, "step": 13200 }, { "epoch": 0.98, "learning_rate": 8.719249429940432e-05, "loss": 13.1741, "step": 13250 }, { "epoch": 0.98, "learning_rate": 8.714226878685298e-05, "loss": 11.7782, "step": 13300 }, { "epoch": 0.98, "learning_rate": 8.709204327430162e-05, "loss": 12.2758, "step": 13350 }, { "epoch": 0.99, "learning_rate": 8.704181776175027e-05, "loss": 13.1723, "step": 13400 }, { "epoch": 0.99, "learning_rate": 8.699159224919891e-05, "loss": 14.0858, "step": 13450 }, { "epoch": 1.0, "learning_rate": 8.694136673664755e-05, "loss": 11.2836, "step": 13500 }, { "epoch": 1.0, "learning_rate": 8.68911412240962e-05, "loss": 15.7226, "step": 13550 }, { "epoch": 1.0, "learning_rate": 8.684091571154484e-05, "loss": 15.8889, "step": 13600 }, { "epoch": 1.01, "learning_rate": 8.679069019899349e-05, "loss": 12.2185, "step": 13650 }, { "epoch": 1.01, "learning_rate": 8.674046468644213e-05, "loss": 11.4647, "step": 13700 }, { "epoch": 1.01, "learning_rate": 8.669023917389077e-05, "loss": 13.1238, "step": 13750 }, { "epoch": 1.02, "learning_rate": 8.664001366133942e-05, "loss": 11.909, "step": 13800 }, { "epoch": 1.02, "learning_rate": 8.658978814878806e-05, "loss": 12.5478, "step": 13850 }, { "epoch": 1.03, "learning_rate": 8.65395626362367e-05, "loss": 13.017, "step": 13900 }, { "epoch": 1.03, "learning_rate": 8.648933712368535e-05, "loss": 12.9134, "step": 13950 }, { "epoch": 1.03, "learning_rate": 8.6439111611134e-05, "loss": 13.3485, "step": 14000 }, { "epoch": 1.04, "learning_rate": 8.638888609858264e-05, "loss": 11.4706, "step": 14050 }, { "epoch": 1.04, "learning_rate": 8.633866058603128e-05, "loss": 11.1063, "step": 14100 }, { "epoch": 1.04, "learning_rate": 8.628843507347994e-05, "loss": 12.7408, "step": 14150 }, { "epoch": 1.05, "learning_rate": 8.623820956092857e-05, "loss": 12.0689, "step": 14200 }, { "epoch": 1.05, "learning_rate": 8.618798404837721e-05, "loss": 11.0724, "step": 14250 }, { "epoch": 1.05, "learning_rate": 8.613775853582586e-05, "loss": 12.5685, "step": 14300 }, { "epoch": 1.06, "learning_rate": 8.60875330232745e-05, "loss": 12.7776, "step": 14350 }, { "epoch": 1.06, "learning_rate": 8.603730751072315e-05, "loss": 11.3066, "step": 14400 }, { "epoch": 1.07, "learning_rate": 8.598708199817179e-05, "loss": 13.06, "step": 14450 }, { "epoch": 1.07, "learning_rate": 8.593685648562045e-05, "loss": 15.6523, "step": 14500 }, { "epoch": 1.07, "learning_rate": 8.588663097306908e-05, "loss": 12.019, "step": 14550 }, { "epoch": 1.08, "learning_rate": 8.583640546051774e-05, "loss": 11.0941, "step": 14600 }, { "epoch": 1.08, "learning_rate": 8.578617994796637e-05, "loss": 12.4755, "step": 14650 }, { "epoch": 1.08, "learning_rate": 8.573595443541502e-05, "loss": 13.7012, "step": 14700 }, { "epoch": 1.09, "learning_rate": 8.568572892286366e-05, "loss": 12.2024, "step": 14750 }, { "epoch": 1.09, "learning_rate": 8.56355034103123e-05, "loss": 12.4744, "step": 14800 }, { "epoch": 1.1, "learning_rate": 8.558527789776096e-05, "loss": 12.3234, "step": 14850 }, { "epoch": 1.1, "learning_rate": 8.553505238520959e-05, "loss": 12.5616, "step": 14900 }, { "epoch": 1.1, "learning_rate": 8.548482687265824e-05, "loss": 11.9559, "step": 14950 }, { "epoch": 1.11, "learning_rate": 8.543460136010688e-05, "loss": 12.0734, "step": 15000 }, { "epoch": 1.11, "learning_rate": 8.538437584755553e-05, "loss": 13.0341, "step": 15050 }, { "epoch": 1.11, "learning_rate": 8.533415033500418e-05, "loss": 12.7406, "step": 15100 }, { "epoch": 1.12, "learning_rate": 8.528392482245282e-05, "loss": 11.7258, "step": 15150 }, { "epoch": 1.12, "learning_rate": 8.523369930990147e-05, "loss": 11.8709, "step": 15200 }, { "epoch": 1.12, "learning_rate": 8.518347379735011e-05, "loss": 11.7021, "step": 15250 }, { "epoch": 1.13, "learning_rate": 8.513324828479875e-05, "loss": 13.2674, "step": 15300 }, { "epoch": 1.13, "learning_rate": 8.508302277224738e-05, "loss": 11.9099, "step": 15350 }, { "epoch": 1.14, "learning_rate": 8.503279725969604e-05, "loss": 11.7841, "step": 15400 }, { "epoch": 1.14, "learning_rate": 8.498257174714469e-05, "loss": 11.9573, "step": 15450 }, { "epoch": 1.14, "learning_rate": 8.493234623459333e-05, "loss": 11.7211, "step": 15500 }, { "epoch": 1.15, "learning_rate": 8.488212072204197e-05, "loss": 12.3513, "step": 15550 }, { "epoch": 1.15, "learning_rate": 8.483189520949062e-05, "loss": 11.0709, "step": 15600 }, { "epoch": 1.15, "learning_rate": 8.478166969693926e-05, "loss": 11.6544, "step": 15650 }, { "epoch": 1.16, "learning_rate": 8.47314441843879e-05, "loss": 11.8285, "step": 15700 }, { "epoch": 1.16, "learning_rate": 8.468121867183655e-05, "loss": 10.4208, "step": 15750 }, { "epoch": 1.17, "learning_rate": 8.46309931592852e-05, "loss": 10.7821, "step": 15800 }, { "epoch": 1.17, "learning_rate": 8.458076764673384e-05, "loss": 13.2724, "step": 15850 }, { "epoch": 1.17, "learning_rate": 8.45305421341825e-05, "loss": 10.9219, "step": 15900 }, { "epoch": 1.18, "learning_rate": 8.448031662163113e-05, "loss": 12.2532, "step": 15950 }, { "epoch": 1.18, "learning_rate": 8.443009110907977e-05, "loss": 11.0132, "step": 16000 }, { "epoch": 1.18, "learning_rate": 8.437986559652841e-05, "loss": 12.319, "step": 16050 }, { "epoch": 1.19, "learning_rate": 8.432964008397706e-05, "loss": 12.9871, "step": 16100 }, { "epoch": 1.19, "learning_rate": 8.42794145714257e-05, "loss": 12.0625, "step": 16150 }, { "epoch": 1.19, "learning_rate": 8.422918905887435e-05, "loss": 13.4629, "step": 16200 }, { "epoch": 1.2, "learning_rate": 8.4178963546323e-05, "loss": 10.9291, "step": 16250 }, { "epoch": 1.2, "learning_rate": 8.412873803377163e-05, "loss": 13.7719, "step": 16300 }, { "epoch": 1.21, "learning_rate": 8.407851252122029e-05, "loss": 11.3634, "step": 16350 }, { "epoch": 1.21, "learning_rate": 8.402828700866892e-05, "loss": 12.7941, "step": 16400 }, { "epoch": 1.21, "learning_rate": 8.397806149611758e-05, "loss": 11.8863, "step": 16450 }, { "epoch": 1.22, "learning_rate": 8.392783598356621e-05, "loss": 9.5225, "step": 16500 }, { "epoch": 1.22, "learning_rate": 8.387761047101485e-05, "loss": 12.983, "step": 16550 }, { "epoch": 1.22, "learning_rate": 8.382738495846351e-05, "loss": 11.8489, "step": 16600 }, { "epoch": 1.23, "learning_rate": 8.377715944591214e-05, "loss": 11.8122, "step": 16650 }, { "epoch": 1.23, "learning_rate": 8.37269339333608e-05, "loss": 12.3387, "step": 16700 }, { "epoch": 1.24, "learning_rate": 8.367670842080943e-05, "loss": 13.4648, "step": 16750 }, { "epoch": 1.24, "learning_rate": 8.362648290825809e-05, "loss": 10.2301, "step": 16800 }, { "epoch": 1.24, "learning_rate": 8.357625739570672e-05, "loss": 11.492, "step": 16850 }, { "epoch": 1.25, "learning_rate": 8.352603188315538e-05, "loss": 12.5997, "step": 16900 }, { "epoch": 1.25, "learning_rate": 8.347580637060402e-05, "loss": 11.5588, "step": 16950 }, { "epoch": 1.25, "learning_rate": 8.342558085805266e-05, "loss": 11.8627, "step": 17000 }, { "epoch": 1.26, "learning_rate": 8.337535534550131e-05, "loss": 13.2469, "step": 17050 }, { "epoch": 1.26, "learning_rate": 8.332512983294994e-05, "loss": 10.4327, "step": 17100 }, { "epoch": 1.27, "learning_rate": 8.32749043203986e-05, "loss": 12.7566, "step": 17150 }, { "epoch": 1.27, "learning_rate": 8.322467880784723e-05, "loss": 11.0729, "step": 17200 }, { "epoch": 1.27, "learning_rate": 8.317445329529588e-05, "loss": 12.3484, "step": 17250 }, { "epoch": 1.28, "learning_rate": 8.312422778274453e-05, "loss": 10.5193, "step": 17300 }, { "epoch": 1.28, "learning_rate": 8.307400227019317e-05, "loss": 12.2369, "step": 17350 }, { "epoch": 1.28, "learning_rate": 8.302377675764182e-05, "loss": 12.2976, "step": 17400 }, { "epoch": 1.29, "learning_rate": 8.297355124509046e-05, "loss": 12.3852, "step": 17450 }, { "epoch": 1.29, "learning_rate": 8.29233257325391e-05, "loss": 11.2137, "step": 17500 }, { "epoch": 1.29, "learning_rate": 8.287310021998775e-05, "loss": 11.609, "step": 17550 }, { "epoch": 1.3, "learning_rate": 8.282287470743639e-05, "loss": 13.3339, "step": 17600 }, { "epoch": 1.3, "learning_rate": 8.277264919488504e-05, "loss": 11.4263, "step": 17650 }, { "epoch": 1.31, "learning_rate": 8.272242368233368e-05, "loss": 12.6949, "step": 17700 }, { "epoch": 1.31, "learning_rate": 8.267219816978233e-05, "loss": 11.4767, "step": 17750 }, { "epoch": 1.31, "learning_rate": 8.262197265723097e-05, "loss": 12.2225, "step": 17800 }, { "epoch": 1.32, "learning_rate": 8.257174714467961e-05, "loss": 11.0755, "step": 17850 }, { "epoch": 1.32, "learning_rate": 8.252152163212826e-05, "loss": 11.9677, "step": 17900 }, { "epoch": 1.32, "learning_rate": 8.24712961195769e-05, "loss": 11.098, "step": 17950 }, { "epoch": 1.33, "learning_rate": 8.242107060702555e-05, "loss": 11.1102, "step": 18000 }, { "epoch": 1.33, "learning_rate": 8.237084509447419e-05, "loss": 11.4985, "step": 18050 }, { "epoch": 1.34, "learning_rate": 8.232061958192285e-05, "loss": 11.7356, "step": 18100 }, { "epoch": 1.34, "learning_rate": 8.227039406937148e-05, "loss": 11.3336, "step": 18150 }, { "epoch": 1.34, "learning_rate": 8.222016855682012e-05, "loss": 11.0448, "step": 18200 }, { "epoch": 1.35, "learning_rate": 8.216994304426877e-05, "loss": 10.9986, "step": 18250 }, { "epoch": 1.35, "learning_rate": 8.211971753171741e-05, "loss": 10.768, "step": 18300 }, { "epoch": 1.35, "learning_rate": 8.206949201916607e-05, "loss": 11.6844, "step": 18350 }, { "epoch": 1.36, "learning_rate": 8.20192665066147e-05, "loss": 11.5615, "step": 18400 }, { "epoch": 1.36, "learning_rate": 8.196904099406336e-05, "loss": 11.4019, "step": 18450 }, { "epoch": 1.36, "learning_rate": 8.191881548151199e-05, "loss": 12.1784, "step": 18500 }, { "epoch": 1.37, "learning_rate": 8.186858996896064e-05, "loss": 12.4565, "step": 18550 }, { "epoch": 1.37, "learning_rate": 8.181836445640927e-05, "loss": 11.0557, "step": 18600 }, { "epoch": 1.38, "learning_rate": 8.176813894385793e-05, "loss": 12.1892, "step": 18650 }, { "epoch": 1.38, "learning_rate": 8.171791343130658e-05, "loss": 12.0531, "step": 18700 }, { "epoch": 1.38, "learning_rate": 8.166768791875522e-05, "loss": 10.1791, "step": 18750 }, { "epoch": 1.39, "learning_rate": 8.161746240620386e-05, "loss": 11.2501, "step": 18800 }, { "epoch": 1.39, "learning_rate": 8.15672368936525e-05, "loss": 9.92, "step": 18850 }, { "epoch": 1.39, "learning_rate": 8.151701138110115e-05, "loss": 10.0603, "step": 18900 }, { "epoch": 1.4, "learning_rate": 8.146678586854978e-05, "loss": 10.9477, "step": 18950 }, { "epoch": 1.4, "learning_rate": 8.141656035599844e-05, "loss": 9.7579, "step": 19000 }, { "epoch": 1.41, "learning_rate": 8.136633484344708e-05, "loss": 11.243, "step": 19050 }, { "epoch": 1.41, "learning_rate": 8.131610933089573e-05, "loss": 11.0069, "step": 19100 }, { "epoch": 1.41, "learning_rate": 8.126588381834437e-05, "loss": 9.7387, "step": 19150 }, { "epoch": 1.42, "learning_rate": 8.121565830579302e-05, "loss": 11.4624, "step": 19200 }, { "epoch": 1.42, "learning_rate": 8.116543279324166e-05, "loss": 12.1299, "step": 19250 }, { "epoch": 1.42, "learning_rate": 8.11152072806903e-05, "loss": 12.2796, "step": 19300 }, { "epoch": 1.43, "learning_rate": 8.106498176813895e-05, "loss": 10.3295, "step": 19350 }, { "epoch": 1.43, "learning_rate": 8.101475625558759e-05, "loss": 10.0709, "step": 19400 }, { "epoch": 1.43, "learning_rate": 8.096453074303624e-05, "loss": 11.0725, "step": 19450 }, { "epoch": 1.44, "learning_rate": 8.091430523048488e-05, "loss": 10.7882, "step": 19500 }, { "epoch": 1.44, "learning_rate": 8.086407971793352e-05, "loss": 11.4124, "step": 19550 }, { "epoch": 1.45, "learning_rate": 8.081385420538217e-05, "loss": 10.4941, "step": 19600 }, { "epoch": 1.45, "learning_rate": 8.076362869283081e-05, "loss": 11.8687, "step": 19650 }, { "epoch": 1.45, "learning_rate": 8.071340318027946e-05, "loss": 11.3221, "step": 19700 }, { "epoch": 1.46, "learning_rate": 8.06631776677281e-05, "loss": 10.2167, "step": 19750 }, { "epoch": 1.46, "learning_rate": 8.061295215517675e-05, "loss": 10.5425, "step": 19800 }, { "epoch": 1.46, "learning_rate": 8.05627266426254e-05, "loss": 11.2982, "step": 19850 }, { "epoch": 1.47, "learning_rate": 8.051250113007403e-05, "loss": 12.0685, "step": 19900 }, { "epoch": 1.47, "learning_rate": 8.046227561752268e-05, "loss": 10.6613, "step": 19950 }, { "epoch": 1.48, "learning_rate": 8.041205010497132e-05, "loss": 10.8245, "step": 20000 }, { "epoch": 1.48, "eval_loss": 10.409339904785156, "eval_runtime": 890.9956, "eval_samples_per_second": 14.7, "eval_steps_per_second": 3.676, "eval_wer": 0.2624627273109067, "step": 20000 } ], "max_steps": 100051, "num_train_epochs": 8, "total_flos": 0.0, "trial_name": null, "trial_params": null }