sanchit-gandhi's picture
Push to Hub
4ee7109
raw
history blame
147 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.4257579110422665,
"global_step": 60000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1e-05,
"loss": 178.9465,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 2e-05,
"loss": 164.9707,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 3e-05,
"loss": 142.2782,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 4e-05,
"loss": 121.5122,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 5e-05,
"loss": 91.8622,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 6e-05,
"loss": 82.2062,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 7e-05,
"loss": 72.6893,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 8e-05,
"loss": 71.8709,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 9e-05,
"loss": 69.9995,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 0.0001,
"loss": 70.6458,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.994977448744865e-05,
"loss": 73.9929,
"step": 550
},
{
"epoch": 0.04,
"learning_rate": 9.989954897489729e-05,
"loss": 66.52,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.984932346234594e-05,
"loss": 65.8947,
"step": 650
},
{
"epoch": 0.05,
"learning_rate": 9.979909794979458e-05,
"loss": 62.5809,
"step": 700
},
{
"epoch": 0.06,
"learning_rate": 9.974887243724323e-05,
"loss": 61.212,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 9.969864692469187e-05,
"loss": 68.2408,
"step": 800
},
{
"epoch": 0.06,
"learning_rate": 9.964842141214051e-05,
"loss": 61.5308,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 9.959819589958916e-05,
"loss": 58.9116,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 9.95479703870378e-05,
"loss": 60.0702,
"step": 950
},
{
"epoch": 0.07,
"learning_rate": 9.949774487448646e-05,
"loss": 57.6135,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 9.944751936193509e-05,
"loss": 50.9231,
"step": 1050
},
{
"epoch": 0.08,
"learning_rate": 9.939729384938373e-05,
"loss": 51.187,
"step": 1100
},
{
"epoch": 0.08,
"learning_rate": 9.934706833683238e-05,
"loss": 52.1127,
"step": 1150
},
{
"epoch": 0.09,
"learning_rate": 9.929684282428102e-05,
"loss": 47.4608,
"step": 1200
},
{
"epoch": 0.09,
"learning_rate": 9.924661731172968e-05,
"loss": 51.6108,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 9.919639179917831e-05,
"loss": 46.5874,
"step": 1300
},
{
"epoch": 0.1,
"learning_rate": 9.914616628662697e-05,
"loss": 41.4706,
"step": 1350
},
{
"epoch": 0.1,
"learning_rate": 9.90959407740756e-05,
"loss": 43.7544,
"step": 1400
},
{
"epoch": 0.11,
"learning_rate": 9.904571526152426e-05,
"loss": 44.6039,
"step": 1450
},
{
"epoch": 0.11,
"learning_rate": 9.899548974897289e-05,
"loss": 41.4384,
"step": 1500
},
{
"epoch": 0.11,
"learning_rate": 9.894526423642154e-05,
"loss": 42.8289,
"step": 1550
},
{
"epoch": 0.12,
"learning_rate": 9.889503872387019e-05,
"loss": 39.9726,
"step": 1600
},
{
"epoch": 0.12,
"learning_rate": 9.884481321131882e-05,
"loss": 43.9533,
"step": 1650
},
{
"epoch": 0.13,
"learning_rate": 9.879458769876748e-05,
"loss": 38.7605,
"step": 1700
},
{
"epoch": 0.13,
"learning_rate": 9.87443621862161e-05,
"loss": 39.5425,
"step": 1750
},
{
"epoch": 0.13,
"learning_rate": 9.869413667366476e-05,
"loss": 37.588,
"step": 1800
},
{
"epoch": 0.14,
"learning_rate": 9.86439111611134e-05,
"loss": 39.7744,
"step": 1850
},
{
"epoch": 0.14,
"learning_rate": 9.859368564856205e-05,
"loss": 38.2154,
"step": 1900
},
{
"epoch": 0.14,
"learning_rate": 9.85434601360107e-05,
"loss": 35.0806,
"step": 1950
},
{
"epoch": 0.15,
"learning_rate": 9.849323462345934e-05,
"loss": 39.061,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 9.844300911090798e-05,
"loss": 35.1544,
"step": 2050
},
{
"epoch": 0.15,
"learning_rate": 9.839278359835663e-05,
"loss": 38.123,
"step": 2100
},
{
"epoch": 0.16,
"learning_rate": 9.834255808580527e-05,
"loss": 33.1144,
"step": 2150
},
{
"epoch": 0.16,
"learning_rate": 9.829233257325392e-05,
"loss": 34.3476,
"step": 2200
},
{
"epoch": 0.17,
"learning_rate": 9.824210706070256e-05,
"loss": 29.5665,
"step": 2250
},
{
"epoch": 0.17,
"learning_rate": 9.81918815481512e-05,
"loss": 35.8756,
"step": 2300
},
{
"epoch": 0.17,
"learning_rate": 9.814165603559985e-05,
"loss": 37.2579,
"step": 2350
},
{
"epoch": 0.18,
"learning_rate": 9.809143052304849e-05,
"loss": 33.6245,
"step": 2400
},
{
"epoch": 0.18,
"learning_rate": 9.804120501049714e-05,
"loss": 35.6543,
"step": 2450
},
{
"epoch": 0.18,
"learning_rate": 9.799097949794578e-05,
"loss": 36.7847,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 9.794075398539442e-05,
"loss": 33.463,
"step": 2550
},
{
"epoch": 0.19,
"learning_rate": 9.789052847284307e-05,
"loss": 32.2215,
"step": 2600
},
{
"epoch": 0.2,
"learning_rate": 9.784030296029171e-05,
"loss": 33.4301,
"step": 2650
},
{
"epoch": 0.2,
"learning_rate": 9.779007744774036e-05,
"loss": 29.9579,
"step": 2700
},
{
"epoch": 0.2,
"learning_rate": 9.773985193518901e-05,
"loss": 31.9141,
"step": 2750
},
{
"epoch": 0.21,
"learning_rate": 9.768962642263764e-05,
"loss": 33.2049,
"step": 2800
},
{
"epoch": 0.21,
"learning_rate": 9.763940091008629e-05,
"loss": 32.8774,
"step": 2850
},
{
"epoch": 0.21,
"learning_rate": 9.758917539753493e-05,
"loss": 29.0858,
"step": 2900
},
{
"epoch": 0.22,
"learning_rate": 9.753894988498358e-05,
"loss": 30.1145,
"step": 2950
},
{
"epoch": 0.22,
"learning_rate": 9.748872437243222e-05,
"loss": 27.6986,
"step": 3000
},
{
"epoch": 0.22,
"learning_rate": 9.743849885988087e-05,
"loss": 31.7807,
"step": 3050
},
{
"epoch": 0.23,
"learning_rate": 9.738827334732952e-05,
"loss": 30.5108,
"step": 3100
},
{
"epoch": 0.23,
"learning_rate": 9.733804783477815e-05,
"loss": 31.0909,
"step": 3150
},
{
"epoch": 0.24,
"learning_rate": 9.728782232222681e-05,
"loss": 27.9057,
"step": 3200
},
{
"epoch": 0.24,
"learning_rate": 9.723759680967544e-05,
"loss": 29.7323,
"step": 3250
},
{
"epoch": 0.24,
"learning_rate": 9.71873712971241e-05,
"loss": 29.7527,
"step": 3300
},
{
"epoch": 0.25,
"learning_rate": 9.713714578457273e-05,
"loss": 29.1442,
"step": 3350
},
{
"epoch": 0.25,
"learning_rate": 9.708692027202137e-05,
"loss": 30.8906,
"step": 3400
},
{
"epoch": 0.25,
"learning_rate": 9.703669475947003e-05,
"loss": 26.8419,
"step": 3450
},
{
"epoch": 0.26,
"learning_rate": 9.698646924691866e-05,
"loss": 29.2181,
"step": 3500
},
{
"epoch": 0.26,
"learning_rate": 9.693624373436732e-05,
"loss": 27.6549,
"step": 3550
},
{
"epoch": 0.27,
"learning_rate": 9.688601822181595e-05,
"loss": 34.0701,
"step": 3600
},
{
"epoch": 0.27,
"learning_rate": 9.683579270926461e-05,
"loss": 24.7487,
"step": 3650
},
{
"epoch": 0.27,
"learning_rate": 9.678556719671325e-05,
"loss": 30.0266,
"step": 3700
},
{
"epoch": 0.28,
"learning_rate": 9.67353416841619e-05,
"loss": 25.5011,
"step": 3750
},
{
"epoch": 0.28,
"learning_rate": 9.668511617161054e-05,
"loss": 26.1437,
"step": 3800
},
{
"epoch": 0.28,
"learning_rate": 9.663489065905918e-05,
"loss": 23.2303,
"step": 3850
},
{
"epoch": 0.29,
"learning_rate": 9.658466514650783e-05,
"loss": 26.357,
"step": 3900
},
{
"epoch": 0.29,
"learning_rate": 9.653443963395646e-05,
"loss": 27.2201,
"step": 3950
},
{
"epoch": 0.3,
"learning_rate": 9.648421412140512e-05,
"loss": 25.5695,
"step": 4000
},
{
"epoch": 0.3,
"learning_rate": 9.643398860885376e-05,
"loss": 24.8346,
"step": 4050
},
{
"epoch": 0.3,
"learning_rate": 9.63837630963024e-05,
"loss": 22.3957,
"step": 4100
},
{
"epoch": 0.31,
"learning_rate": 9.633353758375105e-05,
"loss": 24.9532,
"step": 4150
},
{
"epoch": 0.31,
"learning_rate": 9.628331207119969e-05,
"loss": 23.1574,
"step": 4200
},
{
"epoch": 0.31,
"learning_rate": 9.623308655864834e-05,
"loss": 23.7018,
"step": 4250
},
{
"epoch": 0.32,
"learning_rate": 9.618286104609698e-05,
"loss": 25.1433,
"step": 4300
},
{
"epoch": 0.32,
"learning_rate": 9.613263553354562e-05,
"loss": 25.0571,
"step": 4350
},
{
"epoch": 0.32,
"learning_rate": 9.608241002099427e-05,
"loss": 24.2231,
"step": 4400
},
{
"epoch": 0.33,
"learning_rate": 9.603218450844291e-05,
"loss": 23.0983,
"step": 4450
},
{
"epoch": 0.33,
"learning_rate": 9.598195899589156e-05,
"loss": 25.0078,
"step": 4500
},
{
"epoch": 0.34,
"learning_rate": 9.59317334833402e-05,
"loss": 20.6933,
"step": 4550
},
{
"epoch": 0.34,
"learning_rate": 9.588150797078884e-05,
"loss": 23.6196,
"step": 4600
},
{
"epoch": 0.34,
"learning_rate": 9.583128245823749e-05,
"loss": 25.2331,
"step": 4650
},
{
"epoch": 0.35,
"learning_rate": 9.578105694568613e-05,
"loss": 24.7932,
"step": 4700
},
{
"epoch": 0.35,
"learning_rate": 9.573083143313478e-05,
"loss": 24.3586,
"step": 4750
},
{
"epoch": 0.35,
"learning_rate": 9.568060592058342e-05,
"loss": 22.7161,
"step": 4800
},
{
"epoch": 0.36,
"learning_rate": 9.563038040803208e-05,
"loss": 22.4188,
"step": 4850
},
{
"epoch": 0.36,
"learning_rate": 9.558015489548071e-05,
"loss": 21.6516,
"step": 4900
},
{
"epoch": 0.37,
"learning_rate": 9.552992938292937e-05,
"loss": 21.78,
"step": 4950
},
{
"epoch": 0.37,
"learning_rate": 9.5479703870378e-05,
"loss": 21.0172,
"step": 5000
},
{
"epoch": 0.37,
"learning_rate": 9.542947835782665e-05,
"loss": 22.4624,
"step": 5050
},
{
"epoch": 0.38,
"learning_rate": 9.537925284527528e-05,
"loss": 23.6615,
"step": 5100
},
{
"epoch": 0.38,
"learning_rate": 9.532902733272393e-05,
"loss": 21.8091,
"step": 5150
},
{
"epoch": 0.38,
"learning_rate": 9.527880182017259e-05,
"loss": 21.4173,
"step": 5200
},
{
"epoch": 0.39,
"learning_rate": 9.522857630762122e-05,
"loss": 20.5415,
"step": 5250
},
{
"epoch": 0.39,
"learning_rate": 9.517835079506987e-05,
"loss": 21.0639,
"step": 5300
},
{
"epoch": 0.39,
"learning_rate": 9.51281252825185e-05,
"loss": 21.6078,
"step": 5350
},
{
"epoch": 0.4,
"learning_rate": 9.507789976996716e-05,
"loss": 19.4142,
"step": 5400
},
{
"epoch": 0.4,
"learning_rate": 9.50276742574158e-05,
"loss": 20.2504,
"step": 5450
},
{
"epoch": 0.41,
"learning_rate": 9.497744874486445e-05,
"loss": 23.8683,
"step": 5500
},
{
"epoch": 0.41,
"learning_rate": 9.49272232323131e-05,
"loss": 19.7559,
"step": 5550
},
{
"epoch": 0.41,
"learning_rate": 9.487699771976174e-05,
"loss": 21.1743,
"step": 5600
},
{
"epoch": 0.42,
"learning_rate": 9.482677220721038e-05,
"loss": 21.1908,
"step": 5650
},
{
"epoch": 0.42,
"learning_rate": 9.477654669465901e-05,
"loss": 20.9591,
"step": 5700
},
{
"epoch": 0.42,
"learning_rate": 9.472632118210767e-05,
"loss": 20.9036,
"step": 5750
},
{
"epoch": 0.43,
"learning_rate": 9.46760956695563e-05,
"loss": 22.249,
"step": 5800
},
{
"epoch": 0.43,
"learning_rate": 9.462587015700496e-05,
"loss": 19.1093,
"step": 5850
},
{
"epoch": 0.44,
"learning_rate": 9.45756446444536e-05,
"loss": 21.2714,
"step": 5900
},
{
"epoch": 0.44,
"learning_rate": 9.452541913190225e-05,
"loss": 21.3794,
"step": 5950
},
{
"epoch": 0.44,
"learning_rate": 9.447519361935089e-05,
"loss": 20.0326,
"step": 6000
},
{
"epoch": 0.45,
"learning_rate": 9.442496810679954e-05,
"loss": 19.8004,
"step": 6050
},
{
"epoch": 0.45,
"learning_rate": 9.437474259424818e-05,
"loss": 19.0229,
"step": 6100
},
{
"epoch": 0.45,
"learning_rate": 9.432451708169682e-05,
"loss": 17.6587,
"step": 6150
},
{
"epoch": 0.46,
"learning_rate": 9.427429156914547e-05,
"loss": 21.9247,
"step": 6200
},
{
"epoch": 0.46,
"learning_rate": 9.422406605659411e-05,
"loss": 19.743,
"step": 6250
},
{
"epoch": 0.46,
"learning_rate": 9.417384054404276e-05,
"loss": 22.9746,
"step": 6300
},
{
"epoch": 0.47,
"learning_rate": 9.41236150314914e-05,
"loss": 19.6693,
"step": 6350
},
{
"epoch": 0.47,
"learning_rate": 9.407338951894004e-05,
"loss": 19.1141,
"step": 6400
},
{
"epoch": 0.48,
"learning_rate": 9.402316400638869e-05,
"loss": 18.3847,
"step": 6450
},
{
"epoch": 0.48,
"learning_rate": 9.397293849383733e-05,
"loss": 18.9357,
"step": 6500
},
{
"epoch": 0.48,
"learning_rate": 9.392271298128598e-05,
"loss": 18.9316,
"step": 6550
},
{
"epoch": 0.49,
"learning_rate": 9.387248746873462e-05,
"loss": 20.9141,
"step": 6600
},
{
"epoch": 0.49,
"learning_rate": 9.382226195618326e-05,
"loss": 18.7472,
"step": 6650
},
{
"epoch": 0.49,
"learning_rate": 9.377203644363192e-05,
"loss": 18.8577,
"step": 6700
},
{
"epoch": 0.5,
"learning_rate": 9.372181093108055e-05,
"loss": 17.8061,
"step": 6750
},
{
"epoch": 0.5,
"learning_rate": 9.36715854185292e-05,
"loss": 19.4687,
"step": 6800
},
{
"epoch": 0.51,
"learning_rate": 9.362135990597784e-05,
"loss": 19.5103,
"step": 6850
},
{
"epoch": 0.51,
"learning_rate": 9.357113439342648e-05,
"loss": 18.5319,
"step": 6900
},
{
"epoch": 0.51,
"learning_rate": 9.352090888087514e-05,
"loss": 20.16,
"step": 6950
},
{
"epoch": 0.52,
"learning_rate": 9.347068336832377e-05,
"loss": 18.1913,
"step": 7000
},
{
"epoch": 0.52,
"learning_rate": 9.342045785577243e-05,
"loss": 21.341,
"step": 7050
},
{
"epoch": 0.52,
"learning_rate": 9.337023234322106e-05,
"loss": 16.7701,
"step": 7100
},
{
"epoch": 0.53,
"learning_rate": 9.332000683066972e-05,
"loss": 18.045,
"step": 7150
},
{
"epoch": 0.53,
"learning_rate": 9.326978131811835e-05,
"loss": 16.0393,
"step": 7200
},
{
"epoch": 0.53,
"learning_rate": 9.3219555805567e-05,
"loss": 17.4833,
"step": 7250
},
{
"epoch": 0.54,
"learning_rate": 9.316933029301565e-05,
"loss": 17.3978,
"step": 7300
},
{
"epoch": 0.54,
"learning_rate": 9.31191047804643e-05,
"loss": 18.2649,
"step": 7350
},
{
"epoch": 0.55,
"learning_rate": 9.306887926791294e-05,
"loss": 16.3891,
"step": 7400
},
{
"epoch": 0.55,
"learning_rate": 9.301865375536157e-05,
"loss": 21.4399,
"step": 7450
},
{
"epoch": 0.55,
"learning_rate": 9.296842824281023e-05,
"loss": 16.3082,
"step": 7500
},
{
"epoch": 0.56,
"learning_rate": 9.291820273025886e-05,
"loss": 14.8713,
"step": 7550
},
{
"epoch": 0.56,
"learning_rate": 9.286797721770751e-05,
"loss": 16.3099,
"step": 7600
},
{
"epoch": 0.56,
"learning_rate": 9.281775170515616e-05,
"loss": 17.8771,
"step": 7650
},
{
"epoch": 0.57,
"learning_rate": 9.27675261926048e-05,
"loss": 17.1421,
"step": 7700
},
{
"epoch": 0.57,
"learning_rate": 9.271730068005345e-05,
"loss": 16.6478,
"step": 7750
},
{
"epoch": 0.58,
"learning_rate": 9.266707516750209e-05,
"loss": 15.3247,
"step": 7800
},
{
"epoch": 0.58,
"learning_rate": 9.261684965495073e-05,
"loss": 17.6577,
"step": 7850
},
{
"epoch": 0.58,
"learning_rate": 9.256662414239938e-05,
"loss": 18.8549,
"step": 7900
},
{
"epoch": 0.59,
"learning_rate": 9.251639862984802e-05,
"loss": 17.4187,
"step": 7950
},
{
"epoch": 0.59,
"learning_rate": 9.246617311729667e-05,
"loss": 15.6643,
"step": 8000
},
{
"epoch": 0.59,
"learning_rate": 9.241594760474531e-05,
"loss": 17.1987,
"step": 8050
},
{
"epoch": 0.6,
"learning_rate": 9.236572209219396e-05,
"loss": 18.1712,
"step": 8100
},
{
"epoch": 0.6,
"learning_rate": 9.23154965796426e-05,
"loss": 15.8015,
"step": 8150
},
{
"epoch": 0.6,
"learning_rate": 9.226527106709124e-05,
"loss": 19.064,
"step": 8200
},
{
"epoch": 0.61,
"learning_rate": 9.221504555453989e-05,
"loss": 18.2748,
"step": 8250
},
{
"epoch": 0.61,
"learning_rate": 9.216482004198853e-05,
"loss": 15.0679,
"step": 8300
},
{
"epoch": 0.62,
"learning_rate": 9.211459452943718e-05,
"loss": 17.995,
"step": 8350
},
{
"epoch": 0.62,
"learning_rate": 9.206436901688582e-05,
"loss": 17.467,
"step": 8400
},
{
"epoch": 0.62,
"learning_rate": 9.201414350433448e-05,
"loss": 18.6665,
"step": 8450
},
{
"epoch": 0.63,
"learning_rate": 9.196391799178311e-05,
"loss": 17.2848,
"step": 8500
},
{
"epoch": 0.63,
"learning_rate": 9.191369247923175e-05,
"loss": 14.4767,
"step": 8550
},
{
"epoch": 0.63,
"learning_rate": 9.18634669666804e-05,
"loss": 17.5444,
"step": 8600
},
{
"epoch": 0.64,
"learning_rate": 9.181324145412904e-05,
"loss": 14.4661,
"step": 8650
},
{
"epoch": 0.64,
"learning_rate": 9.176301594157768e-05,
"loss": 16.3339,
"step": 8700
},
{
"epoch": 0.65,
"learning_rate": 9.171279042902633e-05,
"loss": 17.5122,
"step": 8750
},
{
"epoch": 0.65,
"learning_rate": 9.166256491647499e-05,
"loss": 16.7631,
"step": 8800
},
{
"epoch": 0.65,
"learning_rate": 9.161233940392362e-05,
"loss": 16.5193,
"step": 8850
},
{
"epoch": 0.66,
"learning_rate": 9.156211389137227e-05,
"loss": 17.8364,
"step": 8900
},
{
"epoch": 0.66,
"learning_rate": 9.15118883788209e-05,
"loss": 16.2916,
"step": 8950
},
{
"epoch": 0.66,
"learning_rate": 9.146166286626956e-05,
"loss": 14.1719,
"step": 9000
},
{
"epoch": 0.67,
"learning_rate": 9.141143735371819e-05,
"loss": 18.2987,
"step": 9050
},
{
"epoch": 0.67,
"learning_rate": 9.136121184116684e-05,
"loss": 17.4248,
"step": 9100
},
{
"epoch": 0.67,
"learning_rate": 9.13109863286155e-05,
"loss": 16.1862,
"step": 9150
},
{
"epoch": 0.68,
"learning_rate": 9.126076081606412e-05,
"loss": 16.3134,
"step": 9200
},
{
"epoch": 0.68,
"learning_rate": 9.121053530351278e-05,
"loss": 14.9158,
"step": 9250
},
{
"epoch": 0.69,
"learning_rate": 9.116030979096141e-05,
"loss": 15.2504,
"step": 9300
},
{
"epoch": 0.69,
"learning_rate": 9.111008427841007e-05,
"loss": 14.1967,
"step": 9350
},
{
"epoch": 0.69,
"learning_rate": 9.105985876585871e-05,
"loss": 17.3165,
"step": 9400
},
{
"epoch": 0.7,
"learning_rate": 9.100963325330736e-05,
"loss": 14.5912,
"step": 9450
},
{
"epoch": 0.7,
"learning_rate": 9.0959407740756e-05,
"loss": 17.5593,
"step": 9500
},
{
"epoch": 0.7,
"learning_rate": 9.090918222820465e-05,
"loss": 16.3421,
"step": 9550
},
{
"epoch": 0.71,
"learning_rate": 9.085895671565329e-05,
"loss": 16.2821,
"step": 9600
},
{
"epoch": 0.71,
"learning_rate": 9.080873120310192e-05,
"loss": 16.4985,
"step": 9650
},
{
"epoch": 0.72,
"learning_rate": 9.075850569055058e-05,
"loss": 16.1138,
"step": 9700
},
{
"epoch": 0.72,
"learning_rate": 9.070828017799922e-05,
"loss": 16.3997,
"step": 9750
},
{
"epoch": 0.72,
"learning_rate": 9.065805466544787e-05,
"loss": 15.518,
"step": 9800
},
{
"epoch": 0.73,
"learning_rate": 9.060782915289651e-05,
"loss": 13.8424,
"step": 9850
},
{
"epoch": 0.73,
"learning_rate": 9.055760364034515e-05,
"loss": 15.0784,
"step": 9900
},
{
"epoch": 0.73,
"learning_rate": 9.05073781277938e-05,
"loss": 14.0163,
"step": 9950
},
{
"epoch": 0.74,
"learning_rate": 9.045715261524244e-05,
"loss": 16.7863,
"step": 10000
},
{
"epoch": 0.74,
"learning_rate": 9.040692710269109e-05,
"loss": 13.6715,
"step": 10050
},
{
"epoch": 0.75,
"learning_rate": 9.035670159013973e-05,
"loss": 15.1071,
"step": 10100
},
{
"epoch": 0.75,
"learning_rate": 9.030647607758837e-05,
"loss": 14.2658,
"step": 10150
},
{
"epoch": 0.75,
"learning_rate": 9.025625056503703e-05,
"loss": 15.1115,
"step": 10200
},
{
"epoch": 0.76,
"learning_rate": 9.020602505248566e-05,
"loss": 14.028,
"step": 10250
},
{
"epoch": 0.76,
"learning_rate": 9.015579953993431e-05,
"loss": 13.3066,
"step": 10300
},
{
"epoch": 0.76,
"learning_rate": 9.010557402738295e-05,
"loss": 14.1185,
"step": 10350
},
{
"epoch": 0.77,
"learning_rate": 9.00553485148316e-05,
"loss": 14.061,
"step": 10400
},
{
"epoch": 0.77,
"learning_rate": 9.000512300228024e-05,
"loss": 15.2439,
"step": 10450
},
{
"epoch": 0.77,
"learning_rate": 8.995489748972888e-05,
"loss": 13.3617,
"step": 10500
},
{
"epoch": 0.78,
"learning_rate": 8.990467197717754e-05,
"loss": 14.5514,
"step": 10550
},
{
"epoch": 0.78,
"learning_rate": 8.985444646462617e-05,
"loss": 15.2426,
"step": 10600
},
{
"epoch": 0.79,
"learning_rate": 8.980422095207483e-05,
"loss": 16.6418,
"step": 10650
},
{
"epoch": 0.79,
"learning_rate": 8.975399543952346e-05,
"loss": 13.3146,
"step": 10700
},
{
"epoch": 0.79,
"learning_rate": 8.970376992697212e-05,
"loss": 14.9333,
"step": 10750
},
{
"epoch": 0.8,
"learning_rate": 8.965354441442075e-05,
"loss": 14.4502,
"step": 10800
},
{
"epoch": 0.8,
"learning_rate": 8.960331890186939e-05,
"loss": 14.7886,
"step": 10850
},
{
"epoch": 0.8,
"learning_rate": 8.955309338931805e-05,
"loss": 15.0266,
"step": 10900
},
{
"epoch": 0.81,
"learning_rate": 8.950286787676668e-05,
"loss": 14.543,
"step": 10950
},
{
"epoch": 0.81,
"learning_rate": 8.945264236421534e-05,
"loss": 15.8078,
"step": 11000
},
{
"epoch": 0.82,
"learning_rate": 8.940241685166397e-05,
"loss": 13.6052,
"step": 11050
},
{
"epoch": 0.82,
"learning_rate": 8.935219133911263e-05,
"loss": 14.2995,
"step": 11100
},
{
"epoch": 0.82,
"learning_rate": 8.930196582656126e-05,
"loss": 15.732,
"step": 11150
},
{
"epoch": 0.83,
"learning_rate": 8.925174031400991e-05,
"loss": 14.0573,
"step": 11200
},
{
"epoch": 0.83,
"learning_rate": 8.920151480145856e-05,
"loss": 17.5941,
"step": 11250
},
{
"epoch": 0.83,
"learning_rate": 8.91512892889072e-05,
"loss": 14.7829,
"step": 11300
},
{
"epoch": 0.84,
"learning_rate": 8.910106377635585e-05,
"loss": 14.6669,
"step": 11350
},
{
"epoch": 0.84,
"learning_rate": 8.905083826380448e-05,
"loss": 14.3315,
"step": 11400
},
{
"epoch": 0.84,
"learning_rate": 8.900061275125313e-05,
"loss": 14.2639,
"step": 11450
},
{
"epoch": 0.85,
"learning_rate": 8.895038723870176e-05,
"loss": 14.3226,
"step": 11500
},
{
"epoch": 0.85,
"learning_rate": 8.890016172615042e-05,
"loss": 14.4975,
"step": 11550
},
{
"epoch": 0.86,
"learning_rate": 8.884993621359907e-05,
"loss": 14.8436,
"step": 11600
},
{
"epoch": 0.86,
"learning_rate": 8.879971070104771e-05,
"loss": 13.8481,
"step": 11650
},
{
"epoch": 0.86,
"learning_rate": 8.874948518849635e-05,
"loss": 12.8151,
"step": 11700
},
{
"epoch": 0.87,
"learning_rate": 8.8699259675945e-05,
"loss": 13.1659,
"step": 11750
},
{
"epoch": 0.87,
"learning_rate": 8.864903416339364e-05,
"loss": 15.0919,
"step": 11800
},
{
"epoch": 0.87,
"learning_rate": 8.859880865084229e-05,
"loss": 14.4382,
"step": 11850
},
{
"epoch": 0.88,
"learning_rate": 8.854858313829093e-05,
"loss": 14.0989,
"step": 11900
},
{
"epoch": 0.88,
"learning_rate": 8.849835762573957e-05,
"loss": 14.5763,
"step": 11950
},
{
"epoch": 0.89,
"learning_rate": 8.844813211318822e-05,
"loss": 13.4144,
"step": 12000
},
{
"epoch": 0.89,
"learning_rate": 8.839790660063686e-05,
"loss": 15.6018,
"step": 12050
},
{
"epoch": 0.89,
"learning_rate": 8.83476810880855e-05,
"loss": 14.7849,
"step": 12100
},
{
"epoch": 0.9,
"learning_rate": 8.829745557553415e-05,
"loss": 14.441,
"step": 12150
},
{
"epoch": 0.9,
"learning_rate": 8.82472300629828e-05,
"loss": 14.2135,
"step": 12200
},
{
"epoch": 0.9,
"learning_rate": 8.819700455043144e-05,
"loss": 17.1245,
"step": 12250
},
{
"epoch": 0.91,
"learning_rate": 8.814677903788008e-05,
"loss": 14.6629,
"step": 12300
},
{
"epoch": 0.91,
"learning_rate": 8.809655352532873e-05,
"loss": 16.6715,
"step": 12350
},
{
"epoch": 0.91,
"learning_rate": 8.804632801277738e-05,
"loss": 13.0133,
"step": 12400
},
{
"epoch": 0.92,
"learning_rate": 8.799610250022601e-05,
"loss": 14.1551,
"step": 12450
},
{
"epoch": 0.92,
"learning_rate": 8.794587698767466e-05,
"loss": 14.019,
"step": 12500
},
{
"epoch": 0.93,
"learning_rate": 8.78956514751233e-05,
"loss": 14.4279,
"step": 12550
},
{
"epoch": 0.93,
"learning_rate": 8.784542596257195e-05,
"loss": 12.5293,
"step": 12600
},
{
"epoch": 0.93,
"learning_rate": 8.77952004500206e-05,
"loss": 15.0403,
"step": 12650
},
{
"epoch": 0.94,
"learning_rate": 8.774497493746924e-05,
"loss": 13.8193,
"step": 12700
},
{
"epoch": 0.94,
"learning_rate": 8.769474942491789e-05,
"loss": 13.1564,
"step": 12750
},
{
"epoch": 0.94,
"learning_rate": 8.764452391236652e-05,
"loss": 14.6415,
"step": 12800
},
{
"epoch": 0.95,
"learning_rate": 8.759429839981518e-05,
"loss": 12.2339,
"step": 12850
},
{
"epoch": 0.95,
"learning_rate": 8.754407288726381e-05,
"loss": 12.1604,
"step": 12900
},
{
"epoch": 0.96,
"learning_rate": 8.749384737471247e-05,
"loss": 15.4939,
"step": 12950
},
{
"epoch": 0.96,
"learning_rate": 8.744362186216111e-05,
"loss": 13.9713,
"step": 13000
},
{
"epoch": 0.96,
"learning_rate": 8.739339634960976e-05,
"loss": 14.0986,
"step": 13050
},
{
"epoch": 0.97,
"learning_rate": 8.73431708370584e-05,
"loss": 13.6334,
"step": 13100
},
{
"epoch": 0.97,
"learning_rate": 8.729294532450703e-05,
"loss": 13.5201,
"step": 13150
},
{
"epoch": 0.97,
"learning_rate": 8.724271981195569e-05,
"loss": 14.3793,
"step": 13200
},
{
"epoch": 0.98,
"learning_rate": 8.719249429940432e-05,
"loss": 13.1741,
"step": 13250
},
{
"epoch": 0.98,
"learning_rate": 8.714226878685298e-05,
"loss": 11.7782,
"step": 13300
},
{
"epoch": 0.98,
"learning_rate": 8.709204327430162e-05,
"loss": 12.2758,
"step": 13350
},
{
"epoch": 0.99,
"learning_rate": 8.704181776175027e-05,
"loss": 13.1723,
"step": 13400
},
{
"epoch": 0.99,
"learning_rate": 8.699159224919891e-05,
"loss": 14.0858,
"step": 13450
},
{
"epoch": 1.0,
"learning_rate": 8.694136673664755e-05,
"loss": 11.2836,
"step": 13500
},
{
"epoch": 1.0,
"learning_rate": 8.68911412240962e-05,
"loss": 15.7226,
"step": 13550
},
{
"epoch": 1.0,
"learning_rate": 8.684091571154484e-05,
"loss": 15.8889,
"step": 13600
},
{
"epoch": 1.01,
"learning_rate": 8.679069019899349e-05,
"loss": 12.2185,
"step": 13650
},
{
"epoch": 1.01,
"learning_rate": 8.674046468644213e-05,
"loss": 11.4647,
"step": 13700
},
{
"epoch": 1.01,
"learning_rate": 8.669023917389077e-05,
"loss": 13.1238,
"step": 13750
},
{
"epoch": 1.02,
"learning_rate": 8.664001366133942e-05,
"loss": 11.909,
"step": 13800
},
{
"epoch": 1.02,
"learning_rate": 8.658978814878806e-05,
"loss": 12.5478,
"step": 13850
},
{
"epoch": 1.03,
"learning_rate": 8.65395626362367e-05,
"loss": 13.017,
"step": 13900
},
{
"epoch": 1.03,
"learning_rate": 8.648933712368535e-05,
"loss": 12.9134,
"step": 13950
},
{
"epoch": 1.03,
"learning_rate": 8.6439111611134e-05,
"loss": 13.3485,
"step": 14000
},
{
"epoch": 1.04,
"learning_rate": 8.638888609858264e-05,
"loss": 11.4706,
"step": 14050
},
{
"epoch": 1.04,
"learning_rate": 8.633866058603128e-05,
"loss": 11.1063,
"step": 14100
},
{
"epoch": 1.04,
"learning_rate": 8.628843507347994e-05,
"loss": 12.7408,
"step": 14150
},
{
"epoch": 1.05,
"learning_rate": 8.623820956092857e-05,
"loss": 12.0689,
"step": 14200
},
{
"epoch": 1.05,
"learning_rate": 8.618798404837721e-05,
"loss": 11.0724,
"step": 14250
},
{
"epoch": 1.05,
"learning_rate": 8.613775853582586e-05,
"loss": 12.5685,
"step": 14300
},
{
"epoch": 1.06,
"learning_rate": 8.60875330232745e-05,
"loss": 12.7776,
"step": 14350
},
{
"epoch": 1.06,
"learning_rate": 8.603730751072315e-05,
"loss": 11.3066,
"step": 14400
},
{
"epoch": 1.07,
"learning_rate": 8.598708199817179e-05,
"loss": 13.06,
"step": 14450
},
{
"epoch": 1.07,
"learning_rate": 8.593685648562045e-05,
"loss": 15.6523,
"step": 14500
},
{
"epoch": 1.07,
"learning_rate": 8.588663097306908e-05,
"loss": 12.019,
"step": 14550
},
{
"epoch": 1.08,
"learning_rate": 8.583640546051774e-05,
"loss": 11.0941,
"step": 14600
},
{
"epoch": 1.08,
"learning_rate": 8.578617994796637e-05,
"loss": 12.4755,
"step": 14650
},
{
"epoch": 1.08,
"learning_rate": 8.573595443541502e-05,
"loss": 13.7012,
"step": 14700
},
{
"epoch": 1.09,
"learning_rate": 8.568572892286366e-05,
"loss": 12.2024,
"step": 14750
},
{
"epoch": 1.09,
"learning_rate": 8.56355034103123e-05,
"loss": 12.4744,
"step": 14800
},
{
"epoch": 1.1,
"learning_rate": 8.558527789776096e-05,
"loss": 12.3234,
"step": 14850
},
{
"epoch": 1.1,
"learning_rate": 8.553505238520959e-05,
"loss": 12.5616,
"step": 14900
},
{
"epoch": 1.1,
"learning_rate": 8.548482687265824e-05,
"loss": 11.9559,
"step": 14950
},
{
"epoch": 1.11,
"learning_rate": 8.543460136010688e-05,
"loss": 12.0734,
"step": 15000
},
{
"epoch": 1.11,
"learning_rate": 8.538437584755553e-05,
"loss": 13.0341,
"step": 15050
},
{
"epoch": 1.11,
"learning_rate": 8.533415033500418e-05,
"loss": 12.7406,
"step": 15100
},
{
"epoch": 1.12,
"learning_rate": 8.528392482245282e-05,
"loss": 11.7258,
"step": 15150
},
{
"epoch": 1.12,
"learning_rate": 8.523369930990147e-05,
"loss": 11.8709,
"step": 15200
},
{
"epoch": 1.12,
"learning_rate": 8.518347379735011e-05,
"loss": 11.7021,
"step": 15250
},
{
"epoch": 1.13,
"learning_rate": 8.513324828479875e-05,
"loss": 13.2674,
"step": 15300
},
{
"epoch": 1.13,
"learning_rate": 8.508302277224738e-05,
"loss": 11.9099,
"step": 15350
},
{
"epoch": 1.14,
"learning_rate": 8.503279725969604e-05,
"loss": 11.7841,
"step": 15400
},
{
"epoch": 1.14,
"learning_rate": 8.498257174714469e-05,
"loss": 11.9573,
"step": 15450
},
{
"epoch": 1.14,
"learning_rate": 8.493234623459333e-05,
"loss": 11.7211,
"step": 15500
},
{
"epoch": 1.15,
"learning_rate": 8.488212072204197e-05,
"loss": 12.3513,
"step": 15550
},
{
"epoch": 1.15,
"learning_rate": 8.483189520949062e-05,
"loss": 11.0709,
"step": 15600
},
{
"epoch": 1.15,
"learning_rate": 8.478166969693926e-05,
"loss": 11.6544,
"step": 15650
},
{
"epoch": 1.16,
"learning_rate": 8.47314441843879e-05,
"loss": 11.8285,
"step": 15700
},
{
"epoch": 1.16,
"learning_rate": 8.468121867183655e-05,
"loss": 10.4208,
"step": 15750
},
{
"epoch": 1.17,
"learning_rate": 8.46309931592852e-05,
"loss": 10.7821,
"step": 15800
},
{
"epoch": 1.17,
"learning_rate": 8.458076764673384e-05,
"loss": 13.2724,
"step": 15850
},
{
"epoch": 1.17,
"learning_rate": 8.45305421341825e-05,
"loss": 10.9219,
"step": 15900
},
{
"epoch": 1.18,
"learning_rate": 8.448031662163113e-05,
"loss": 12.2532,
"step": 15950
},
{
"epoch": 1.18,
"learning_rate": 8.443009110907977e-05,
"loss": 11.0132,
"step": 16000
},
{
"epoch": 1.18,
"learning_rate": 8.437986559652841e-05,
"loss": 12.319,
"step": 16050
},
{
"epoch": 1.19,
"learning_rate": 8.432964008397706e-05,
"loss": 12.9871,
"step": 16100
},
{
"epoch": 1.19,
"learning_rate": 8.42794145714257e-05,
"loss": 12.0625,
"step": 16150
},
{
"epoch": 1.19,
"learning_rate": 8.422918905887435e-05,
"loss": 13.4629,
"step": 16200
},
{
"epoch": 1.2,
"learning_rate": 8.4178963546323e-05,
"loss": 10.9291,
"step": 16250
},
{
"epoch": 1.2,
"learning_rate": 8.412873803377163e-05,
"loss": 13.7719,
"step": 16300
},
{
"epoch": 1.21,
"learning_rate": 8.407851252122029e-05,
"loss": 11.3634,
"step": 16350
},
{
"epoch": 1.21,
"learning_rate": 8.402828700866892e-05,
"loss": 12.7941,
"step": 16400
},
{
"epoch": 1.21,
"learning_rate": 8.397806149611758e-05,
"loss": 11.8863,
"step": 16450
},
{
"epoch": 1.22,
"learning_rate": 8.392783598356621e-05,
"loss": 9.5225,
"step": 16500
},
{
"epoch": 1.22,
"learning_rate": 8.387761047101485e-05,
"loss": 12.983,
"step": 16550
},
{
"epoch": 1.22,
"learning_rate": 8.382738495846351e-05,
"loss": 11.8489,
"step": 16600
},
{
"epoch": 1.23,
"learning_rate": 8.377715944591214e-05,
"loss": 11.8122,
"step": 16650
},
{
"epoch": 1.23,
"learning_rate": 8.37269339333608e-05,
"loss": 12.3387,
"step": 16700
},
{
"epoch": 1.24,
"learning_rate": 8.367670842080943e-05,
"loss": 13.4648,
"step": 16750
},
{
"epoch": 1.24,
"learning_rate": 8.362648290825809e-05,
"loss": 10.2301,
"step": 16800
},
{
"epoch": 1.24,
"learning_rate": 8.357625739570672e-05,
"loss": 11.492,
"step": 16850
},
{
"epoch": 1.25,
"learning_rate": 8.352603188315538e-05,
"loss": 12.5997,
"step": 16900
},
{
"epoch": 1.25,
"learning_rate": 8.347580637060402e-05,
"loss": 11.5588,
"step": 16950
},
{
"epoch": 1.25,
"learning_rate": 8.342558085805266e-05,
"loss": 11.8627,
"step": 17000
},
{
"epoch": 1.26,
"learning_rate": 8.337535534550131e-05,
"loss": 13.2469,
"step": 17050
},
{
"epoch": 1.26,
"learning_rate": 8.332512983294994e-05,
"loss": 10.4327,
"step": 17100
},
{
"epoch": 1.27,
"learning_rate": 8.32749043203986e-05,
"loss": 12.7566,
"step": 17150
},
{
"epoch": 1.27,
"learning_rate": 8.322467880784723e-05,
"loss": 11.0729,
"step": 17200
},
{
"epoch": 1.27,
"learning_rate": 8.317445329529588e-05,
"loss": 12.3484,
"step": 17250
},
{
"epoch": 1.28,
"learning_rate": 8.312422778274453e-05,
"loss": 10.5193,
"step": 17300
},
{
"epoch": 1.28,
"learning_rate": 8.307400227019317e-05,
"loss": 12.2369,
"step": 17350
},
{
"epoch": 1.28,
"learning_rate": 8.302377675764182e-05,
"loss": 12.2976,
"step": 17400
},
{
"epoch": 1.29,
"learning_rate": 8.297355124509046e-05,
"loss": 12.3852,
"step": 17450
},
{
"epoch": 1.29,
"learning_rate": 8.29233257325391e-05,
"loss": 11.2137,
"step": 17500
},
{
"epoch": 1.29,
"learning_rate": 8.287310021998775e-05,
"loss": 11.609,
"step": 17550
},
{
"epoch": 1.3,
"learning_rate": 8.282287470743639e-05,
"loss": 13.3339,
"step": 17600
},
{
"epoch": 1.3,
"learning_rate": 8.277264919488504e-05,
"loss": 11.4263,
"step": 17650
},
{
"epoch": 1.31,
"learning_rate": 8.272242368233368e-05,
"loss": 12.6949,
"step": 17700
},
{
"epoch": 1.31,
"learning_rate": 8.267219816978233e-05,
"loss": 11.4767,
"step": 17750
},
{
"epoch": 1.31,
"learning_rate": 8.262197265723097e-05,
"loss": 12.2225,
"step": 17800
},
{
"epoch": 1.32,
"learning_rate": 8.257174714467961e-05,
"loss": 11.0755,
"step": 17850
},
{
"epoch": 1.32,
"learning_rate": 8.252152163212826e-05,
"loss": 11.9677,
"step": 17900
},
{
"epoch": 1.32,
"learning_rate": 8.24712961195769e-05,
"loss": 11.098,
"step": 17950
},
{
"epoch": 1.33,
"learning_rate": 8.242107060702555e-05,
"loss": 11.1102,
"step": 18000
},
{
"epoch": 1.33,
"learning_rate": 8.237084509447419e-05,
"loss": 11.4985,
"step": 18050
},
{
"epoch": 1.34,
"learning_rate": 8.232061958192285e-05,
"loss": 11.7356,
"step": 18100
},
{
"epoch": 1.34,
"learning_rate": 8.227039406937148e-05,
"loss": 11.3336,
"step": 18150
},
{
"epoch": 1.34,
"learning_rate": 8.222016855682012e-05,
"loss": 11.0448,
"step": 18200
},
{
"epoch": 1.35,
"learning_rate": 8.216994304426877e-05,
"loss": 10.9986,
"step": 18250
},
{
"epoch": 1.35,
"learning_rate": 8.211971753171741e-05,
"loss": 10.768,
"step": 18300
},
{
"epoch": 1.35,
"learning_rate": 8.206949201916607e-05,
"loss": 11.6844,
"step": 18350
},
{
"epoch": 1.36,
"learning_rate": 8.20192665066147e-05,
"loss": 11.5615,
"step": 18400
},
{
"epoch": 1.36,
"learning_rate": 8.196904099406336e-05,
"loss": 11.4019,
"step": 18450
},
{
"epoch": 1.36,
"learning_rate": 8.191881548151199e-05,
"loss": 12.1784,
"step": 18500
},
{
"epoch": 1.37,
"learning_rate": 8.186858996896064e-05,
"loss": 12.4565,
"step": 18550
},
{
"epoch": 1.37,
"learning_rate": 8.181836445640927e-05,
"loss": 11.0557,
"step": 18600
},
{
"epoch": 1.38,
"learning_rate": 8.176813894385793e-05,
"loss": 12.1892,
"step": 18650
},
{
"epoch": 1.38,
"learning_rate": 8.171791343130658e-05,
"loss": 12.0531,
"step": 18700
},
{
"epoch": 1.38,
"learning_rate": 8.166768791875522e-05,
"loss": 10.1791,
"step": 18750
},
{
"epoch": 1.39,
"learning_rate": 8.161746240620386e-05,
"loss": 11.2501,
"step": 18800
},
{
"epoch": 1.39,
"learning_rate": 8.15672368936525e-05,
"loss": 9.92,
"step": 18850
},
{
"epoch": 1.39,
"learning_rate": 8.151701138110115e-05,
"loss": 10.0603,
"step": 18900
},
{
"epoch": 1.4,
"learning_rate": 8.146678586854978e-05,
"loss": 10.9477,
"step": 18950
},
{
"epoch": 1.4,
"learning_rate": 8.141656035599844e-05,
"loss": 9.7579,
"step": 19000
},
{
"epoch": 1.41,
"learning_rate": 8.136633484344708e-05,
"loss": 11.243,
"step": 19050
},
{
"epoch": 1.41,
"learning_rate": 8.131610933089573e-05,
"loss": 11.0069,
"step": 19100
},
{
"epoch": 1.41,
"learning_rate": 8.126588381834437e-05,
"loss": 9.7387,
"step": 19150
},
{
"epoch": 1.42,
"learning_rate": 8.121565830579302e-05,
"loss": 11.4624,
"step": 19200
},
{
"epoch": 1.42,
"learning_rate": 8.116543279324166e-05,
"loss": 12.1299,
"step": 19250
},
{
"epoch": 1.42,
"learning_rate": 8.11152072806903e-05,
"loss": 12.2796,
"step": 19300
},
{
"epoch": 1.43,
"learning_rate": 8.106498176813895e-05,
"loss": 10.3295,
"step": 19350
},
{
"epoch": 1.43,
"learning_rate": 8.101475625558759e-05,
"loss": 10.0709,
"step": 19400
},
{
"epoch": 1.43,
"learning_rate": 8.096453074303624e-05,
"loss": 11.0725,
"step": 19450
},
{
"epoch": 1.44,
"learning_rate": 8.091430523048488e-05,
"loss": 10.7882,
"step": 19500
},
{
"epoch": 1.44,
"learning_rate": 8.086407971793352e-05,
"loss": 11.4124,
"step": 19550
},
{
"epoch": 1.45,
"learning_rate": 8.081385420538217e-05,
"loss": 10.4941,
"step": 19600
},
{
"epoch": 1.45,
"learning_rate": 8.076362869283081e-05,
"loss": 11.8687,
"step": 19650
},
{
"epoch": 1.45,
"learning_rate": 8.071340318027946e-05,
"loss": 11.3221,
"step": 19700
},
{
"epoch": 1.46,
"learning_rate": 8.06631776677281e-05,
"loss": 10.2167,
"step": 19750
},
{
"epoch": 1.46,
"learning_rate": 8.061295215517675e-05,
"loss": 10.5425,
"step": 19800
},
{
"epoch": 1.46,
"learning_rate": 8.05627266426254e-05,
"loss": 11.2982,
"step": 19850
},
{
"epoch": 1.47,
"learning_rate": 8.051250113007403e-05,
"loss": 12.0685,
"step": 19900
},
{
"epoch": 1.47,
"learning_rate": 8.046227561752268e-05,
"loss": 10.6613,
"step": 19950
},
{
"epoch": 1.48,
"learning_rate": 8.041205010497132e-05,
"loss": 10.8245,
"step": 20000
},
{
"epoch": 1.48,
"eval_loss": 10.409339904785156,
"eval_runtime": 890.9956,
"eval_samples_per_second": 14.7,
"eval_steps_per_second": 3.676,
"eval_wer": 0.2624627273109067,
"step": 20000
},
{
"epoch": 1.48,
"learning_rate": 8.036182459241997e-05,
"loss": 10.671,
"step": 20050
},
{
"epoch": 1.48,
"learning_rate": 8.031159907986861e-05,
"loss": 11.0263,
"step": 20100
},
{
"epoch": 1.49,
"learning_rate": 8.026137356731725e-05,
"loss": 11.0571,
"step": 20150
},
{
"epoch": 1.49,
"learning_rate": 8.021114805476591e-05,
"loss": 13.0778,
"step": 20200
},
{
"epoch": 1.49,
"learning_rate": 8.016092254221454e-05,
"loss": 11.0495,
"step": 20250
},
{
"epoch": 1.5,
"learning_rate": 8.01106970296632e-05,
"loss": 10.6039,
"step": 20300
},
{
"epoch": 1.5,
"learning_rate": 8.006047151711183e-05,
"loss": 11.4221,
"step": 20350
},
{
"epoch": 1.5,
"learning_rate": 8.001024600456049e-05,
"loss": 10.7975,
"step": 20400
},
{
"epoch": 1.51,
"learning_rate": 7.996002049200912e-05,
"loss": 10.1123,
"step": 20450
},
{
"epoch": 1.51,
"learning_rate": 7.990979497945776e-05,
"loss": 10.2241,
"step": 20500
},
{
"epoch": 1.52,
"learning_rate": 7.985956946690642e-05,
"loss": 10.0191,
"step": 20550
},
{
"epoch": 1.52,
"learning_rate": 7.980934395435505e-05,
"loss": 10.649,
"step": 20600
},
{
"epoch": 1.52,
"learning_rate": 7.975911844180371e-05,
"loss": 9.6091,
"step": 20650
},
{
"epoch": 1.53,
"learning_rate": 7.970889292925234e-05,
"loss": 9.9386,
"step": 20700
},
{
"epoch": 1.53,
"learning_rate": 7.9658667416701e-05,
"loss": 11.2646,
"step": 20750
},
{
"epoch": 1.53,
"learning_rate": 7.960844190414964e-05,
"loss": 10.0181,
"step": 20800
},
{
"epoch": 1.54,
"learning_rate": 7.955821639159828e-05,
"loss": 11.9437,
"step": 20850
},
{
"epoch": 1.54,
"learning_rate": 7.950799087904693e-05,
"loss": 10.9254,
"step": 20900
},
{
"epoch": 1.55,
"learning_rate": 7.945776536649557e-05,
"loss": 11.7954,
"step": 20950
},
{
"epoch": 1.55,
"learning_rate": 7.940753985394422e-05,
"loss": 9.6569,
"step": 21000
},
{
"epoch": 1.55,
"learning_rate": 7.935731434139286e-05,
"loss": 10.6546,
"step": 21050
},
{
"epoch": 1.56,
"learning_rate": 7.93070888288415e-05,
"loss": 10.2795,
"step": 21100
},
{
"epoch": 1.56,
"learning_rate": 7.925686331629015e-05,
"loss": 10.4595,
"step": 21150
},
{
"epoch": 1.56,
"learning_rate": 7.920663780373879e-05,
"loss": 9.2921,
"step": 21200
},
{
"epoch": 1.57,
"learning_rate": 7.915641229118744e-05,
"loss": 10.1245,
"step": 21250
},
{
"epoch": 1.57,
"learning_rate": 7.910618677863608e-05,
"loss": 11.2896,
"step": 21300
},
{
"epoch": 1.57,
"learning_rate": 7.905596126608472e-05,
"loss": 11.3328,
"step": 21350
},
{
"epoch": 1.58,
"learning_rate": 7.900573575353337e-05,
"loss": 10.0718,
"step": 21400
},
{
"epoch": 1.58,
"learning_rate": 7.895551024098201e-05,
"loss": 10.8954,
"step": 21450
},
{
"epoch": 1.59,
"learning_rate": 7.890528472843066e-05,
"loss": 10.2921,
"step": 21500
},
{
"epoch": 1.59,
"learning_rate": 7.88550592158793e-05,
"loss": 9.4609,
"step": 21550
},
{
"epoch": 1.59,
"learning_rate": 7.880483370332796e-05,
"loss": 11.4751,
"step": 21600
},
{
"epoch": 1.6,
"learning_rate": 7.875460819077659e-05,
"loss": 10.1189,
"step": 21650
},
{
"epoch": 1.6,
"learning_rate": 7.870438267822523e-05,
"loss": 11.6478,
"step": 21700
},
{
"epoch": 1.6,
"learning_rate": 7.865415716567388e-05,
"loss": 11.2943,
"step": 21750
},
{
"epoch": 1.61,
"learning_rate": 7.860393165312252e-05,
"loss": 11.5788,
"step": 21800
},
{
"epoch": 1.61,
"learning_rate": 7.855370614057116e-05,
"loss": 10.638,
"step": 21850
},
{
"epoch": 1.62,
"learning_rate": 7.850348062801981e-05,
"loss": 9.2895,
"step": 21900
},
{
"epoch": 1.62,
"learning_rate": 7.845325511546847e-05,
"loss": 11.4984,
"step": 21950
},
{
"epoch": 1.62,
"learning_rate": 7.84030296029171e-05,
"loss": 10.3685,
"step": 22000
},
{
"epoch": 1.63,
"learning_rate": 7.835280409036575e-05,
"loss": 10.0115,
"step": 22050
},
{
"epoch": 1.63,
"learning_rate": 7.830257857781439e-05,
"loss": 10.2941,
"step": 22100
},
{
"epoch": 1.63,
"learning_rate": 7.825235306526304e-05,
"loss": 10.8751,
"step": 22150
},
{
"epoch": 1.64,
"learning_rate": 7.820212755271167e-05,
"loss": 10.7477,
"step": 22200
},
{
"epoch": 1.64,
"learning_rate": 7.815190204016032e-05,
"loss": 12.2573,
"step": 22250
},
{
"epoch": 1.64,
"learning_rate": 7.810167652760897e-05,
"loss": 10.1055,
"step": 22300
},
{
"epoch": 1.65,
"learning_rate": 7.80514510150576e-05,
"loss": 10.7913,
"step": 22350
},
{
"epoch": 1.65,
"learning_rate": 7.800122550250626e-05,
"loss": 9.4701,
"step": 22400
},
{
"epoch": 1.66,
"learning_rate": 7.79509999899549e-05,
"loss": 9.9434,
"step": 22450
},
{
"epoch": 1.66,
"learning_rate": 7.790077447740355e-05,
"loss": 10.9016,
"step": 22500
},
{
"epoch": 1.66,
"learning_rate": 7.785054896485218e-05,
"loss": 10.1733,
"step": 22550
},
{
"epoch": 1.67,
"learning_rate": 7.780032345230084e-05,
"loss": 11.0693,
"step": 22600
},
{
"epoch": 1.67,
"learning_rate": 7.775009793974948e-05,
"loss": 10.4538,
"step": 22650
},
{
"epoch": 1.67,
"learning_rate": 7.769987242719813e-05,
"loss": 10.5127,
"step": 22700
},
{
"epoch": 1.68,
"learning_rate": 7.764964691464677e-05,
"loss": 10.1074,
"step": 22750
},
{
"epoch": 1.68,
"learning_rate": 7.75994214020954e-05,
"loss": 11.2803,
"step": 22800
},
{
"epoch": 1.69,
"learning_rate": 7.754919588954406e-05,
"loss": 10.9954,
"step": 22850
},
{
"epoch": 1.69,
"learning_rate": 7.749897037699269e-05,
"loss": 10.1006,
"step": 22900
},
{
"epoch": 1.69,
"learning_rate": 7.744874486444135e-05,
"loss": 10.9978,
"step": 22950
},
{
"epoch": 1.7,
"learning_rate": 7.739851935188999e-05,
"loss": 10.5885,
"step": 23000
},
{
"epoch": 1.7,
"learning_rate": 7.734829383933864e-05,
"loss": 10.5676,
"step": 23050
},
{
"epoch": 1.7,
"learning_rate": 7.729806832678728e-05,
"loss": 11.3204,
"step": 23100
},
{
"epoch": 1.71,
"learning_rate": 7.724784281423592e-05,
"loss": 10.5388,
"step": 23150
},
{
"epoch": 1.71,
"learning_rate": 7.719761730168457e-05,
"loss": 10.7915,
"step": 23200
},
{
"epoch": 1.71,
"learning_rate": 7.714739178913321e-05,
"loss": 11.9486,
"step": 23250
},
{
"epoch": 1.72,
"learning_rate": 7.709716627658186e-05,
"loss": 11.6693,
"step": 23300
},
{
"epoch": 1.72,
"learning_rate": 7.70469407640305e-05,
"loss": 9.2664,
"step": 23350
},
{
"epoch": 1.73,
"learning_rate": 7.699671525147914e-05,
"loss": 12.1429,
"step": 23400
},
{
"epoch": 1.73,
"learning_rate": 7.694648973892779e-05,
"loss": 10.1155,
"step": 23450
},
{
"epoch": 1.73,
"learning_rate": 7.689626422637643e-05,
"loss": 10.1562,
"step": 23500
},
{
"epoch": 1.74,
"learning_rate": 7.684603871382508e-05,
"loss": 11.3484,
"step": 23550
},
{
"epoch": 1.74,
"learning_rate": 7.679581320127372e-05,
"loss": 9.5912,
"step": 23600
},
{
"epoch": 1.74,
"learning_rate": 7.674558768872236e-05,
"loss": 11.1067,
"step": 23650
},
{
"epoch": 1.75,
"learning_rate": 7.669536217617101e-05,
"loss": 11.7182,
"step": 23700
},
{
"epoch": 1.75,
"learning_rate": 7.664513666361965e-05,
"loss": 10.1444,
"step": 23750
},
{
"epoch": 1.76,
"learning_rate": 7.659491115106831e-05,
"loss": 11.2671,
"step": 23800
},
{
"epoch": 1.76,
"learning_rate": 7.654468563851694e-05,
"loss": 10.9027,
"step": 23850
},
{
"epoch": 1.76,
"learning_rate": 7.64944601259656e-05,
"loss": 10.9078,
"step": 23900
},
{
"epoch": 1.77,
"learning_rate": 7.644423461341423e-05,
"loss": 10.5441,
"step": 23950
},
{
"epoch": 1.77,
"learning_rate": 7.639400910086287e-05,
"loss": 9.8617,
"step": 24000
},
{
"epoch": 1.77,
"learning_rate": 7.634378358831153e-05,
"loss": 10.8022,
"step": 24050
},
{
"epoch": 1.78,
"learning_rate": 7.629355807576016e-05,
"loss": 10.3082,
"step": 24100
},
{
"epoch": 1.78,
"learning_rate": 7.624333256320882e-05,
"loss": 9.8398,
"step": 24150
},
{
"epoch": 1.79,
"learning_rate": 7.619310705065745e-05,
"loss": 10.3631,
"step": 24200
},
{
"epoch": 1.79,
"learning_rate": 7.61428815381061e-05,
"loss": 10.6078,
"step": 24250
},
{
"epoch": 1.79,
"learning_rate": 7.609265602555474e-05,
"loss": 11.366,
"step": 24300
},
{
"epoch": 1.8,
"learning_rate": 7.60424305130034e-05,
"loss": 12.1154,
"step": 24350
},
{
"epoch": 1.8,
"learning_rate": 7.599220500045204e-05,
"loss": 11.3429,
"step": 24400
},
{
"epoch": 1.8,
"learning_rate": 7.594197948790068e-05,
"loss": 9.135,
"step": 24450
},
{
"epoch": 1.81,
"learning_rate": 7.589175397534933e-05,
"loss": 10.3796,
"step": 24500
},
{
"epoch": 1.81,
"learning_rate": 7.584152846279796e-05,
"loss": 10.6452,
"step": 24550
},
{
"epoch": 1.81,
"learning_rate": 7.579130295024661e-05,
"loss": 9.6237,
"step": 24600
},
{
"epoch": 1.82,
"learning_rate": 7.574107743769525e-05,
"loss": 10.7158,
"step": 24650
},
{
"epoch": 1.82,
"learning_rate": 7.56908519251439e-05,
"loss": 9.8296,
"step": 24700
},
{
"epoch": 1.83,
"learning_rate": 7.564062641259255e-05,
"loss": 10.1654,
"step": 24750
},
{
"epoch": 1.83,
"learning_rate": 7.559040090004119e-05,
"loss": 10.395,
"step": 24800
},
{
"epoch": 1.83,
"learning_rate": 7.554017538748984e-05,
"loss": 10.3067,
"step": 24850
},
{
"epoch": 1.84,
"learning_rate": 7.548994987493848e-05,
"loss": 10.7243,
"step": 24900
},
{
"epoch": 1.84,
"learning_rate": 7.543972436238712e-05,
"loss": 10.4022,
"step": 24950
},
{
"epoch": 1.84,
"learning_rate": 7.538949884983577e-05,
"loss": 10.5045,
"step": 25000
},
{
"epoch": 1.85,
"learning_rate": 7.533927333728441e-05,
"loss": 11.2205,
"step": 25050
},
{
"epoch": 1.85,
"learning_rate": 7.528904782473306e-05,
"loss": 10.5375,
"step": 25100
},
{
"epoch": 1.86,
"learning_rate": 7.52388223121817e-05,
"loss": 10.4876,
"step": 25150
},
{
"epoch": 1.86,
"learning_rate": 7.518859679963034e-05,
"loss": 9.2096,
"step": 25200
},
{
"epoch": 1.86,
"learning_rate": 7.513837128707899e-05,
"loss": 10.0442,
"step": 25250
},
{
"epoch": 1.87,
"learning_rate": 7.508814577452763e-05,
"loss": 9.8174,
"step": 25300
},
{
"epoch": 1.87,
"learning_rate": 7.503792026197628e-05,
"loss": 10.8789,
"step": 25350
},
{
"epoch": 1.87,
"learning_rate": 7.498769474942492e-05,
"loss": 9.8789,
"step": 25400
},
{
"epoch": 1.88,
"learning_rate": 7.493746923687356e-05,
"loss": 11.1431,
"step": 25450
},
{
"epoch": 1.88,
"learning_rate": 7.488724372432221e-05,
"loss": 10.4659,
"step": 25500
},
{
"epoch": 1.88,
"learning_rate": 7.483701821177087e-05,
"loss": 10.7342,
"step": 25550
},
{
"epoch": 1.89,
"learning_rate": 7.47867926992195e-05,
"loss": 10.7841,
"step": 25600
},
{
"epoch": 1.89,
"learning_rate": 7.473656718666814e-05,
"loss": 9.6162,
"step": 25650
},
{
"epoch": 1.9,
"learning_rate": 7.468634167411678e-05,
"loss": 10.3568,
"step": 25700
},
{
"epoch": 1.9,
"learning_rate": 7.463611616156543e-05,
"loss": 9.6701,
"step": 25750
},
{
"epoch": 1.9,
"learning_rate": 7.458589064901407e-05,
"loss": 9.4003,
"step": 25800
},
{
"epoch": 1.91,
"learning_rate": 7.453566513646272e-05,
"loss": 9.6621,
"step": 25850
},
{
"epoch": 1.91,
"learning_rate": 7.448543962391137e-05,
"loss": 10.1086,
"step": 25900
},
{
"epoch": 1.91,
"learning_rate": 7.443521411136e-05,
"loss": 11.5655,
"step": 25950
},
{
"epoch": 1.92,
"learning_rate": 7.438498859880866e-05,
"loss": 8.9418,
"step": 26000
},
{
"epoch": 1.92,
"learning_rate": 7.433476308625729e-05,
"loss": 9.2415,
"step": 26050
},
{
"epoch": 1.93,
"learning_rate": 7.428453757370595e-05,
"loss": 9.4192,
"step": 26100
},
{
"epoch": 1.93,
"learning_rate": 7.423431206115458e-05,
"loss": 9.1755,
"step": 26150
},
{
"epoch": 1.93,
"learning_rate": 7.418408654860322e-05,
"loss": 9.6327,
"step": 26200
},
{
"epoch": 1.94,
"learning_rate": 7.413386103605188e-05,
"loss": 10.3333,
"step": 26250
},
{
"epoch": 1.94,
"learning_rate": 7.408363552350051e-05,
"loss": 10.298,
"step": 26300
},
{
"epoch": 1.94,
"learning_rate": 7.403341001094917e-05,
"loss": 10.7038,
"step": 26350
},
{
"epoch": 1.95,
"learning_rate": 7.39831844983978e-05,
"loss": 10.5099,
"step": 26400
},
{
"epoch": 1.95,
"learning_rate": 7.393295898584646e-05,
"loss": 9.8063,
"step": 26450
},
{
"epoch": 1.95,
"learning_rate": 7.38827334732951e-05,
"loss": 9.5784,
"step": 26500
},
{
"epoch": 1.96,
"learning_rate": 7.383250796074375e-05,
"loss": 10.1958,
"step": 26550
},
{
"epoch": 1.96,
"learning_rate": 7.378228244819239e-05,
"loss": 9.6869,
"step": 26600
},
{
"epoch": 1.97,
"learning_rate": 7.373205693564103e-05,
"loss": 10.3761,
"step": 26650
},
{
"epoch": 1.97,
"learning_rate": 7.368183142308968e-05,
"loss": 11.6806,
"step": 26700
},
{
"epoch": 1.97,
"learning_rate": 7.363160591053832e-05,
"loss": 10.3183,
"step": 26750
},
{
"epoch": 1.98,
"learning_rate": 7.358138039798697e-05,
"loss": 11.041,
"step": 26800
},
{
"epoch": 1.98,
"learning_rate": 7.353115488543561e-05,
"loss": 9.6997,
"step": 26850
},
{
"epoch": 1.98,
"learning_rate": 7.348092937288425e-05,
"loss": 9.6029,
"step": 26900
},
{
"epoch": 1.99,
"learning_rate": 7.34307038603329e-05,
"loss": 10.3322,
"step": 26950
},
{
"epoch": 1.99,
"learning_rate": 7.338047834778154e-05,
"loss": 9.9009,
"step": 27000
},
{
"epoch": 2.0,
"learning_rate": 7.333025283523019e-05,
"loss": 10.4815,
"step": 27050
},
{
"epoch": 2.0,
"learning_rate": 7.328002732267883e-05,
"loss": 11.7049,
"step": 27100
},
{
"epoch": 2.0,
"learning_rate": 7.322980181012748e-05,
"loss": 10.7831,
"step": 27150
},
{
"epoch": 2.01,
"learning_rate": 7.317957629757612e-05,
"loss": 8.735,
"step": 27200
},
{
"epoch": 2.01,
"learning_rate": 7.312935078502476e-05,
"loss": 9.4056,
"step": 27250
},
{
"epoch": 2.01,
"learning_rate": 7.307912527247342e-05,
"loss": 10.7689,
"step": 27300
},
{
"epoch": 2.02,
"learning_rate": 7.302889975992205e-05,
"loss": 9.5266,
"step": 27350
},
{
"epoch": 2.02,
"learning_rate": 7.29786742473707e-05,
"loss": 8.2467,
"step": 27400
},
{
"epoch": 2.02,
"learning_rate": 7.292844873481934e-05,
"loss": 8.6572,
"step": 27450
},
{
"epoch": 2.03,
"learning_rate": 7.287822322226798e-05,
"loss": 8.4693,
"step": 27500
},
{
"epoch": 2.03,
"learning_rate": 7.282799770971663e-05,
"loss": 10.4867,
"step": 27550
},
{
"epoch": 2.04,
"learning_rate": 7.277777219716527e-05,
"loss": 8.9364,
"step": 27600
},
{
"epoch": 2.04,
"learning_rate": 7.272754668461393e-05,
"loss": 10.0109,
"step": 27650
},
{
"epoch": 2.04,
"learning_rate": 7.267732117206256e-05,
"loss": 9.5535,
"step": 27700
},
{
"epoch": 2.05,
"learning_rate": 7.262709565951122e-05,
"loss": 9.3029,
"step": 27750
},
{
"epoch": 2.05,
"learning_rate": 7.257687014695985e-05,
"loss": 9.854,
"step": 27800
},
{
"epoch": 2.05,
"learning_rate": 7.25266446344085e-05,
"loss": 9.5327,
"step": 27850
},
{
"epoch": 2.06,
"learning_rate": 7.247641912185714e-05,
"loss": 9.8255,
"step": 27900
},
{
"epoch": 2.06,
"learning_rate": 7.242619360930578e-05,
"loss": 9.9737,
"step": 27950
},
{
"epoch": 2.07,
"learning_rate": 7.237596809675444e-05,
"loss": 9.0471,
"step": 28000
},
{
"epoch": 2.07,
"learning_rate": 7.232574258420307e-05,
"loss": 10.0566,
"step": 28050
},
{
"epoch": 2.07,
"learning_rate": 7.227551707165173e-05,
"loss": 9.4781,
"step": 28100
},
{
"epoch": 2.08,
"learning_rate": 7.222529155910036e-05,
"loss": 8.7599,
"step": 28150
},
{
"epoch": 2.08,
"learning_rate": 7.217506604654901e-05,
"loss": 8.7605,
"step": 28200
},
{
"epoch": 2.08,
"learning_rate": 7.212484053399764e-05,
"loss": 10.061,
"step": 28250
},
{
"epoch": 2.09,
"learning_rate": 7.20746150214463e-05,
"loss": 9.6124,
"step": 28300
},
{
"epoch": 2.09,
"learning_rate": 7.202438950889495e-05,
"loss": 10.4776,
"step": 28350
},
{
"epoch": 2.09,
"learning_rate": 7.197416399634359e-05,
"loss": 9.2169,
"step": 28400
},
{
"epoch": 2.1,
"learning_rate": 7.192393848379223e-05,
"loss": 9.3654,
"step": 28450
},
{
"epoch": 2.1,
"learning_rate": 7.187371297124086e-05,
"loss": 9.4445,
"step": 28500
},
{
"epoch": 2.11,
"learning_rate": 7.182348745868952e-05,
"loss": 8.3614,
"step": 28550
},
{
"epoch": 2.11,
"learning_rate": 7.177326194613815e-05,
"loss": 9.1661,
"step": 28600
},
{
"epoch": 2.11,
"learning_rate": 7.172303643358681e-05,
"loss": 9.4976,
"step": 28650
},
{
"epoch": 2.12,
"learning_rate": 7.167281092103545e-05,
"loss": 9.125,
"step": 28700
},
{
"epoch": 2.12,
"learning_rate": 7.16225854084841e-05,
"loss": 8.9051,
"step": 28750
},
{
"epoch": 2.12,
"learning_rate": 7.157235989593274e-05,
"loss": 8.9753,
"step": 28800
},
{
"epoch": 2.13,
"learning_rate": 7.152213438338139e-05,
"loss": 9.133,
"step": 28850
},
{
"epoch": 2.13,
"learning_rate": 7.147190887083003e-05,
"loss": 9.9677,
"step": 28900
},
{
"epoch": 2.14,
"learning_rate": 7.142168335827867e-05,
"loss": 8.725,
"step": 28950
},
{
"epoch": 2.14,
"learning_rate": 7.137145784572732e-05,
"loss": 8.831,
"step": 29000
},
{
"epoch": 2.14,
"learning_rate": 7.132123233317596e-05,
"loss": 7.8207,
"step": 29050
},
{
"epoch": 2.15,
"learning_rate": 7.127100682062461e-05,
"loss": 9.3707,
"step": 29100
},
{
"epoch": 2.15,
"learning_rate": 7.122078130807325e-05,
"loss": 10.4259,
"step": 29150
},
{
"epoch": 2.15,
"learning_rate": 7.11705557955219e-05,
"loss": 8.1836,
"step": 29200
},
{
"epoch": 2.16,
"learning_rate": 7.112033028297054e-05,
"loss": 9.0874,
"step": 29250
},
{
"epoch": 2.16,
"learning_rate": 7.107010477041918e-05,
"loss": 9.5957,
"step": 29300
},
{
"epoch": 2.16,
"learning_rate": 7.101987925786783e-05,
"loss": 8.7545,
"step": 29350
},
{
"epoch": 2.17,
"learning_rate": 7.096965374531647e-05,
"loss": 8.4478,
"step": 29400
},
{
"epoch": 2.17,
"learning_rate": 7.091942823276512e-05,
"loss": 8.601,
"step": 29450
},
{
"epoch": 2.18,
"learning_rate": 7.086920272021377e-05,
"loss": 9.6172,
"step": 29500
},
{
"epoch": 2.18,
"learning_rate": 7.08189772076624e-05,
"loss": 9.0805,
"step": 29550
},
{
"epoch": 2.18,
"learning_rate": 7.076875169511106e-05,
"loss": 9.6039,
"step": 29600
},
{
"epoch": 2.19,
"learning_rate": 7.071852618255969e-05,
"loss": 9.3622,
"step": 29650
},
{
"epoch": 2.19,
"learning_rate": 7.066830067000834e-05,
"loss": 8.8765,
"step": 29700
},
{
"epoch": 2.19,
"learning_rate": 7.061807515745699e-05,
"loss": 8.992,
"step": 29750
},
{
"epoch": 2.2,
"learning_rate": 7.056784964490562e-05,
"loss": 10.3564,
"step": 29800
},
{
"epoch": 2.2,
"learning_rate": 7.051762413235428e-05,
"loss": 8.8092,
"step": 29850
},
{
"epoch": 2.21,
"learning_rate": 7.046739861980291e-05,
"loss": 9.8373,
"step": 29900
},
{
"epoch": 2.21,
"learning_rate": 7.041717310725157e-05,
"loss": 8.004,
"step": 29950
},
{
"epoch": 2.21,
"learning_rate": 7.03669475947002e-05,
"loss": 9.4461,
"step": 30000
},
{
"epoch": 2.22,
"learning_rate": 7.031672208214886e-05,
"loss": 8.4964,
"step": 30050
},
{
"epoch": 2.22,
"learning_rate": 7.02664965695975e-05,
"loss": 10.3181,
"step": 30100
},
{
"epoch": 2.22,
"learning_rate": 7.021627105704615e-05,
"loss": 8.6637,
"step": 30150
},
{
"epoch": 2.23,
"learning_rate": 7.016604554449479e-05,
"loss": 10.1703,
"step": 30200
},
{
"epoch": 2.23,
"learning_rate": 7.011582003194342e-05,
"loss": 9.2846,
"step": 30250
},
{
"epoch": 2.24,
"learning_rate": 7.006559451939208e-05,
"loss": 8.5913,
"step": 30300
},
{
"epoch": 2.24,
"learning_rate": 7.001536900684071e-05,
"loss": 9.1308,
"step": 30350
},
{
"epoch": 2.24,
"learning_rate": 6.996514349428937e-05,
"loss": 11.2229,
"step": 30400
},
{
"epoch": 2.25,
"learning_rate": 6.991491798173801e-05,
"loss": 8.5923,
"step": 30450
},
{
"epoch": 2.25,
"learning_rate": 6.986469246918665e-05,
"loss": 9.9826,
"step": 30500
},
{
"epoch": 2.25,
"learning_rate": 6.98144669566353e-05,
"loss": 8.4765,
"step": 30550
},
{
"epoch": 2.26,
"learning_rate": 6.976424144408394e-05,
"loss": 8.7624,
"step": 30600
},
{
"epoch": 2.26,
"learning_rate": 6.971401593153259e-05,
"loss": 9.238,
"step": 30650
},
{
"epoch": 2.26,
"learning_rate": 6.966379041898123e-05,
"loss": 8.4976,
"step": 30700
},
{
"epoch": 2.27,
"learning_rate": 6.961356490642987e-05,
"loss": 9.1886,
"step": 30750
},
{
"epoch": 2.27,
"learning_rate": 6.956333939387852e-05,
"loss": 8.4443,
"step": 30800
},
{
"epoch": 2.28,
"learning_rate": 6.951311388132716e-05,
"loss": 8.3648,
"step": 30850
},
{
"epoch": 2.28,
"learning_rate": 6.94628883687758e-05,
"loss": 9.2509,
"step": 30900
},
{
"epoch": 2.28,
"learning_rate": 6.941266285622445e-05,
"loss": 8.3765,
"step": 30950
},
{
"epoch": 2.29,
"learning_rate": 6.93624373436731e-05,
"loss": 9.6616,
"step": 31000
},
{
"epoch": 2.29,
"learning_rate": 6.931221183112174e-05,
"loss": 9.658,
"step": 31050
},
{
"epoch": 2.29,
"learning_rate": 6.926198631857038e-05,
"loss": 8.7527,
"step": 31100
},
{
"epoch": 2.3,
"learning_rate": 6.921176080601903e-05,
"loss": 8.7148,
"step": 31150
},
{
"epoch": 2.3,
"learning_rate": 6.916153529346767e-05,
"loss": 8.5962,
"step": 31200
},
{
"epoch": 2.31,
"learning_rate": 6.911130978091633e-05,
"loss": 9.2625,
"step": 31250
},
{
"epoch": 2.31,
"learning_rate": 6.906108426836496e-05,
"loss": 8.8352,
"step": 31300
},
{
"epoch": 2.31,
"learning_rate": 6.90108587558136e-05,
"loss": 7.3991,
"step": 31350
},
{
"epoch": 2.32,
"learning_rate": 6.896063324326225e-05,
"loss": 9.9391,
"step": 31400
},
{
"epoch": 2.32,
"learning_rate": 6.891040773071089e-05,
"loss": 8.9575,
"step": 31450
},
{
"epoch": 2.32,
"learning_rate": 6.886018221815954e-05,
"loss": 7.9103,
"step": 31500
},
{
"epoch": 2.33,
"learning_rate": 6.880995670560818e-05,
"loss": 8.5276,
"step": 31550
},
{
"epoch": 2.33,
"learning_rate": 6.875973119305684e-05,
"loss": 8.5427,
"step": 31600
},
{
"epoch": 2.33,
"learning_rate": 6.870950568050547e-05,
"loss": 8.4672,
"step": 31650
},
{
"epoch": 2.34,
"learning_rate": 6.865928016795412e-05,
"loss": 8.9638,
"step": 31700
},
{
"epoch": 2.34,
"learning_rate": 6.860905465540276e-05,
"loss": 8.3136,
"step": 31750
},
{
"epoch": 2.35,
"learning_rate": 6.855882914285141e-05,
"loss": 8.8076,
"step": 31800
},
{
"epoch": 2.35,
"learning_rate": 6.850860363030004e-05,
"loss": 8.6041,
"step": 31850
},
{
"epoch": 2.35,
"learning_rate": 6.845837811774869e-05,
"loss": 9.1751,
"step": 31900
},
{
"epoch": 2.36,
"learning_rate": 6.840815260519735e-05,
"loss": 8.5955,
"step": 31950
},
{
"epoch": 2.36,
"learning_rate": 6.835792709264598e-05,
"loss": 9.0927,
"step": 32000
},
{
"epoch": 2.36,
"learning_rate": 6.830770158009463e-05,
"loss": 7.9647,
"step": 32050
},
{
"epoch": 2.37,
"learning_rate": 6.825747606754326e-05,
"loss": 10.2647,
"step": 32100
},
{
"epoch": 2.37,
"learning_rate": 6.820725055499192e-05,
"loss": 8.3442,
"step": 32150
},
{
"epoch": 2.38,
"learning_rate": 6.815702504244057e-05,
"loss": 9.2019,
"step": 32200
},
{
"epoch": 2.38,
"learning_rate": 6.810679952988921e-05,
"loss": 8.345,
"step": 32250
},
{
"epoch": 2.38,
"learning_rate": 6.805657401733785e-05,
"loss": 9.1835,
"step": 32300
},
{
"epoch": 2.39,
"learning_rate": 6.80063485047865e-05,
"loss": 9.1846,
"step": 32350
},
{
"epoch": 2.39,
"learning_rate": 6.795612299223514e-05,
"loss": 9.0015,
"step": 32400
},
{
"epoch": 2.39,
"learning_rate": 6.790589747968379e-05,
"loss": 8.2404,
"step": 32450
},
{
"epoch": 2.4,
"learning_rate": 6.785567196713243e-05,
"loss": 8.8715,
"step": 32500
},
{
"epoch": 2.4,
"learning_rate": 6.780544645458107e-05,
"loss": 8.817,
"step": 32550
},
{
"epoch": 2.4,
"learning_rate": 6.775522094202972e-05,
"loss": 9.2154,
"step": 32600
},
{
"epoch": 2.41,
"learning_rate": 6.770499542947836e-05,
"loss": 9.1914,
"step": 32650
},
{
"epoch": 2.41,
"learning_rate": 6.7654769916927e-05,
"loss": 9.2804,
"step": 32700
},
{
"epoch": 2.42,
"learning_rate": 6.760454440437565e-05,
"loss": 9.177,
"step": 32750
},
{
"epoch": 2.42,
"learning_rate": 6.75543188918243e-05,
"loss": 8.8259,
"step": 32800
},
{
"epoch": 2.42,
"learning_rate": 6.750409337927294e-05,
"loss": 8.6121,
"step": 32850
},
{
"epoch": 2.43,
"learning_rate": 6.745386786672158e-05,
"loss": 8.644,
"step": 32900
},
{
"epoch": 2.43,
"learning_rate": 6.740364235417023e-05,
"loss": 8.5743,
"step": 32950
},
{
"epoch": 2.43,
"learning_rate": 6.735341684161888e-05,
"loss": 8.7636,
"step": 33000
},
{
"epoch": 2.44,
"learning_rate": 6.730319132906751e-05,
"loss": 8.3064,
"step": 33050
},
{
"epoch": 2.44,
"learning_rate": 6.725296581651616e-05,
"loss": 8.8806,
"step": 33100
},
{
"epoch": 2.45,
"learning_rate": 6.72027403039648e-05,
"loss": 8.8212,
"step": 33150
},
{
"epoch": 2.45,
"learning_rate": 6.715251479141345e-05,
"loss": 9.5261,
"step": 33200
},
{
"epoch": 2.45,
"learning_rate": 6.710228927886209e-05,
"loss": 9.0764,
"step": 33250
},
{
"epoch": 2.46,
"learning_rate": 6.705206376631073e-05,
"loss": 7.399,
"step": 33300
},
{
"epoch": 2.46,
"learning_rate": 6.700183825375939e-05,
"loss": 9.4119,
"step": 33350
},
{
"epoch": 2.46,
"learning_rate": 6.695161274120802e-05,
"loss": 8.4576,
"step": 33400
},
{
"epoch": 2.47,
"learning_rate": 6.690138722865668e-05,
"loss": 8.024,
"step": 33450
},
{
"epoch": 2.47,
"learning_rate": 6.685116171610531e-05,
"loss": 9.1605,
"step": 33500
},
{
"epoch": 2.47,
"learning_rate": 6.680093620355397e-05,
"loss": 8.3661,
"step": 33550
},
{
"epoch": 2.48,
"learning_rate": 6.67507106910026e-05,
"loss": 8.4145,
"step": 33600
},
{
"epoch": 2.48,
"learning_rate": 6.670048517845124e-05,
"loss": 7.824,
"step": 33650
},
{
"epoch": 2.49,
"learning_rate": 6.66502596658999e-05,
"loss": 9.129,
"step": 33700
},
{
"epoch": 2.49,
"learning_rate": 6.660003415334853e-05,
"loss": 9.0876,
"step": 33750
},
{
"epoch": 2.49,
"learning_rate": 6.654980864079719e-05,
"loss": 8.6961,
"step": 33800
},
{
"epoch": 2.5,
"learning_rate": 6.649958312824582e-05,
"loss": 8.1584,
"step": 33850
},
{
"epoch": 2.5,
"learning_rate": 6.644935761569448e-05,
"loss": 8.6587,
"step": 33900
},
{
"epoch": 2.5,
"learning_rate": 6.639913210314311e-05,
"loss": 8.1059,
"step": 33950
},
{
"epoch": 2.51,
"learning_rate": 6.634890659059176e-05,
"loss": 9.2588,
"step": 34000
},
{
"epoch": 2.51,
"learning_rate": 6.629868107804041e-05,
"loss": 8.6443,
"step": 34050
},
{
"epoch": 2.52,
"learning_rate": 6.624845556548905e-05,
"loss": 8.8006,
"step": 34100
},
{
"epoch": 2.52,
"learning_rate": 6.61982300529377e-05,
"loss": 9.2288,
"step": 34150
},
{
"epoch": 2.52,
"learning_rate": 6.614800454038633e-05,
"loss": 9.0328,
"step": 34200
},
{
"epoch": 2.53,
"learning_rate": 6.609777902783499e-05,
"loss": 7.8269,
"step": 34250
},
{
"epoch": 2.53,
"learning_rate": 6.604755351528362e-05,
"loss": 8.5883,
"step": 34300
},
{
"epoch": 2.53,
"learning_rate": 6.599732800273227e-05,
"loss": 9.9388,
"step": 34350
},
{
"epoch": 2.54,
"learning_rate": 6.594710249018092e-05,
"loss": 8.6776,
"step": 34400
},
{
"epoch": 2.54,
"learning_rate": 6.589687697762956e-05,
"loss": 7.2287,
"step": 34450
},
{
"epoch": 2.54,
"learning_rate": 6.58466514650782e-05,
"loss": 7.7042,
"step": 34500
},
{
"epoch": 2.55,
"learning_rate": 6.579642595252685e-05,
"loss": 9.0004,
"step": 34550
},
{
"epoch": 2.55,
"learning_rate": 6.57462004399755e-05,
"loss": 9.3279,
"step": 34600
},
{
"epoch": 2.56,
"learning_rate": 6.569597492742414e-05,
"loss": 8.9144,
"step": 34650
},
{
"epoch": 2.56,
"learning_rate": 6.564574941487278e-05,
"loss": 9.3319,
"step": 34700
},
{
"epoch": 2.56,
"learning_rate": 6.559552390232143e-05,
"loss": 9.4986,
"step": 34750
},
{
"epoch": 2.57,
"learning_rate": 6.554529838977007e-05,
"loss": 9.002,
"step": 34800
},
{
"epoch": 2.57,
"learning_rate": 6.549507287721871e-05,
"loss": 8.6061,
"step": 34850
},
{
"epoch": 2.57,
"learning_rate": 6.544484736466736e-05,
"loss": 7.4598,
"step": 34900
},
{
"epoch": 2.58,
"learning_rate": 6.5394621852116e-05,
"loss": 8.6618,
"step": 34950
},
{
"epoch": 2.58,
"learning_rate": 6.534439633956465e-05,
"loss": 9.0226,
"step": 35000
},
{
"epoch": 2.59,
"learning_rate": 6.529417082701329e-05,
"loss": 7.9738,
"step": 35050
},
{
"epoch": 2.59,
"learning_rate": 6.524394531446193e-05,
"loss": 8.7871,
"step": 35100
},
{
"epoch": 2.59,
"learning_rate": 6.519371980191058e-05,
"loss": 8.8744,
"step": 35150
},
{
"epoch": 2.6,
"learning_rate": 6.514349428935924e-05,
"loss": 8.3771,
"step": 35200
},
{
"epoch": 2.6,
"learning_rate": 6.509326877680787e-05,
"loss": 8.058,
"step": 35250
},
{
"epoch": 2.6,
"learning_rate": 6.504304326425652e-05,
"loss": 8.2627,
"step": 35300
},
{
"epoch": 2.61,
"learning_rate": 6.499281775170515e-05,
"loss": 8.1643,
"step": 35350
},
{
"epoch": 2.61,
"learning_rate": 6.49425922391538e-05,
"loss": 7.888,
"step": 35400
},
{
"epoch": 2.61,
"learning_rate": 6.489236672660246e-05,
"loss": 7.9235,
"step": 35450
},
{
"epoch": 2.62,
"learning_rate": 6.484214121405109e-05,
"loss": 8.1139,
"step": 35500
},
{
"epoch": 2.62,
"learning_rate": 6.479191570149974e-05,
"loss": 8.7467,
"step": 35550
},
{
"epoch": 2.63,
"learning_rate": 6.474169018894837e-05,
"loss": 7.4693,
"step": 35600
},
{
"epoch": 2.63,
"learning_rate": 6.469146467639703e-05,
"loss": 8.5167,
"step": 35650
},
{
"epoch": 2.63,
"learning_rate": 6.464123916384566e-05,
"loss": 9.5274,
"step": 35700
},
{
"epoch": 2.64,
"learning_rate": 6.459101365129432e-05,
"loss": 8.9735,
"step": 35750
},
{
"epoch": 2.64,
"learning_rate": 6.454078813874296e-05,
"loss": 8.1756,
"step": 35800
},
{
"epoch": 2.64,
"learning_rate": 6.449056262619161e-05,
"loss": 7.8084,
"step": 35850
},
{
"epoch": 2.65,
"learning_rate": 6.444033711364025e-05,
"loss": 8.2671,
"step": 35900
},
{
"epoch": 2.65,
"learning_rate": 6.439011160108888e-05,
"loss": 8.6628,
"step": 35950
},
{
"epoch": 2.66,
"learning_rate": 6.433988608853754e-05,
"loss": 9.8654,
"step": 36000
},
{
"epoch": 2.66,
"learning_rate": 6.428966057598617e-05,
"loss": 9.104,
"step": 36050
},
{
"epoch": 2.66,
"learning_rate": 6.423943506343483e-05,
"loss": 9.4156,
"step": 36100
},
{
"epoch": 2.67,
"learning_rate": 6.418920955088347e-05,
"loss": 8.9803,
"step": 36150
},
{
"epoch": 2.67,
"learning_rate": 6.413898403833212e-05,
"loss": 8.9584,
"step": 36200
},
{
"epoch": 2.67,
"learning_rate": 6.408875852578076e-05,
"loss": 7.3683,
"step": 36250
},
{
"epoch": 2.68,
"learning_rate": 6.40385330132294e-05,
"loss": 8.3277,
"step": 36300
},
{
"epoch": 2.68,
"learning_rate": 6.398830750067805e-05,
"loss": 9.3236,
"step": 36350
},
{
"epoch": 2.68,
"learning_rate": 6.393808198812669e-05,
"loss": 8.6918,
"step": 36400
},
{
"epoch": 2.69,
"learning_rate": 6.388785647557534e-05,
"loss": 8.9422,
"step": 36450
},
{
"epoch": 2.69,
"learning_rate": 6.383763096302398e-05,
"loss": 8.8438,
"step": 36500
},
{
"epoch": 2.7,
"learning_rate": 6.378740545047263e-05,
"loss": 8.7752,
"step": 36550
},
{
"epoch": 2.7,
"learning_rate": 6.373717993792127e-05,
"loss": 8.6483,
"step": 36600
},
{
"epoch": 2.7,
"learning_rate": 6.368695442536991e-05,
"loss": 8.5753,
"step": 36650
},
{
"epoch": 2.71,
"learning_rate": 6.363672891281856e-05,
"loss": 8.1893,
"step": 36700
},
{
"epoch": 2.71,
"learning_rate": 6.35865034002672e-05,
"loss": 8.189,
"step": 36750
},
{
"epoch": 2.71,
"learning_rate": 6.353627788771585e-05,
"loss": 8.2979,
"step": 36800
},
{
"epoch": 2.72,
"learning_rate": 6.348605237516449e-05,
"loss": 8.3904,
"step": 36850
},
{
"epoch": 2.72,
"learning_rate": 6.343582686261313e-05,
"loss": 9.3274,
"step": 36900
},
{
"epoch": 2.73,
"learning_rate": 6.338560135006179e-05,
"loss": 7.7663,
"step": 36950
},
{
"epoch": 2.73,
"learning_rate": 6.333537583751042e-05,
"loss": 8.2105,
"step": 37000
},
{
"epoch": 2.73,
"learning_rate": 6.328515032495907e-05,
"loss": 8.035,
"step": 37050
},
{
"epoch": 2.74,
"learning_rate": 6.323492481240771e-05,
"loss": 9.5032,
"step": 37100
},
{
"epoch": 2.74,
"learning_rate": 6.318469929985635e-05,
"loss": 8.3856,
"step": 37150
},
{
"epoch": 2.74,
"learning_rate": 6.3134473787305e-05,
"loss": 8.9941,
"step": 37200
},
{
"epoch": 2.75,
"learning_rate": 6.308424827475364e-05,
"loss": 8.3987,
"step": 37250
},
{
"epoch": 2.75,
"learning_rate": 6.30340227622023e-05,
"loss": 9.1753,
"step": 37300
},
{
"epoch": 2.76,
"learning_rate": 6.298379724965093e-05,
"loss": 7.9557,
"step": 37350
},
{
"epoch": 2.76,
"learning_rate": 6.293357173709959e-05,
"loss": 7.725,
"step": 37400
},
{
"epoch": 2.76,
"learning_rate": 6.288334622454822e-05,
"loss": 8.0807,
"step": 37450
},
{
"epoch": 2.77,
"learning_rate": 6.283312071199688e-05,
"loss": 8.6492,
"step": 37500
},
{
"epoch": 2.77,
"learning_rate": 6.27828951994455e-05,
"loss": 8.4716,
"step": 37550
},
{
"epoch": 2.77,
"learning_rate": 6.273266968689416e-05,
"loss": 8.7209,
"step": 37600
},
{
"epoch": 2.78,
"learning_rate": 6.268244417434281e-05,
"loss": 8.4902,
"step": 37650
},
{
"epoch": 2.78,
"learning_rate": 6.263221866179144e-05,
"loss": 7.9589,
"step": 37700
},
{
"epoch": 2.78,
"learning_rate": 6.25819931492401e-05,
"loss": 9.3285,
"step": 37750
},
{
"epoch": 2.79,
"learning_rate": 6.253176763668873e-05,
"loss": 9.0506,
"step": 37800
},
{
"epoch": 2.79,
"learning_rate": 6.248154212413738e-05,
"loss": 7.9992,
"step": 37850
},
{
"epoch": 2.8,
"learning_rate": 6.243131661158603e-05,
"loss": 8.029,
"step": 37900
},
{
"epoch": 2.8,
"learning_rate": 6.238109109903467e-05,
"loss": 8.6681,
"step": 37950
},
{
"epoch": 2.8,
"learning_rate": 6.233086558648332e-05,
"loss": 8.5906,
"step": 38000
},
{
"epoch": 2.81,
"learning_rate": 6.228064007393196e-05,
"loss": 10.4719,
"step": 38050
},
{
"epoch": 2.81,
"learning_rate": 6.22304145613806e-05,
"loss": 8.2759,
"step": 38100
},
{
"epoch": 2.81,
"learning_rate": 6.218018904882925e-05,
"loss": 8.2633,
"step": 38150
},
{
"epoch": 2.82,
"learning_rate": 6.212996353627789e-05,
"loss": 8.5218,
"step": 38200
},
{
"epoch": 2.82,
"learning_rate": 6.207973802372654e-05,
"loss": 8.0609,
"step": 38250
},
{
"epoch": 2.83,
"learning_rate": 6.202951251117518e-05,
"loss": 9.3672,
"step": 38300
},
{
"epoch": 2.83,
"learning_rate": 6.197928699862382e-05,
"loss": 10.1768,
"step": 38350
},
{
"epoch": 2.83,
"learning_rate": 6.192906148607247e-05,
"loss": 9.4389,
"step": 38400
},
{
"epoch": 2.84,
"learning_rate": 6.187883597352111e-05,
"loss": 7.6737,
"step": 38450
},
{
"epoch": 2.84,
"learning_rate": 6.182861046096976e-05,
"loss": 9.2337,
"step": 38500
},
{
"epoch": 2.84,
"learning_rate": 6.17783849484184e-05,
"loss": 8.7846,
"step": 38550
},
{
"epoch": 2.85,
"learning_rate": 6.172815943586704e-05,
"loss": 7.8709,
"step": 38600
},
{
"epoch": 2.85,
"learning_rate": 6.167793392331569e-05,
"loss": 8.8688,
"step": 38650
},
{
"epoch": 2.85,
"learning_rate": 6.162770841076435e-05,
"loss": 8.4087,
"step": 38700
},
{
"epoch": 2.86,
"learning_rate": 6.157748289821298e-05,
"loss": 7.7129,
"step": 38750
},
{
"epoch": 2.86,
"learning_rate": 6.152725738566162e-05,
"loss": 9.3196,
"step": 38800
},
{
"epoch": 2.87,
"learning_rate": 6.147703187311027e-05,
"loss": 8.8242,
"step": 38850
},
{
"epoch": 2.87,
"learning_rate": 6.142680636055891e-05,
"loss": 8.4237,
"step": 38900
},
{
"epoch": 2.87,
"learning_rate": 6.137658084800755e-05,
"loss": 8.9383,
"step": 38950
},
{
"epoch": 2.88,
"learning_rate": 6.13263553354562e-05,
"loss": 8.3749,
"step": 39000
},
{
"epoch": 2.88,
"learning_rate": 6.127612982290485e-05,
"loss": 8.8894,
"step": 39050
},
{
"epoch": 2.88,
"learning_rate": 6.122590431035349e-05,
"loss": 8.2975,
"step": 39100
},
{
"epoch": 2.89,
"learning_rate": 6.117567879780214e-05,
"loss": 8.0517,
"step": 39150
},
{
"epoch": 2.89,
"learning_rate": 6.112545328525077e-05,
"loss": 8.0154,
"step": 39200
},
{
"epoch": 2.9,
"learning_rate": 6.107522777269943e-05,
"loss": 8.4887,
"step": 39250
},
{
"epoch": 2.9,
"learning_rate": 6.102500226014807e-05,
"loss": 8.7064,
"step": 39300
},
{
"epoch": 2.9,
"learning_rate": 6.0974776747596706e-05,
"loss": 9.7375,
"step": 39350
},
{
"epoch": 2.91,
"learning_rate": 6.0924551235045357e-05,
"loss": 8.8614,
"step": 39400
},
{
"epoch": 2.91,
"learning_rate": 6.0874325722493994e-05,
"loss": 8.302,
"step": 39450
},
{
"epoch": 2.91,
"learning_rate": 6.0824100209942645e-05,
"loss": 7.8469,
"step": 39500
},
{
"epoch": 2.92,
"learning_rate": 6.077387469739129e-05,
"loss": 9.0706,
"step": 39550
},
{
"epoch": 2.92,
"learning_rate": 6.072364918483994e-05,
"loss": 9.1398,
"step": 39600
},
{
"epoch": 2.92,
"learning_rate": 6.067342367228858e-05,
"loss": 8.1838,
"step": 39650
},
{
"epoch": 2.93,
"learning_rate": 6.062319815973723e-05,
"loss": 9.2303,
"step": 39700
},
{
"epoch": 2.93,
"learning_rate": 6.0572972647185865e-05,
"loss": 8.3715,
"step": 39750
},
{
"epoch": 2.94,
"learning_rate": 6.0522747134634516e-05,
"loss": 8.409,
"step": 39800
},
{
"epoch": 2.94,
"learning_rate": 6.047252162208315e-05,
"loss": 8.6441,
"step": 39850
},
{
"epoch": 2.94,
"learning_rate": 6.04222961095318e-05,
"loss": 9.0975,
"step": 39900
},
{
"epoch": 2.95,
"learning_rate": 6.037207059698045e-05,
"loss": 8.0691,
"step": 39950
},
{
"epoch": 2.95,
"learning_rate": 6.0321845084429085e-05,
"loss": 8.6646,
"step": 40000
},
{
"epoch": 2.95,
"eval_loss": 8.163222312927246,
"eval_runtime": 957.6189,
"eval_samples_per_second": 13.678,
"eval_steps_per_second": 3.42,
"eval_wer": 0.22493805384066187,
"step": 40000
},
{
"epoch": 2.95,
"learning_rate": 6.0271619571877736e-05,
"loss": 8.4278,
"step": 40050
},
{
"epoch": 2.96,
"learning_rate": 6.022139405932637e-05,
"loss": 8.1656,
"step": 40100
},
{
"epoch": 2.96,
"learning_rate": 6.0171168546775024e-05,
"loss": 7.7975,
"step": 40150
},
{
"epoch": 2.97,
"learning_rate": 6.012094303422366e-05,
"loss": 7.5465,
"step": 40200
},
{
"epoch": 2.97,
"learning_rate": 6.007071752167231e-05,
"loss": 8.3986,
"step": 40250
},
{
"epoch": 2.97,
"learning_rate": 6.0020492009120956e-05,
"loss": 8.3762,
"step": 40300
},
{
"epoch": 2.98,
"learning_rate": 5.997026649656961e-05,
"loss": 8.6175,
"step": 40350
},
{
"epoch": 2.98,
"learning_rate": 5.9920040984018244e-05,
"loss": 8.5622,
"step": 40400
},
{
"epoch": 2.98,
"learning_rate": 5.9869815471466895e-05,
"loss": 8.1824,
"step": 40450
},
{
"epoch": 2.99,
"learning_rate": 5.981958995891553e-05,
"loss": 7.2886,
"step": 40500
},
{
"epoch": 2.99,
"learning_rate": 5.9769364446364177e-05,
"loss": 8.3469,
"step": 40550
},
{
"epoch": 2.99,
"learning_rate": 5.971913893381282e-05,
"loss": 8.6257,
"step": 40600
},
{
"epoch": 3.0,
"learning_rate": 5.9668913421261465e-05,
"loss": 7.7071,
"step": 40650
},
{
"epoch": 3.0,
"learning_rate": 5.9618687908710116e-05,
"loss": 7.8413,
"step": 40700
},
{
"epoch": 3.01,
"learning_rate": 5.956846239615875e-05,
"loss": 7.6704,
"step": 40750
},
{
"epoch": 3.01,
"learning_rate": 5.9518236883607404e-05,
"loss": 7.3902,
"step": 40800
},
{
"epoch": 3.01,
"learning_rate": 5.946801137105604e-05,
"loss": 8.3296,
"step": 40850
},
{
"epoch": 3.02,
"learning_rate": 5.941778585850469e-05,
"loss": 7.0884,
"step": 40900
},
{
"epoch": 3.02,
"learning_rate": 5.9367560345953336e-05,
"loss": 7.043,
"step": 40950
},
{
"epoch": 3.02,
"learning_rate": 5.931733483340198e-05,
"loss": 7.5367,
"step": 41000
},
{
"epoch": 3.03,
"learning_rate": 5.9267109320850624e-05,
"loss": 8.3064,
"step": 41050
},
{
"epoch": 3.03,
"learning_rate": 5.921688380829926e-05,
"loss": 7.6769,
"step": 41100
},
{
"epoch": 3.04,
"learning_rate": 5.916665829574791e-05,
"loss": 9.067,
"step": 41150
},
{
"epoch": 3.04,
"learning_rate": 5.911643278319655e-05,
"loss": 8.3565,
"step": 41200
},
{
"epoch": 3.04,
"learning_rate": 5.90662072706452e-05,
"loss": 7.8335,
"step": 41250
},
{
"epoch": 3.05,
"learning_rate": 5.9015981758093844e-05,
"loss": 7.9617,
"step": 41300
},
{
"epoch": 3.05,
"learning_rate": 5.8965756245542495e-05,
"loss": 8.6728,
"step": 41350
},
{
"epoch": 3.05,
"learning_rate": 5.891553073299113e-05,
"loss": 7.9142,
"step": 41400
},
{
"epoch": 3.06,
"learning_rate": 5.886530522043978e-05,
"loss": 7.7702,
"step": 41450
},
{
"epoch": 3.06,
"learning_rate": 5.881507970788842e-05,
"loss": 8.2997,
"step": 41500
},
{
"epoch": 3.06,
"learning_rate": 5.876485419533707e-05,
"loss": 8.1519,
"step": 41550
},
{
"epoch": 3.07,
"learning_rate": 5.871462868278571e-05,
"loss": 7.3762,
"step": 41600
},
{
"epoch": 3.07,
"learning_rate": 5.866440317023435e-05,
"loss": 7.5129,
"step": 41650
},
{
"epoch": 3.08,
"learning_rate": 5.8614177657683e-05,
"loss": 8.2537,
"step": 41700
},
{
"epoch": 3.08,
"learning_rate": 5.856395214513164e-05,
"loss": 8.4148,
"step": 41750
},
{
"epoch": 3.08,
"learning_rate": 5.851372663258029e-05,
"loss": 7.1737,
"step": 41800
},
{
"epoch": 3.09,
"learning_rate": 5.846350112002893e-05,
"loss": 7.2628,
"step": 41850
},
{
"epoch": 3.09,
"learning_rate": 5.841327560747758e-05,
"loss": 7.2933,
"step": 41900
},
{
"epoch": 3.09,
"learning_rate": 5.836305009492622e-05,
"loss": 7.7675,
"step": 41950
},
{
"epoch": 3.1,
"learning_rate": 5.831282458237487e-05,
"loss": 8.2344,
"step": 42000
},
{
"epoch": 3.1,
"learning_rate": 5.826259906982351e-05,
"loss": 7.1329,
"step": 42050
},
{
"epoch": 3.11,
"learning_rate": 5.821237355727216e-05,
"loss": 7.3924,
"step": 42100
},
{
"epoch": 3.11,
"learning_rate": 5.81621480447208e-05,
"loss": 6.6189,
"step": 42150
},
{
"epoch": 3.11,
"learning_rate": 5.811192253216944e-05,
"loss": 7.3457,
"step": 42200
},
{
"epoch": 3.12,
"learning_rate": 5.806169701961809e-05,
"loss": 8.9924,
"step": 42250
},
{
"epoch": 3.12,
"learning_rate": 5.8011471507066725e-05,
"loss": 7.6315,
"step": 42300
},
{
"epoch": 3.12,
"learning_rate": 5.7961245994515376e-05,
"loss": 8.4726,
"step": 42350
},
{
"epoch": 3.13,
"learning_rate": 5.791102048196402e-05,
"loss": 7.1755,
"step": 42400
},
{
"epoch": 3.13,
"learning_rate": 5.786079496941267e-05,
"loss": 7.5716,
"step": 42450
},
{
"epoch": 3.13,
"learning_rate": 5.781056945686131e-05,
"loss": 7.938,
"step": 42500
},
{
"epoch": 3.14,
"learning_rate": 5.776034394430996e-05,
"loss": 7.3833,
"step": 42550
},
{
"epoch": 3.14,
"learning_rate": 5.7710118431758596e-05,
"loss": 6.4276,
"step": 42600
},
{
"epoch": 3.15,
"learning_rate": 5.765989291920725e-05,
"loss": 6.8907,
"step": 42650
},
{
"epoch": 3.15,
"learning_rate": 5.7609667406655884e-05,
"loss": 7.7592,
"step": 42700
},
{
"epoch": 3.15,
"learning_rate": 5.755944189410453e-05,
"loss": 7.4997,
"step": 42750
},
{
"epoch": 3.16,
"learning_rate": 5.750921638155318e-05,
"loss": 7.2821,
"step": 42800
},
{
"epoch": 3.16,
"learning_rate": 5.745899086900182e-05,
"loss": 7.4861,
"step": 42850
},
{
"epoch": 3.16,
"learning_rate": 5.740876535645047e-05,
"loss": 7.9266,
"step": 42900
},
{
"epoch": 3.17,
"learning_rate": 5.7358539843899105e-05,
"loss": 7.6244,
"step": 42950
},
{
"epoch": 3.17,
"learning_rate": 5.7308314331347756e-05,
"loss": 7.382,
"step": 43000
},
{
"epoch": 3.18,
"learning_rate": 5.725808881879639e-05,
"loss": 8.1925,
"step": 43050
},
{
"epoch": 3.18,
"learning_rate": 5.7207863306245044e-05,
"loss": 8.3185,
"step": 43100
},
{
"epoch": 3.18,
"learning_rate": 5.715763779369369e-05,
"loss": 7.091,
"step": 43150
},
{
"epoch": 3.19,
"learning_rate": 5.710741228114234e-05,
"loss": 7.8352,
"step": 43200
},
{
"epoch": 3.19,
"learning_rate": 5.7057186768590976e-05,
"loss": 6.6085,
"step": 43250
},
{
"epoch": 3.19,
"learning_rate": 5.700696125603963e-05,
"loss": 7.8052,
"step": 43300
},
{
"epoch": 3.2,
"learning_rate": 5.6956735743488264e-05,
"loss": 8.1999,
"step": 43350
},
{
"epoch": 3.2,
"learning_rate": 5.690651023093691e-05,
"loss": 7.2801,
"step": 43400
},
{
"epoch": 3.2,
"learning_rate": 5.685628471838555e-05,
"loss": 7.6289,
"step": 43450
},
{
"epoch": 3.21,
"learning_rate": 5.6806059205834196e-05,
"loss": 6.8215,
"step": 43500
},
{
"epoch": 3.21,
"learning_rate": 5.675583369328285e-05,
"loss": 7.1678,
"step": 43550
},
{
"epoch": 3.22,
"learning_rate": 5.6705608180731484e-05,
"loss": 7.6612,
"step": 43600
},
{
"epoch": 3.22,
"learning_rate": 5.6655382668180135e-05,
"loss": 7.8899,
"step": 43650
},
{
"epoch": 3.22,
"learning_rate": 5.660515715562877e-05,
"loss": 7.8546,
"step": 43700
},
{
"epoch": 3.23,
"learning_rate": 5.655493164307742e-05,
"loss": 7.319,
"step": 43750
},
{
"epoch": 3.23,
"learning_rate": 5.650470613052607e-05,
"loss": 7.3317,
"step": 43800
},
{
"epoch": 3.23,
"learning_rate": 5.645448061797471e-05,
"loss": 7.8875,
"step": 43850
},
{
"epoch": 3.24,
"learning_rate": 5.6404255105423355e-05,
"loss": 7.8145,
"step": 43900
},
{
"epoch": 3.24,
"learning_rate": 5.635402959287199e-05,
"loss": 7.0667,
"step": 43950
},
{
"epoch": 3.25,
"learning_rate": 5.6303804080320643e-05,
"loss": 7.7603,
"step": 44000
},
{
"epoch": 3.25,
"learning_rate": 5.625357856776928e-05,
"loss": 7.6111,
"step": 44050
},
{
"epoch": 3.25,
"learning_rate": 5.620335305521793e-05,
"loss": 7.9858,
"step": 44100
},
{
"epoch": 3.26,
"learning_rate": 5.6153127542666576e-05,
"loss": 8.9896,
"step": 44150
},
{
"epoch": 3.26,
"learning_rate": 5.6102902030115226e-05,
"loss": 8.4081,
"step": 44200
},
{
"epoch": 3.26,
"learning_rate": 5.6052676517563864e-05,
"loss": 7.4748,
"step": 44250
},
{
"epoch": 3.27,
"learning_rate": 5.6002451005012515e-05,
"loss": 8.2133,
"step": 44300
},
{
"epoch": 3.27,
"learning_rate": 5.595222549246115e-05,
"loss": 7.3073,
"step": 44350
},
{
"epoch": 3.28,
"learning_rate": 5.59019999799098e-05,
"loss": 7.9638,
"step": 44400
},
{
"epoch": 3.28,
"learning_rate": 5.585177446735844e-05,
"loss": 7.9653,
"step": 44450
},
{
"epoch": 3.28,
"learning_rate": 5.5801548954807084e-05,
"loss": 7.8583,
"step": 44500
},
{
"epoch": 3.29,
"learning_rate": 5.5751323442255735e-05,
"loss": 8.0561,
"step": 44550
},
{
"epoch": 3.29,
"learning_rate": 5.570109792970437e-05,
"loss": 8.1276,
"step": 44600
},
{
"epoch": 3.29,
"learning_rate": 5.565087241715302e-05,
"loss": 7.7357,
"step": 44650
},
{
"epoch": 3.3,
"learning_rate": 5.560064690460166e-05,
"loss": 7.7529,
"step": 44700
},
{
"epoch": 3.3,
"learning_rate": 5.555042139205031e-05,
"loss": 7.2583,
"step": 44750
},
{
"epoch": 3.3,
"learning_rate": 5.550019587949895e-05,
"loss": 6.4675,
"step": 44800
},
{
"epoch": 3.31,
"learning_rate": 5.54499703669476e-05,
"loss": 7.3658,
"step": 44850
},
{
"epoch": 3.31,
"learning_rate": 5.539974485439624e-05,
"loss": 8.278,
"step": 44900
},
{
"epoch": 3.32,
"learning_rate": 5.5349519341844894e-05,
"loss": 7.3867,
"step": 44950
},
{
"epoch": 3.32,
"learning_rate": 5.529929382929353e-05,
"loss": 7.4187,
"step": 45000
},
{
"epoch": 3.32,
"learning_rate": 5.524906831674217e-05,
"loss": 7.5281,
"step": 45050
},
{
"epoch": 3.33,
"learning_rate": 5.519884280419082e-05,
"loss": 7.8815,
"step": 45100
},
{
"epoch": 3.33,
"learning_rate": 5.514861729163946e-05,
"loss": 7.2487,
"step": 45150
},
{
"epoch": 3.33,
"learning_rate": 5.509839177908811e-05,
"loss": 8.3441,
"step": 45200
},
{
"epoch": 3.34,
"learning_rate": 5.504816626653675e-05,
"loss": 7.4892,
"step": 45250
},
{
"epoch": 3.34,
"learning_rate": 5.49979407539854e-05,
"loss": 7.7789,
"step": 45300
},
{
"epoch": 3.35,
"learning_rate": 5.494771524143404e-05,
"loss": 7.3951,
"step": 45350
},
{
"epoch": 3.35,
"learning_rate": 5.489748972888269e-05,
"loss": 7.8756,
"step": 45400
},
{
"epoch": 3.35,
"learning_rate": 5.484726421633133e-05,
"loss": 7.9274,
"step": 45450
},
{
"epoch": 3.36,
"learning_rate": 5.479703870377998e-05,
"loss": 8.1525,
"step": 45500
},
{
"epoch": 3.36,
"learning_rate": 5.4746813191228616e-05,
"loss": 7.5597,
"step": 45550
},
{
"epoch": 3.36,
"learning_rate": 5.469658767867726e-05,
"loss": 7.8939,
"step": 45600
},
{
"epoch": 3.37,
"learning_rate": 5.464636216612591e-05,
"loss": 6.1451,
"step": 45650
},
{
"epoch": 3.37,
"learning_rate": 5.459613665357455e-05,
"loss": 7.224,
"step": 45700
},
{
"epoch": 3.37,
"learning_rate": 5.45459111410232e-05,
"loss": 7.2489,
"step": 45750
},
{
"epoch": 3.38,
"learning_rate": 5.4495685628471836e-05,
"loss": 7.4162,
"step": 45800
},
{
"epoch": 3.38,
"learning_rate": 5.444546011592049e-05,
"loss": 6.8503,
"step": 45850
},
{
"epoch": 3.39,
"learning_rate": 5.4395234603369124e-05,
"loss": 6.7087,
"step": 45900
},
{
"epoch": 3.39,
"learning_rate": 5.4345009090817775e-05,
"loss": 6.9697,
"step": 45950
},
{
"epoch": 3.39,
"learning_rate": 5.429478357826642e-05,
"loss": 7.8369,
"step": 46000
},
{
"epoch": 3.4,
"learning_rate": 5.424455806571507e-05,
"loss": 7.7567,
"step": 46050
},
{
"epoch": 3.4,
"learning_rate": 5.419433255316371e-05,
"loss": 6.6241,
"step": 46100
},
{
"epoch": 3.4,
"learning_rate": 5.414410704061236e-05,
"loss": 7.5218,
"step": 46150
},
{
"epoch": 3.41,
"learning_rate": 5.4093881528060995e-05,
"loss": 7.2338,
"step": 46200
},
{
"epoch": 3.41,
"learning_rate": 5.404365601550964e-05,
"loss": 7.0707,
"step": 46250
},
{
"epoch": 3.42,
"learning_rate": 5.3993430502958283e-05,
"loss": 7.6922,
"step": 46300
},
{
"epoch": 3.42,
"learning_rate": 5.394320499040693e-05,
"loss": 8.6056,
"step": 46350
},
{
"epoch": 3.42,
"learning_rate": 5.389297947785558e-05,
"loss": 7.4641,
"step": 46400
},
{
"epoch": 3.43,
"learning_rate": 5.3842753965304216e-05,
"loss": 7.1716,
"step": 46450
},
{
"epoch": 3.43,
"learning_rate": 5.3792528452752866e-05,
"loss": 7.6382,
"step": 46500
},
{
"epoch": 3.43,
"learning_rate": 5.3742302940201504e-05,
"loss": 7.0739,
"step": 46550
},
{
"epoch": 3.44,
"learning_rate": 5.3692077427650155e-05,
"loss": 7.8667,
"step": 46600
},
{
"epoch": 3.44,
"learning_rate": 5.36418519150988e-05,
"loss": 7.559,
"step": 46650
},
{
"epoch": 3.44,
"learning_rate": 5.359162640254744e-05,
"loss": 7.6078,
"step": 46700
},
{
"epoch": 3.45,
"learning_rate": 5.354140088999609e-05,
"loss": 7.7994,
"step": 46750
},
{
"epoch": 3.45,
"learning_rate": 5.3491175377444724e-05,
"loss": 7.0418,
"step": 46800
},
{
"epoch": 3.46,
"learning_rate": 5.3440949864893375e-05,
"loss": 7.3261,
"step": 46850
},
{
"epoch": 3.46,
"learning_rate": 5.339072435234201e-05,
"loss": 7.9914,
"step": 46900
},
{
"epoch": 3.46,
"learning_rate": 5.334049883979066e-05,
"loss": 7.1998,
"step": 46950
},
{
"epoch": 3.47,
"learning_rate": 5.329027332723931e-05,
"loss": 7.3343,
"step": 47000
},
{
"epoch": 3.47,
"learning_rate": 5.324004781468796e-05,
"loss": 8.1604,
"step": 47050
},
{
"epoch": 3.47,
"learning_rate": 5.3189822302136595e-05,
"loss": 7.5405,
"step": 47100
},
{
"epoch": 3.48,
"learning_rate": 5.3139596789585246e-05,
"loss": 7.9409,
"step": 47150
},
{
"epoch": 3.48,
"learning_rate": 5.308937127703388e-05,
"loss": 8.0573,
"step": 47200
},
{
"epoch": 3.49,
"learning_rate": 5.3039145764482534e-05,
"loss": 7.2927,
"step": 47250
},
{
"epoch": 3.49,
"learning_rate": 5.298892025193117e-05,
"loss": 6.9476,
"step": 47300
},
{
"epoch": 3.49,
"learning_rate": 5.2938694739379815e-05,
"loss": 7.1999,
"step": 47350
},
{
"epoch": 3.5,
"learning_rate": 5.2888469226828466e-05,
"loss": 7.8224,
"step": 47400
},
{
"epoch": 3.5,
"learning_rate": 5.2838243714277103e-05,
"loss": 8.1369,
"step": 47450
},
{
"epoch": 3.5,
"learning_rate": 5.2788018201725754e-05,
"loss": 6.7302,
"step": 47500
},
{
"epoch": 3.51,
"learning_rate": 5.273779268917439e-05,
"loss": 8.0819,
"step": 47550
},
{
"epoch": 3.51,
"learning_rate": 5.268756717662304e-05,
"loss": 7.8832,
"step": 47600
},
{
"epoch": 3.51,
"learning_rate": 5.263734166407168e-05,
"loss": 8.4479,
"step": 47650
},
{
"epoch": 3.52,
"learning_rate": 5.258711615152033e-05,
"loss": 7.7838,
"step": 47700
},
{
"epoch": 3.52,
"learning_rate": 5.2536890638968975e-05,
"loss": 8.3843,
"step": 47750
},
{
"epoch": 3.53,
"learning_rate": 5.2486665126417625e-05,
"loss": 6.9055,
"step": 47800
},
{
"epoch": 3.53,
"learning_rate": 5.243643961386626e-05,
"loss": 6.6339,
"step": 47850
},
{
"epoch": 3.53,
"learning_rate": 5.23862141013149e-05,
"loss": 7.0316,
"step": 47900
},
{
"epoch": 3.54,
"learning_rate": 5.233598858876355e-05,
"loss": 7.4569,
"step": 47950
},
{
"epoch": 3.54,
"learning_rate": 5.228576307621219e-05,
"loss": 7.6204,
"step": 48000
},
{
"epoch": 3.54,
"learning_rate": 5.223553756366084e-05,
"loss": 7.1085,
"step": 48050
},
{
"epoch": 3.55,
"learning_rate": 5.218531205110948e-05,
"loss": 7.7254,
"step": 48100
},
{
"epoch": 3.55,
"learning_rate": 5.2135086538558134e-05,
"loss": 7.1486,
"step": 48150
},
{
"epoch": 3.56,
"learning_rate": 5.208486102600677e-05,
"loss": 6.9297,
"step": 48200
},
{
"epoch": 3.56,
"learning_rate": 5.203463551345542e-05,
"loss": 7.5314,
"step": 48250
},
{
"epoch": 3.56,
"learning_rate": 5.198441000090406e-05,
"loss": 7.68,
"step": 48300
},
{
"epoch": 3.57,
"learning_rate": 5.193418448835271e-05,
"loss": 7.9467,
"step": 48350
},
{
"epoch": 3.57,
"learning_rate": 5.188395897580135e-05,
"loss": 6.7188,
"step": 48400
},
{
"epoch": 3.57,
"learning_rate": 5.183373346325e-05,
"loss": 7.7619,
"step": 48450
},
{
"epoch": 3.58,
"learning_rate": 5.178350795069864e-05,
"loss": 7.6537,
"step": 48500
},
{
"epoch": 3.58,
"learning_rate": 5.173328243814728e-05,
"loss": 6.9593,
"step": 48550
},
{
"epoch": 3.58,
"learning_rate": 5.168305692559593e-05,
"loss": 7.4834,
"step": 48600
},
{
"epoch": 3.59,
"learning_rate": 5.163283141304457e-05,
"loss": 8.2864,
"step": 48650
},
{
"epoch": 3.59,
"learning_rate": 5.158260590049322e-05,
"loss": 7.234,
"step": 48700
},
{
"epoch": 3.6,
"learning_rate": 5.1532380387941856e-05,
"loss": 7.2513,
"step": 48750
},
{
"epoch": 3.6,
"learning_rate": 5.1482154875390506e-05,
"loss": 7.508,
"step": 48800
},
{
"epoch": 3.6,
"learning_rate": 5.143192936283915e-05,
"loss": 7.1513,
"step": 48850
},
{
"epoch": 3.61,
"learning_rate": 5.13817038502878e-05,
"loss": 7.8882,
"step": 48900
},
{
"epoch": 3.61,
"learning_rate": 5.133147833773644e-05,
"loss": 8.0859,
"step": 48950
},
{
"epoch": 3.61,
"learning_rate": 5.128125282518509e-05,
"loss": 7.5506,
"step": 49000
},
{
"epoch": 3.62,
"learning_rate": 5.123102731263373e-05,
"loss": 7.9777,
"step": 49050
},
{
"epoch": 3.62,
"learning_rate": 5.118080180008237e-05,
"loss": 8.3599,
"step": 49100
},
{
"epoch": 3.63,
"learning_rate": 5.1130576287531015e-05,
"loss": 7.105,
"step": 49150
},
{
"epoch": 3.63,
"learning_rate": 5.108035077497966e-05,
"loss": 8.2692,
"step": 49200
},
{
"epoch": 3.63,
"learning_rate": 5.103012526242831e-05,
"loss": 7.9098,
"step": 49250
},
{
"epoch": 3.64,
"learning_rate": 5.097989974987695e-05,
"loss": 7.1698,
"step": 49300
},
{
"epoch": 3.64,
"learning_rate": 5.09296742373256e-05,
"loss": 7.406,
"step": 49350
},
{
"epoch": 3.64,
"learning_rate": 5.0879448724774235e-05,
"loss": 8.3276,
"step": 49400
},
{
"epoch": 3.65,
"learning_rate": 5.0829223212222886e-05,
"loss": 7.5714,
"step": 49450
},
{
"epoch": 3.65,
"learning_rate": 5.077899769967153e-05,
"loss": 7.0839,
"step": 49500
},
{
"epoch": 3.65,
"learning_rate": 5.0728772187120174e-05,
"loss": 7.0589,
"step": 49550
},
{
"epoch": 3.66,
"learning_rate": 5.067854667456882e-05,
"loss": 7.4998,
"step": 49600
},
{
"epoch": 3.66,
"learning_rate": 5.0628321162017455e-05,
"loss": 7.3495,
"step": 49650
},
{
"epoch": 3.67,
"learning_rate": 5.0578095649466106e-05,
"loss": 7.5101,
"step": 49700
},
{
"epoch": 3.67,
"learning_rate": 5.0527870136914743e-05,
"loss": 6.7707,
"step": 49750
},
{
"epoch": 3.67,
"learning_rate": 5.0477644624363394e-05,
"loss": 7.5822,
"step": 49800
},
{
"epoch": 3.68,
"learning_rate": 5.042741911181204e-05,
"loss": 6.5937,
"step": 49850
},
{
"epoch": 3.68,
"learning_rate": 5.037719359926069e-05,
"loss": 7.2497,
"step": 49900
},
{
"epoch": 3.68,
"learning_rate": 5.0326968086709326e-05,
"loss": 7.35,
"step": 49950
},
{
"epoch": 3.69,
"learning_rate": 5.027674257415798e-05,
"loss": 7.767,
"step": 50000
},
{
"epoch": 3.69,
"learning_rate": 5.0226517061606615e-05,
"loss": 8.3228,
"step": 50050
},
{
"epoch": 3.7,
"learning_rate": 5.0176291549055265e-05,
"loss": 7.6905,
"step": 50100
},
{
"epoch": 3.7,
"learning_rate": 5.01260660365039e-05,
"loss": 7.8275,
"step": 50150
},
{
"epoch": 3.7,
"learning_rate": 5.007584052395255e-05,
"loss": 8.0724,
"step": 50200
},
{
"epoch": 3.71,
"learning_rate": 5.00256150114012e-05,
"loss": 7.0501,
"step": 50250
},
{
"epoch": 3.71,
"learning_rate": 4.997538949884984e-05,
"loss": 7.4269,
"step": 50300
},
{
"epoch": 3.71,
"learning_rate": 4.9925163986298486e-05,
"loss": 7.5186,
"step": 50350
},
{
"epoch": 3.72,
"learning_rate": 4.987493847374713e-05,
"loss": 8.2606,
"step": 50400
},
{
"epoch": 3.72,
"learning_rate": 4.9824712961195774e-05,
"loss": 8.2097,
"step": 50450
},
{
"epoch": 3.73,
"learning_rate": 4.977448744864441e-05,
"loss": 7.468,
"step": 50500
},
{
"epoch": 3.73,
"learning_rate": 4.9724261936093055e-05,
"loss": 8.2075,
"step": 50550
},
{
"epoch": 3.73,
"learning_rate": 4.9674036423541706e-05,
"loss": 7.3928,
"step": 50600
},
{
"epoch": 3.74,
"learning_rate": 4.962381091099035e-05,
"loss": 7.2907,
"step": 50650
},
{
"epoch": 3.74,
"learning_rate": 4.9573585398438994e-05,
"loss": 7.706,
"step": 50700
},
{
"epoch": 3.74,
"learning_rate": 4.952335988588764e-05,
"loss": 7.301,
"step": 50750
},
{
"epoch": 3.75,
"learning_rate": 4.947313437333628e-05,
"loss": 6.9109,
"step": 50800
},
{
"epoch": 3.75,
"learning_rate": 4.9422908860784926e-05,
"loss": 6.6967,
"step": 50850
},
{
"epoch": 3.75,
"learning_rate": 4.937268334823357e-05,
"loss": 5.9484,
"step": 50900
},
{
"epoch": 3.76,
"learning_rate": 4.9322457835682214e-05,
"loss": 7.8288,
"step": 50950
},
{
"epoch": 3.76,
"learning_rate": 4.9272232323130865e-05,
"loss": 7.3987,
"step": 51000
},
{
"epoch": 3.77,
"learning_rate": 4.92220068105795e-05,
"loss": 7.3714,
"step": 51050
},
{
"epoch": 3.77,
"learning_rate": 4.9171781298028147e-05,
"loss": 7.258,
"step": 51100
},
{
"epoch": 3.77,
"learning_rate": 4.912155578547679e-05,
"loss": 6.8541,
"step": 51150
},
{
"epoch": 3.78,
"learning_rate": 4.9071330272925435e-05,
"loss": 7.085,
"step": 51200
},
{
"epoch": 3.78,
"learning_rate": 4.902110476037408e-05,
"loss": 6.7827,
"step": 51250
},
{
"epoch": 3.78,
"learning_rate": 4.897087924782273e-05,
"loss": 6.6806,
"step": 51300
},
{
"epoch": 3.79,
"learning_rate": 4.8920653735271374e-05,
"loss": 7.2918,
"step": 51350
},
{
"epoch": 3.79,
"learning_rate": 4.887042822272002e-05,
"loss": 7.9022,
"step": 51400
},
{
"epoch": 3.8,
"learning_rate": 4.882020271016866e-05,
"loss": 7.6094,
"step": 51450
},
{
"epoch": 3.8,
"learning_rate": 4.8769977197617306e-05,
"loss": 8.1048,
"step": 51500
},
{
"epoch": 3.8,
"learning_rate": 4.871975168506595e-05,
"loss": 6.9056,
"step": 51550
},
{
"epoch": 3.81,
"learning_rate": 4.866952617251459e-05,
"loss": 6.4347,
"step": 51600
},
{
"epoch": 3.81,
"learning_rate": 4.861930065996324e-05,
"loss": 7.307,
"step": 51650
},
{
"epoch": 3.81,
"learning_rate": 4.856907514741188e-05,
"loss": 7.649,
"step": 51700
},
{
"epoch": 3.82,
"learning_rate": 4.8518849634860526e-05,
"loss": 6.7706,
"step": 51750
},
{
"epoch": 3.82,
"learning_rate": 4.846862412230917e-05,
"loss": 6.7943,
"step": 51800
},
{
"epoch": 3.82,
"learning_rate": 4.8418398609757814e-05,
"loss": 7.654,
"step": 51850
},
{
"epoch": 3.83,
"learning_rate": 4.836817309720646e-05,
"loss": 7.6245,
"step": 51900
},
{
"epoch": 3.83,
"learning_rate": 4.83179475846551e-05,
"loss": 7.8284,
"step": 51950
},
{
"epoch": 3.84,
"learning_rate": 4.8267722072103746e-05,
"loss": 6.9516,
"step": 52000
},
{
"epoch": 3.84,
"learning_rate": 4.82174965595524e-05,
"loss": 7.1367,
"step": 52050
},
{
"epoch": 3.84,
"learning_rate": 4.816727104700104e-05,
"loss": 7.4153,
"step": 52100
},
{
"epoch": 3.85,
"learning_rate": 4.8117045534449685e-05,
"loss": 6.5358,
"step": 52150
},
{
"epoch": 3.85,
"learning_rate": 4.806682002189832e-05,
"loss": 7.5443,
"step": 52200
},
{
"epoch": 3.85,
"learning_rate": 4.8016594509346967e-05,
"loss": 7.8184,
"step": 52250
},
{
"epoch": 3.86,
"learning_rate": 4.796636899679561e-05,
"loss": 6.9702,
"step": 52300
},
{
"epoch": 3.86,
"learning_rate": 4.791614348424426e-05,
"loss": 8.3988,
"step": 52350
},
{
"epoch": 3.87,
"learning_rate": 4.7865917971692905e-05,
"loss": 8.1025,
"step": 52400
},
{
"epoch": 3.87,
"learning_rate": 4.781569245914155e-05,
"loss": 7.81,
"step": 52450
},
{
"epoch": 3.87,
"learning_rate": 4.7765466946590194e-05,
"loss": 6.6809,
"step": 52500
},
{
"epoch": 3.88,
"learning_rate": 4.771524143403884e-05,
"loss": 6.81,
"step": 52550
},
{
"epoch": 3.88,
"learning_rate": 4.766501592148748e-05,
"loss": 7.1717,
"step": 52600
},
{
"epoch": 3.88,
"learning_rate": 4.7614790408936126e-05,
"loss": 7.3114,
"step": 52650
},
{
"epoch": 3.89,
"learning_rate": 4.756456489638477e-05,
"loss": 7.2819,
"step": 52700
},
{
"epoch": 3.89,
"learning_rate": 4.7514339383833414e-05,
"loss": 6.6964,
"step": 52750
},
{
"epoch": 3.89,
"learning_rate": 4.746411387128206e-05,
"loss": 8.1118,
"step": 52800
},
{
"epoch": 3.9,
"learning_rate": 4.74138883587307e-05,
"loss": 8.1901,
"step": 52850
},
{
"epoch": 3.9,
"learning_rate": 4.7363662846179346e-05,
"loss": 6.8883,
"step": 52900
},
{
"epoch": 3.91,
"learning_rate": 4.731343733362799e-05,
"loss": 7.2554,
"step": 52950
},
{
"epoch": 3.91,
"learning_rate": 4.7263211821076634e-05,
"loss": 7.402,
"step": 53000
},
{
"epoch": 3.91,
"learning_rate": 4.721298630852528e-05,
"loss": 8.8808,
"step": 53050
},
{
"epoch": 3.92,
"learning_rate": 4.716276079597393e-05,
"loss": 7.1652,
"step": 53100
},
{
"epoch": 3.92,
"learning_rate": 4.711253528342257e-05,
"loss": 6.884,
"step": 53150
},
{
"epoch": 3.92,
"learning_rate": 4.706230977087122e-05,
"loss": 7.4472,
"step": 53200
},
{
"epoch": 3.93,
"learning_rate": 4.701208425831986e-05,
"loss": 6.8787,
"step": 53250
},
{
"epoch": 3.93,
"learning_rate": 4.6961858745768505e-05,
"loss": 6.9316,
"step": 53300
},
{
"epoch": 3.94,
"learning_rate": 4.691163323321714e-05,
"loss": 7.1614,
"step": 53350
},
{
"epoch": 3.94,
"learning_rate": 4.6861407720665787e-05,
"loss": 7.193,
"step": 53400
},
{
"epoch": 3.94,
"learning_rate": 4.681118220811444e-05,
"loss": 7.5875,
"step": 53450
},
{
"epoch": 3.95,
"learning_rate": 4.676095669556308e-05,
"loss": 7.0836,
"step": 53500
},
{
"epoch": 3.95,
"learning_rate": 4.6710731183011725e-05,
"loss": 7.2054,
"step": 53550
},
{
"epoch": 3.95,
"learning_rate": 4.666050567046037e-05,
"loss": 6.95,
"step": 53600
},
{
"epoch": 3.96,
"learning_rate": 4.6610280157909014e-05,
"loss": 6.6366,
"step": 53650
},
{
"epoch": 3.96,
"learning_rate": 4.656005464535766e-05,
"loss": 6.7976,
"step": 53700
},
{
"epoch": 3.96,
"learning_rate": 4.65098291328063e-05,
"loss": 7.1371,
"step": 53750
},
{
"epoch": 3.97,
"learning_rate": 4.6459603620254946e-05,
"loss": 6.7457,
"step": 53800
},
{
"epoch": 3.97,
"learning_rate": 4.6409378107703597e-05,
"loss": 6.6139,
"step": 53850
},
{
"epoch": 3.98,
"learning_rate": 4.6359152595152234e-05,
"loss": 7.9291,
"step": 53900
},
{
"epoch": 3.98,
"learning_rate": 4.630892708260088e-05,
"loss": 7.4235,
"step": 53950
},
{
"epoch": 3.98,
"learning_rate": 4.625870157004952e-05,
"loss": 6.8609,
"step": 54000
},
{
"epoch": 3.99,
"learning_rate": 4.6208476057498166e-05,
"loss": 6.6128,
"step": 54050
},
{
"epoch": 3.99,
"learning_rate": 4.615825054494681e-05,
"loss": 7.0313,
"step": 54100
},
{
"epoch": 3.99,
"learning_rate": 4.610802503239546e-05,
"loss": 7.3654,
"step": 54150
},
{
"epoch": 4.0,
"learning_rate": 4.6057799519844105e-05,
"loss": 8.266,
"step": 54200
},
{
"epoch": 4.0,
"learning_rate": 4.600757400729275e-05,
"loss": 7.9471,
"step": 54250
},
{
"epoch": 4.01,
"learning_rate": 4.595734849474139e-05,
"loss": 6.0877,
"step": 54300
},
{
"epoch": 4.01,
"learning_rate": 4.590712298219004e-05,
"loss": 6.7453,
"step": 54350
},
{
"epoch": 4.01,
"learning_rate": 4.585689746963868e-05,
"loss": 5.8985,
"step": 54400
},
{
"epoch": 4.02,
"learning_rate": 4.580667195708732e-05,
"loss": 7.4527,
"step": 54450
},
{
"epoch": 4.02,
"learning_rate": 4.575644644453597e-05,
"loss": 7.0419,
"step": 54500
},
{
"epoch": 4.02,
"learning_rate": 4.570622093198461e-05,
"loss": 6.281,
"step": 54550
},
{
"epoch": 4.03,
"learning_rate": 4.565599541943326e-05,
"loss": 6.6096,
"step": 54600
},
{
"epoch": 4.03,
"learning_rate": 4.56057699068819e-05,
"loss": 7.0341,
"step": 54650
},
{
"epoch": 4.03,
"learning_rate": 4.5555544394330545e-05,
"loss": 6.621,
"step": 54700
},
{
"epoch": 4.04,
"learning_rate": 4.550531888177919e-05,
"loss": 7.4405,
"step": 54750
},
{
"epoch": 4.04,
"learning_rate": 4.5455093369227834e-05,
"loss": 7.2506,
"step": 54800
},
{
"epoch": 4.05,
"learning_rate": 4.540486785667648e-05,
"loss": 7.1534,
"step": 54850
},
{
"epoch": 4.05,
"learning_rate": 4.535464234412513e-05,
"loss": 6.5829,
"step": 54900
},
{
"epoch": 4.05,
"learning_rate": 4.530441683157377e-05,
"loss": 7.0338,
"step": 54950
},
{
"epoch": 4.06,
"learning_rate": 4.5254191319022417e-05,
"loss": 6.6234,
"step": 55000
},
{
"epoch": 4.06,
"learning_rate": 4.5203965806471054e-05,
"loss": 6.2412,
"step": 55050
},
{
"epoch": 4.06,
"learning_rate": 4.51537402939197e-05,
"loss": 6.3439,
"step": 55100
},
{
"epoch": 4.07,
"learning_rate": 4.510351478136834e-05,
"loss": 6.8272,
"step": 55150
},
{
"epoch": 4.07,
"learning_rate": 4.5053289268816986e-05,
"loss": 6.4758,
"step": 55200
},
{
"epoch": 4.08,
"learning_rate": 4.500306375626564e-05,
"loss": 6.434,
"step": 55250
},
{
"epoch": 4.08,
"learning_rate": 4.495283824371428e-05,
"loss": 6.5471,
"step": 55300
},
{
"epoch": 4.08,
"learning_rate": 4.4902612731162925e-05,
"loss": 6.5088,
"step": 55350
},
{
"epoch": 4.09,
"learning_rate": 4.485238721861157e-05,
"loss": 6.6941,
"step": 55400
},
{
"epoch": 4.09,
"learning_rate": 4.480216170606021e-05,
"loss": 6.3248,
"step": 55450
},
{
"epoch": 4.09,
"learning_rate": 4.475193619350886e-05,
"loss": 7.2989,
"step": 55500
},
{
"epoch": 4.1,
"learning_rate": 4.47017106809575e-05,
"loss": 7.0947,
"step": 55550
},
{
"epoch": 4.1,
"learning_rate": 4.4651485168406145e-05,
"loss": 6.4896,
"step": 55600
},
{
"epoch": 4.1,
"learning_rate": 4.460125965585479e-05,
"loss": 5.9249,
"step": 55650
},
{
"epoch": 4.11,
"learning_rate": 4.455103414330343e-05,
"loss": 6.7801,
"step": 55700
},
{
"epoch": 4.11,
"learning_rate": 4.450080863075208e-05,
"loss": 6.2216,
"step": 55750
},
{
"epoch": 4.12,
"learning_rate": 4.445058311820072e-05,
"loss": 6.8346,
"step": 55800
},
{
"epoch": 4.12,
"learning_rate": 4.4400357605649366e-05,
"loss": 7.2863,
"step": 55850
},
{
"epoch": 4.12,
"learning_rate": 4.435013209309801e-05,
"loss": 7.406,
"step": 55900
},
{
"epoch": 4.13,
"learning_rate": 4.429990658054666e-05,
"loss": 6.0165,
"step": 55950
},
{
"epoch": 4.13,
"learning_rate": 4.4249681067995304e-05,
"loss": 6.8052,
"step": 56000
},
{
"epoch": 4.13,
"learning_rate": 4.419945555544395e-05,
"loss": 6.679,
"step": 56050
},
{
"epoch": 4.14,
"learning_rate": 4.414923004289259e-05,
"loss": 6.2087,
"step": 56100
},
{
"epoch": 4.14,
"learning_rate": 4.4099004530341237e-05,
"loss": 6.5904,
"step": 56150
},
{
"epoch": 4.15,
"learning_rate": 4.4048779017789874e-05,
"loss": 6.4147,
"step": 56200
},
{
"epoch": 4.15,
"learning_rate": 4.399855350523852e-05,
"loss": 6.6465,
"step": 56250
},
{
"epoch": 4.15,
"learning_rate": 4.394832799268717e-05,
"loss": 7.11,
"step": 56300
},
{
"epoch": 4.16,
"learning_rate": 4.389810248013581e-05,
"loss": 7.0558,
"step": 56350
},
{
"epoch": 4.16,
"learning_rate": 4.384787696758446e-05,
"loss": 6.922,
"step": 56400
},
{
"epoch": 4.16,
"learning_rate": 4.37976514550331e-05,
"loss": 7.2125,
"step": 56450
},
{
"epoch": 4.17,
"learning_rate": 4.3747425942481745e-05,
"loss": 6.4719,
"step": 56500
},
{
"epoch": 4.17,
"learning_rate": 4.369720042993039e-05,
"loss": 7.204,
"step": 56550
},
{
"epoch": 4.17,
"learning_rate": 4.364697491737903e-05,
"loss": 7.2371,
"step": 56600
},
{
"epoch": 4.18,
"learning_rate": 4.359674940482768e-05,
"loss": 6.6131,
"step": 56650
},
{
"epoch": 4.18,
"learning_rate": 4.354652389227633e-05,
"loss": 6.6349,
"step": 56700
},
{
"epoch": 4.19,
"learning_rate": 4.3496298379724965e-05,
"loss": 5.9137,
"step": 56750
},
{
"epoch": 4.19,
"learning_rate": 4.344607286717361e-05,
"loss": 6.5402,
"step": 56800
},
{
"epoch": 4.19,
"learning_rate": 4.339584735462225e-05,
"loss": 7.3351,
"step": 56850
},
{
"epoch": 4.2,
"learning_rate": 4.33456218420709e-05,
"loss": 8.1387,
"step": 56900
},
{
"epoch": 4.2,
"learning_rate": 4.329539632951954e-05,
"loss": 7.0783,
"step": 56950
},
{
"epoch": 4.2,
"learning_rate": 4.324517081696819e-05,
"loss": 6.947,
"step": 57000
},
{
"epoch": 4.21,
"learning_rate": 4.3194945304416836e-05,
"loss": 6.1526,
"step": 57050
},
{
"epoch": 4.21,
"learning_rate": 4.314471979186548e-05,
"loss": 7.273,
"step": 57100
},
{
"epoch": 4.22,
"learning_rate": 4.3094494279314124e-05,
"loss": 7.0958,
"step": 57150
},
{
"epoch": 4.22,
"learning_rate": 4.304426876676277e-05,
"loss": 6.4413,
"step": 57200
},
{
"epoch": 4.22,
"learning_rate": 4.299404325421141e-05,
"loss": 6.597,
"step": 57250
},
{
"epoch": 4.23,
"learning_rate": 4.294381774166006e-05,
"loss": 6.6893,
"step": 57300
},
{
"epoch": 4.23,
"learning_rate": 4.28935922291087e-05,
"loss": 6.4746,
"step": 57350
},
{
"epoch": 4.23,
"learning_rate": 4.2843366716557345e-05,
"loss": 7.376,
"step": 57400
},
{
"epoch": 4.24,
"learning_rate": 4.279314120400599e-05,
"loss": 7.2823,
"step": 57450
},
{
"epoch": 4.24,
"learning_rate": 4.274291569145463e-05,
"loss": 6.3184,
"step": 57500
},
{
"epoch": 4.25,
"learning_rate": 4.269269017890328e-05,
"loss": 6.4526,
"step": 57550
},
{
"epoch": 4.25,
"learning_rate": 4.264246466635192e-05,
"loss": 6.7892,
"step": 57600
},
{
"epoch": 4.25,
"learning_rate": 4.2592239153800565e-05,
"loss": 6.2082,
"step": 57650
},
{
"epoch": 4.26,
"learning_rate": 4.254201364124921e-05,
"loss": 7.1488,
"step": 57700
},
{
"epoch": 4.26,
"learning_rate": 4.249178812869786e-05,
"loss": 6.9399,
"step": 57750
},
{
"epoch": 4.26,
"learning_rate": 4.2441562616146504e-05,
"loss": 6.8596,
"step": 57800
},
{
"epoch": 4.27,
"learning_rate": 4.239133710359515e-05,
"loss": 6.8899,
"step": 57850
},
{
"epoch": 4.27,
"learning_rate": 4.2341111591043785e-05,
"loss": 6.8196,
"step": 57900
},
{
"epoch": 4.27,
"learning_rate": 4.229088607849243e-05,
"loss": 7.5114,
"step": 57950
},
{
"epoch": 4.28,
"learning_rate": 4.224066056594107e-05,
"loss": 6.4122,
"step": 58000
},
{
"epoch": 4.28,
"learning_rate": 4.219043505338972e-05,
"loss": 6.9228,
"step": 58050
},
{
"epoch": 4.29,
"learning_rate": 4.214020954083837e-05,
"loss": 6.3687,
"step": 58100
},
{
"epoch": 4.29,
"learning_rate": 4.208998402828701e-05,
"loss": 6.8616,
"step": 58150
},
{
"epoch": 4.29,
"learning_rate": 4.2039758515735656e-05,
"loss": 6.002,
"step": 58200
},
{
"epoch": 4.3,
"learning_rate": 4.19895330031843e-05,
"loss": 6.0985,
"step": 58250
},
{
"epoch": 4.3,
"learning_rate": 4.1939307490632944e-05,
"loss": 6.5857,
"step": 58300
},
{
"epoch": 4.3,
"learning_rate": 4.188908197808159e-05,
"loss": 6.257,
"step": 58350
},
{
"epoch": 4.31,
"learning_rate": 4.183885646553023e-05,
"loss": 6.9222,
"step": 58400
},
{
"epoch": 4.31,
"learning_rate": 4.178863095297888e-05,
"loss": 6.7801,
"step": 58450
},
{
"epoch": 4.32,
"learning_rate": 4.173840544042752e-05,
"loss": 6.3861,
"step": 58500
},
{
"epoch": 4.32,
"learning_rate": 4.1688179927876165e-05,
"loss": 6.8685,
"step": 58550
},
{
"epoch": 4.32,
"learning_rate": 4.163795441532481e-05,
"loss": 6.9948,
"step": 58600
},
{
"epoch": 4.33,
"learning_rate": 4.158772890277345e-05,
"loss": 6.0965,
"step": 58650
},
{
"epoch": 4.33,
"learning_rate": 4.15375033902221e-05,
"loss": 7.282,
"step": 58700
},
{
"epoch": 4.33,
"learning_rate": 4.148727787767074e-05,
"loss": 7.6165,
"step": 58750
},
{
"epoch": 4.34,
"learning_rate": 4.143705236511939e-05,
"loss": 6.734,
"step": 58800
},
{
"epoch": 4.34,
"learning_rate": 4.1386826852568036e-05,
"loss": 6.0334,
"step": 58850
},
{
"epoch": 4.34,
"learning_rate": 4.133660134001668e-05,
"loss": 6.5306,
"step": 58900
},
{
"epoch": 4.35,
"learning_rate": 4.1286375827465324e-05,
"loss": 7.4324,
"step": 58950
},
{
"epoch": 4.35,
"learning_rate": 4.123615031491397e-05,
"loss": 7.234,
"step": 59000
},
{
"epoch": 4.36,
"learning_rate": 4.1185924802362605e-05,
"loss": 6.7196,
"step": 59050
},
{
"epoch": 4.36,
"learning_rate": 4.113569928981125e-05,
"loss": 6.0641,
"step": 59100
},
{
"epoch": 4.36,
"learning_rate": 4.10854737772599e-05,
"loss": 5.9373,
"step": 59150
},
{
"epoch": 4.37,
"learning_rate": 4.1035248264708544e-05,
"loss": 6.4428,
"step": 59200
},
{
"epoch": 4.37,
"learning_rate": 4.098502275215719e-05,
"loss": 6.7303,
"step": 59250
},
{
"epoch": 4.37,
"learning_rate": 4.093479723960583e-05,
"loss": 6.5585,
"step": 59300
},
{
"epoch": 4.38,
"learning_rate": 4.0884571727054476e-05,
"loss": 5.837,
"step": 59350
},
{
"epoch": 4.38,
"learning_rate": 4.083434621450312e-05,
"loss": 6.574,
"step": 59400
},
{
"epoch": 4.39,
"learning_rate": 4.0784120701951764e-05,
"loss": 7.4319,
"step": 59450
},
{
"epoch": 4.39,
"learning_rate": 4.073389518940041e-05,
"loss": 6.1092,
"step": 59500
},
{
"epoch": 4.39,
"learning_rate": 4.068366967684906e-05,
"loss": 6.4683,
"step": 59550
},
{
"epoch": 4.4,
"learning_rate": 4.06334441642977e-05,
"loss": 7.1323,
"step": 59600
},
{
"epoch": 4.4,
"learning_rate": 4.058321865174634e-05,
"loss": 6.7011,
"step": 59650
},
{
"epoch": 4.4,
"learning_rate": 4.0532993139194985e-05,
"loss": 6.7281,
"step": 59700
},
{
"epoch": 4.41,
"learning_rate": 4.048276762664363e-05,
"loss": 6.0361,
"step": 59750
},
{
"epoch": 4.41,
"learning_rate": 4.043254211409227e-05,
"loss": 6.5359,
"step": 59800
},
{
"epoch": 4.41,
"learning_rate": 4.0382316601540924e-05,
"loss": 7.3576,
"step": 59850
},
{
"epoch": 4.42,
"learning_rate": 4.033209108898957e-05,
"loss": 6.7324,
"step": 59900
},
{
"epoch": 4.42,
"learning_rate": 4.028186557643821e-05,
"loss": 7.1445,
"step": 59950
},
{
"epoch": 4.43,
"learning_rate": 4.0231640063886856e-05,
"loss": 5.426,
"step": 60000
},
{
"epoch": 4.43,
"eval_loss": 7.799332618713379,
"eval_runtime": 963.4222,
"eval_samples_per_second": 13.595,
"eval_steps_per_second": 3.399,
"eval_wer": 0.20775061946159337,
"step": 60000
}
],
"max_steps": 100051,
"num_train_epochs": 8,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}