|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4752526370140886, |
|
"global_step": 20000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 178.9465, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2e-05, |
|
"loss": 164.9707, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3e-05, |
|
"loss": 142.2782, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4e-05, |
|
"loss": 121.5122, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5e-05, |
|
"loss": 91.8622, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6e-05, |
|
"loss": 82.2062, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7e-05, |
|
"loss": 72.6893, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8e-05, |
|
"loss": 71.8709, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9e-05, |
|
"loss": 69.9995, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001, |
|
"loss": 70.6458, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.994977448744865e-05, |
|
"loss": 73.9929, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.989954897489729e-05, |
|
"loss": 66.52, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.984932346234594e-05, |
|
"loss": 65.8947, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.979909794979458e-05, |
|
"loss": 62.5809, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.974887243724323e-05, |
|
"loss": 61.212, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.969864692469187e-05, |
|
"loss": 68.2408, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.964842141214051e-05, |
|
"loss": 61.5308, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.959819589958916e-05, |
|
"loss": 58.9116, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.95479703870378e-05, |
|
"loss": 60.0702, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.949774487448646e-05, |
|
"loss": 57.6135, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.944751936193509e-05, |
|
"loss": 50.9231, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.939729384938373e-05, |
|
"loss": 51.187, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.934706833683238e-05, |
|
"loss": 52.1127, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.929684282428102e-05, |
|
"loss": 47.4608, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.924661731172968e-05, |
|
"loss": 51.6108, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.919639179917831e-05, |
|
"loss": 46.5874, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.914616628662697e-05, |
|
"loss": 41.4706, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.90959407740756e-05, |
|
"loss": 43.7544, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.904571526152426e-05, |
|
"loss": 44.6039, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.899548974897289e-05, |
|
"loss": 41.4384, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.894526423642154e-05, |
|
"loss": 42.8289, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.889503872387019e-05, |
|
"loss": 39.9726, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.884481321131882e-05, |
|
"loss": 43.9533, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.879458769876748e-05, |
|
"loss": 38.7605, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.87443621862161e-05, |
|
"loss": 39.5425, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.869413667366476e-05, |
|
"loss": 37.588, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.86439111611134e-05, |
|
"loss": 39.7744, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.859368564856205e-05, |
|
"loss": 38.2154, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.85434601360107e-05, |
|
"loss": 35.0806, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.849323462345934e-05, |
|
"loss": 39.061, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.844300911090798e-05, |
|
"loss": 35.1544, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.839278359835663e-05, |
|
"loss": 38.123, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.834255808580527e-05, |
|
"loss": 33.1144, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.829233257325392e-05, |
|
"loss": 34.3476, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.824210706070256e-05, |
|
"loss": 29.5665, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.81918815481512e-05, |
|
"loss": 35.8756, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.814165603559985e-05, |
|
"loss": 37.2579, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.809143052304849e-05, |
|
"loss": 33.6245, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.804120501049714e-05, |
|
"loss": 35.6543, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.799097949794578e-05, |
|
"loss": 36.7847, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.794075398539442e-05, |
|
"loss": 33.463, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.789052847284307e-05, |
|
"loss": 32.2215, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.784030296029171e-05, |
|
"loss": 33.4301, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.779007744774036e-05, |
|
"loss": 29.9579, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.773985193518901e-05, |
|
"loss": 31.9141, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.768962642263764e-05, |
|
"loss": 33.2049, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.763940091008629e-05, |
|
"loss": 32.8774, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.758917539753493e-05, |
|
"loss": 29.0858, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.753894988498358e-05, |
|
"loss": 30.1145, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.748872437243222e-05, |
|
"loss": 27.6986, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.743849885988087e-05, |
|
"loss": 31.7807, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.738827334732952e-05, |
|
"loss": 30.5108, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.733804783477815e-05, |
|
"loss": 31.0909, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.728782232222681e-05, |
|
"loss": 27.9057, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.723759680967544e-05, |
|
"loss": 29.7323, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.71873712971241e-05, |
|
"loss": 29.7527, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.713714578457273e-05, |
|
"loss": 29.1442, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.708692027202137e-05, |
|
"loss": 30.8906, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.703669475947003e-05, |
|
"loss": 26.8419, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.698646924691866e-05, |
|
"loss": 29.2181, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.693624373436732e-05, |
|
"loss": 27.6549, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.688601822181595e-05, |
|
"loss": 34.0701, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.683579270926461e-05, |
|
"loss": 24.7487, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.678556719671325e-05, |
|
"loss": 30.0266, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.67353416841619e-05, |
|
"loss": 25.5011, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.668511617161054e-05, |
|
"loss": 26.1437, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.663489065905918e-05, |
|
"loss": 23.2303, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.658466514650783e-05, |
|
"loss": 26.357, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.653443963395646e-05, |
|
"loss": 27.2201, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.648421412140512e-05, |
|
"loss": 25.5695, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.643398860885376e-05, |
|
"loss": 24.8346, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.63837630963024e-05, |
|
"loss": 22.3957, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.633353758375105e-05, |
|
"loss": 24.9532, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.628331207119969e-05, |
|
"loss": 23.1574, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.623308655864834e-05, |
|
"loss": 23.7018, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.618286104609698e-05, |
|
"loss": 25.1433, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.613263553354562e-05, |
|
"loss": 25.0571, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.608241002099427e-05, |
|
"loss": 24.2231, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.603218450844291e-05, |
|
"loss": 23.0983, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.598195899589156e-05, |
|
"loss": 25.0078, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.59317334833402e-05, |
|
"loss": 20.6933, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.588150797078884e-05, |
|
"loss": 23.6196, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.583128245823749e-05, |
|
"loss": 25.2331, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.578105694568613e-05, |
|
"loss": 24.7932, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.573083143313478e-05, |
|
"loss": 24.3586, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.568060592058342e-05, |
|
"loss": 22.7161, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.563038040803208e-05, |
|
"loss": 22.4188, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.558015489548071e-05, |
|
"loss": 21.6516, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.552992938292937e-05, |
|
"loss": 21.78, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.5479703870378e-05, |
|
"loss": 21.0172, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.542947835782665e-05, |
|
"loss": 22.4624, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.537925284527528e-05, |
|
"loss": 23.6615, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.532902733272393e-05, |
|
"loss": 21.8091, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.527880182017259e-05, |
|
"loss": 21.4173, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.522857630762122e-05, |
|
"loss": 20.5415, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.517835079506987e-05, |
|
"loss": 21.0639, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.51281252825185e-05, |
|
"loss": 21.6078, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.507789976996716e-05, |
|
"loss": 19.4142, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.50276742574158e-05, |
|
"loss": 20.2504, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.497744874486445e-05, |
|
"loss": 23.8683, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.49272232323131e-05, |
|
"loss": 19.7559, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.487699771976174e-05, |
|
"loss": 21.1743, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.482677220721038e-05, |
|
"loss": 21.1908, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.477654669465901e-05, |
|
"loss": 20.9591, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.472632118210767e-05, |
|
"loss": 20.9036, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.46760956695563e-05, |
|
"loss": 22.249, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.462587015700496e-05, |
|
"loss": 19.1093, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.45756446444536e-05, |
|
"loss": 21.2714, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.452541913190225e-05, |
|
"loss": 21.3794, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.447519361935089e-05, |
|
"loss": 20.0326, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.442496810679954e-05, |
|
"loss": 19.8004, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.437474259424818e-05, |
|
"loss": 19.0229, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.432451708169682e-05, |
|
"loss": 17.6587, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.427429156914547e-05, |
|
"loss": 21.9247, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.422406605659411e-05, |
|
"loss": 19.743, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.417384054404276e-05, |
|
"loss": 22.9746, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.41236150314914e-05, |
|
"loss": 19.6693, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.407338951894004e-05, |
|
"loss": 19.1141, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.402316400638869e-05, |
|
"loss": 18.3847, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.397293849383733e-05, |
|
"loss": 18.9357, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.392271298128598e-05, |
|
"loss": 18.9316, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.387248746873462e-05, |
|
"loss": 20.9141, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.382226195618326e-05, |
|
"loss": 18.7472, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.377203644363192e-05, |
|
"loss": 18.8577, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.372181093108055e-05, |
|
"loss": 17.8061, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.36715854185292e-05, |
|
"loss": 19.4687, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.362135990597784e-05, |
|
"loss": 19.5103, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.357113439342648e-05, |
|
"loss": 18.5319, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.352090888087514e-05, |
|
"loss": 20.16, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.347068336832377e-05, |
|
"loss": 18.1913, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.342045785577243e-05, |
|
"loss": 21.341, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.337023234322106e-05, |
|
"loss": 16.7701, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.332000683066972e-05, |
|
"loss": 18.045, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.326978131811835e-05, |
|
"loss": 16.0393, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.3219555805567e-05, |
|
"loss": 17.4833, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.316933029301565e-05, |
|
"loss": 17.3978, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.31191047804643e-05, |
|
"loss": 18.2649, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.306887926791294e-05, |
|
"loss": 16.3891, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.301865375536157e-05, |
|
"loss": 21.4399, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.296842824281023e-05, |
|
"loss": 16.3082, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.291820273025886e-05, |
|
"loss": 14.8713, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.286797721770751e-05, |
|
"loss": 16.3099, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.281775170515616e-05, |
|
"loss": 17.8771, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.27675261926048e-05, |
|
"loss": 17.1421, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.271730068005345e-05, |
|
"loss": 16.6478, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.266707516750209e-05, |
|
"loss": 15.3247, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.261684965495073e-05, |
|
"loss": 17.6577, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.256662414239938e-05, |
|
"loss": 18.8549, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.251639862984802e-05, |
|
"loss": 17.4187, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.246617311729667e-05, |
|
"loss": 15.6643, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.241594760474531e-05, |
|
"loss": 17.1987, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.236572209219396e-05, |
|
"loss": 18.1712, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.23154965796426e-05, |
|
"loss": 15.8015, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.226527106709124e-05, |
|
"loss": 19.064, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.221504555453989e-05, |
|
"loss": 18.2748, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.216482004198853e-05, |
|
"loss": 15.0679, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.211459452943718e-05, |
|
"loss": 17.995, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.206436901688582e-05, |
|
"loss": 17.467, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.201414350433448e-05, |
|
"loss": 18.6665, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.196391799178311e-05, |
|
"loss": 17.2848, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.191369247923175e-05, |
|
"loss": 14.4767, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.18634669666804e-05, |
|
"loss": 17.5444, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.181324145412904e-05, |
|
"loss": 14.4661, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.176301594157768e-05, |
|
"loss": 16.3339, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.171279042902633e-05, |
|
"loss": 17.5122, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.166256491647499e-05, |
|
"loss": 16.7631, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.161233940392362e-05, |
|
"loss": 16.5193, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.156211389137227e-05, |
|
"loss": 17.8364, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.15118883788209e-05, |
|
"loss": 16.2916, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.146166286626956e-05, |
|
"loss": 14.1719, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.141143735371819e-05, |
|
"loss": 18.2987, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.136121184116684e-05, |
|
"loss": 17.4248, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.13109863286155e-05, |
|
"loss": 16.1862, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.126076081606412e-05, |
|
"loss": 16.3134, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.121053530351278e-05, |
|
"loss": 14.9158, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.116030979096141e-05, |
|
"loss": 15.2504, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.111008427841007e-05, |
|
"loss": 14.1967, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.105985876585871e-05, |
|
"loss": 17.3165, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.100963325330736e-05, |
|
"loss": 14.5912, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.0959407740756e-05, |
|
"loss": 17.5593, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.090918222820465e-05, |
|
"loss": 16.3421, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.085895671565329e-05, |
|
"loss": 16.2821, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.080873120310192e-05, |
|
"loss": 16.4985, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.075850569055058e-05, |
|
"loss": 16.1138, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.070828017799922e-05, |
|
"loss": 16.3997, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.065805466544787e-05, |
|
"loss": 15.518, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.060782915289651e-05, |
|
"loss": 13.8424, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.055760364034515e-05, |
|
"loss": 15.0784, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.05073781277938e-05, |
|
"loss": 14.0163, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.045715261524244e-05, |
|
"loss": 16.7863, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.040692710269109e-05, |
|
"loss": 13.6715, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.035670159013973e-05, |
|
"loss": 15.1071, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.030647607758837e-05, |
|
"loss": 14.2658, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.025625056503703e-05, |
|
"loss": 15.1115, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.020602505248566e-05, |
|
"loss": 14.028, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.015579953993431e-05, |
|
"loss": 13.3066, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.010557402738295e-05, |
|
"loss": 14.1185, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.00553485148316e-05, |
|
"loss": 14.061, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.000512300228024e-05, |
|
"loss": 15.2439, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.995489748972888e-05, |
|
"loss": 13.3617, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 8.990467197717754e-05, |
|
"loss": 14.5514, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 8.985444646462617e-05, |
|
"loss": 15.2426, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.980422095207483e-05, |
|
"loss": 16.6418, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.975399543952346e-05, |
|
"loss": 13.3146, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.970376992697212e-05, |
|
"loss": 14.9333, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.965354441442075e-05, |
|
"loss": 14.4502, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.960331890186939e-05, |
|
"loss": 14.7886, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.955309338931805e-05, |
|
"loss": 15.0266, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.950286787676668e-05, |
|
"loss": 14.543, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.945264236421534e-05, |
|
"loss": 15.8078, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.940241685166397e-05, |
|
"loss": 13.6052, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.935219133911263e-05, |
|
"loss": 14.2995, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.930196582656126e-05, |
|
"loss": 15.732, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.925174031400991e-05, |
|
"loss": 14.0573, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.920151480145856e-05, |
|
"loss": 17.5941, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.91512892889072e-05, |
|
"loss": 14.7829, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.910106377635585e-05, |
|
"loss": 14.6669, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.905083826380448e-05, |
|
"loss": 14.3315, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.900061275125313e-05, |
|
"loss": 14.2639, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.895038723870176e-05, |
|
"loss": 14.3226, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.890016172615042e-05, |
|
"loss": 14.4975, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.884993621359907e-05, |
|
"loss": 14.8436, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.879971070104771e-05, |
|
"loss": 13.8481, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.874948518849635e-05, |
|
"loss": 12.8151, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.8699259675945e-05, |
|
"loss": 13.1659, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.864903416339364e-05, |
|
"loss": 15.0919, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.859880865084229e-05, |
|
"loss": 14.4382, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.854858313829093e-05, |
|
"loss": 14.0989, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.849835762573957e-05, |
|
"loss": 14.5763, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.844813211318822e-05, |
|
"loss": 13.4144, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.839790660063686e-05, |
|
"loss": 15.6018, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.83476810880855e-05, |
|
"loss": 14.7849, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.829745557553415e-05, |
|
"loss": 14.441, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.82472300629828e-05, |
|
"loss": 14.2135, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.819700455043144e-05, |
|
"loss": 17.1245, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.814677903788008e-05, |
|
"loss": 14.6629, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.809655352532873e-05, |
|
"loss": 16.6715, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.804632801277738e-05, |
|
"loss": 13.0133, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.799610250022601e-05, |
|
"loss": 14.1551, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.794587698767466e-05, |
|
"loss": 14.019, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.78956514751233e-05, |
|
"loss": 14.4279, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.784542596257195e-05, |
|
"loss": 12.5293, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.77952004500206e-05, |
|
"loss": 15.0403, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.774497493746924e-05, |
|
"loss": 13.8193, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.769474942491789e-05, |
|
"loss": 13.1564, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.764452391236652e-05, |
|
"loss": 14.6415, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.759429839981518e-05, |
|
"loss": 12.2339, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.754407288726381e-05, |
|
"loss": 12.1604, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.749384737471247e-05, |
|
"loss": 15.4939, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.744362186216111e-05, |
|
"loss": 13.9713, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.739339634960976e-05, |
|
"loss": 14.0986, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.73431708370584e-05, |
|
"loss": 13.6334, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.729294532450703e-05, |
|
"loss": 13.5201, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.724271981195569e-05, |
|
"loss": 14.3793, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.719249429940432e-05, |
|
"loss": 13.1741, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.714226878685298e-05, |
|
"loss": 11.7782, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.709204327430162e-05, |
|
"loss": 12.2758, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.704181776175027e-05, |
|
"loss": 13.1723, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.699159224919891e-05, |
|
"loss": 14.0858, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.694136673664755e-05, |
|
"loss": 11.2836, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.68911412240962e-05, |
|
"loss": 15.7226, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.684091571154484e-05, |
|
"loss": 15.8889, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 8.679069019899349e-05, |
|
"loss": 12.2185, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 8.674046468644213e-05, |
|
"loss": 11.4647, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 8.669023917389077e-05, |
|
"loss": 13.1238, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.664001366133942e-05, |
|
"loss": 11.909, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.658978814878806e-05, |
|
"loss": 12.5478, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.65395626362367e-05, |
|
"loss": 13.017, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.648933712368535e-05, |
|
"loss": 12.9134, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.6439111611134e-05, |
|
"loss": 13.3485, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.638888609858264e-05, |
|
"loss": 11.4706, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.633866058603128e-05, |
|
"loss": 11.1063, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.628843507347994e-05, |
|
"loss": 12.7408, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.623820956092857e-05, |
|
"loss": 12.0689, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.618798404837721e-05, |
|
"loss": 11.0724, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.613775853582586e-05, |
|
"loss": 12.5685, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 8.60875330232745e-05, |
|
"loss": 12.7776, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 8.603730751072315e-05, |
|
"loss": 11.3066, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.598708199817179e-05, |
|
"loss": 13.06, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.593685648562045e-05, |
|
"loss": 15.6523, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.588663097306908e-05, |
|
"loss": 12.019, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.583640546051774e-05, |
|
"loss": 11.0941, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.578617994796637e-05, |
|
"loss": 12.4755, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.573595443541502e-05, |
|
"loss": 13.7012, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.568572892286366e-05, |
|
"loss": 12.2024, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.56355034103123e-05, |
|
"loss": 12.4744, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.558527789776096e-05, |
|
"loss": 12.3234, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.553505238520959e-05, |
|
"loss": 12.5616, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.548482687265824e-05, |
|
"loss": 11.9559, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.543460136010688e-05, |
|
"loss": 12.0734, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.538437584755553e-05, |
|
"loss": 13.0341, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.533415033500418e-05, |
|
"loss": 12.7406, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.528392482245282e-05, |
|
"loss": 11.7258, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.523369930990147e-05, |
|
"loss": 11.8709, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.518347379735011e-05, |
|
"loss": 11.7021, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.513324828479875e-05, |
|
"loss": 13.2674, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.508302277224738e-05, |
|
"loss": 11.9099, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.503279725969604e-05, |
|
"loss": 11.7841, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.498257174714469e-05, |
|
"loss": 11.9573, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.493234623459333e-05, |
|
"loss": 11.7211, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.488212072204197e-05, |
|
"loss": 12.3513, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.483189520949062e-05, |
|
"loss": 11.0709, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.478166969693926e-05, |
|
"loss": 11.6544, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.47314441843879e-05, |
|
"loss": 11.8285, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.468121867183655e-05, |
|
"loss": 10.4208, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.46309931592852e-05, |
|
"loss": 10.7821, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.458076764673384e-05, |
|
"loss": 13.2724, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.45305421341825e-05, |
|
"loss": 10.9219, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.448031662163113e-05, |
|
"loss": 12.2532, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.443009110907977e-05, |
|
"loss": 11.0132, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.437986559652841e-05, |
|
"loss": 12.319, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.432964008397706e-05, |
|
"loss": 12.9871, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.42794145714257e-05, |
|
"loss": 12.0625, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.422918905887435e-05, |
|
"loss": 13.4629, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.4178963546323e-05, |
|
"loss": 10.9291, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.412873803377163e-05, |
|
"loss": 13.7719, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.407851252122029e-05, |
|
"loss": 11.3634, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.402828700866892e-05, |
|
"loss": 12.7941, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.397806149611758e-05, |
|
"loss": 11.8863, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.392783598356621e-05, |
|
"loss": 9.5225, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.387761047101485e-05, |
|
"loss": 12.983, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.382738495846351e-05, |
|
"loss": 11.8489, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.377715944591214e-05, |
|
"loss": 11.8122, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.37269339333608e-05, |
|
"loss": 12.3387, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.367670842080943e-05, |
|
"loss": 13.4648, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.362648290825809e-05, |
|
"loss": 10.2301, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.357625739570672e-05, |
|
"loss": 11.492, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.352603188315538e-05, |
|
"loss": 12.5997, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.347580637060402e-05, |
|
"loss": 11.5588, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.342558085805266e-05, |
|
"loss": 11.8627, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.337535534550131e-05, |
|
"loss": 13.2469, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.332512983294994e-05, |
|
"loss": 10.4327, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.32749043203986e-05, |
|
"loss": 12.7566, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.322467880784723e-05, |
|
"loss": 11.0729, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.317445329529588e-05, |
|
"loss": 12.3484, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.312422778274453e-05, |
|
"loss": 10.5193, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.307400227019317e-05, |
|
"loss": 12.2369, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.302377675764182e-05, |
|
"loss": 12.2976, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.297355124509046e-05, |
|
"loss": 12.3852, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.29233257325391e-05, |
|
"loss": 11.2137, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.287310021998775e-05, |
|
"loss": 11.609, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.282287470743639e-05, |
|
"loss": 13.3339, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.277264919488504e-05, |
|
"loss": 11.4263, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.272242368233368e-05, |
|
"loss": 12.6949, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.267219816978233e-05, |
|
"loss": 11.4767, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.262197265723097e-05, |
|
"loss": 12.2225, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.257174714467961e-05, |
|
"loss": 11.0755, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.252152163212826e-05, |
|
"loss": 11.9677, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.24712961195769e-05, |
|
"loss": 11.098, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 8.242107060702555e-05, |
|
"loss": 11.1102, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 8.237084509447419e-05, |
|
"loss": 11.4985, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 8.232061958192285e-05, |
|
"loss": 11.7356, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 8.227039406937148e-05, |
|
"loss": 11.3336, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 8.222016855682012e-05, |
|
"loss": 11.0448, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.216994304426877e-05, |
|
"loss": 10.9986, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.211971753171741e-05, |
|
"loss": 10.768, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.206949201916607e-05, |
|
"loss": 11.6844, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 8.20192665066147e-05, |
|
"loss": 11.5615, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 8.196904099406336e-05, |
|
"loss": 11.4019, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 8.191881548151199e-05, |
|
"loss": 12.1784, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.186858996896064e-05, |
|
"loss": 12.4565, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.181836445640927e-05, |
|
"loss": 11.0557, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 8.176813894385793e-05, |
|
"loss": 12.1892, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 8.171791343130658e-05, |
|
"loss": 12.0531, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 8.166768791875522e-05, |
|
"loss": 10.1791, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.161746240620386e-05, |
|
"loss": 11.2501, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.15672368936525e-05, |
|
"loss": 9.92, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.151701138110115e-05, |
|
"loss": 10.0603, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.146678586854978e-05, |
|
"loss": 10.9477, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.141656035599844e-05, |
|
"loss": 9.7579, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.136633484344708e-05, |
|
"loss": 11.243, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.131610933089573e-05, |
|
"loss": 11.0069, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.126588381834437e-05, |
|
"loss": 9.7387, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.121565830579302e-05, |
|
"loss": 11.4624, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.116543279324166e-05, |
|
"loss": 12.1299, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.11152072806903e-05, |
|
"loss": 12.2796, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.106498176813895e-05, |
|
"loss": 10.3295, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.101475625558759e-05, |
|
"loss": 10.0709, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.096453074303624e-05, |
|
"loss": 11.0725, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.091430523048488e-05, |
|
"loss": 10.7882, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.086407971793352e-05, |
|
"loss": 11.4124, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.081385420538217e-05, |
|
"loss": 10.4941, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.076362869283081e-05, |
|
"loss": 11.8687, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.071340318027946e-05, |
|
"loss": 11.3221, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.06631776677281e-05, |
|
"loss": 10.2167, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.061295215517675e-05, |
|
"loss": 10.5425, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.05627266426254e-05, |
|
"loss": 11.2982, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.051250113007403e-05, |
|
"loss": 12.0685, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.046227561752268e-05, |
|
"loss": 10.6613, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.041205010497132e-05, |
|
"loss": 10.8245, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 10.409339904785156, |
|
"eval_runtime": 890.9956, |
|
"eval_samples_per_second": 14.7, |
|
"eval_steps_per_second": 3.676, |
|
"eval_wer": 0.2624627273109067, |
|
"step": 20000 |
|
} |
|
], |
|
"max_steps": 100051, |
|
"num_train_epochs": 8, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|