{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 280880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.5609999287800016e-09, "loss": 0.0011, "step": 1 }, { "epoch": 0.1, "learning_rate": 5.003204899935903e-06, "loss": 0.0046, "step": 1405 }, { "epoch": 0.2, "learning_rate": 1.0006409799871805e-05, "loss": 0.005, "step": 2810 }, { "epoch": 0.3, "learning_rate": 1.5009614699807706e-05, "loss": 0.0052, "step": 4215 }, { "epoch": 0.4, "learning_rate": 2.001281959974361e-05, "loss": 0.005, "step": 5620 }, { "epoch": 0.5, "learning_rate": 2.5016024499679513e-05, "loss": 0.0048, "step": 7025 }, { "epoch": 0.6, "learning_rate": 3.001922939961541e-05, "loss": 0.0045, "step": 8430 }, { "epoch": 0.7, "learning_rate": 3.5022434299551316e-05, "loss": 0.0049, "step": 9835 }, { "epoch": 0.8, "learning_rate": 4.002563919948722e-05, "loss": 0.0051, "step": 11240 }, { "epoch": 0.9, "learning_rate": 4.502884409942312e-05, "loss": 0.0052, "step": 12645 }, { "epoch": 1.0, "eval_loss": 0.027173755690455437, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 13.945, "eval_samples_per_second": 17.928, "eval_steps_per_second": 1.219, "step": 14041 }, { "epoch": 1.0, "learning_rate": 5.0032048999359025e-05, "loss": 0.0045, "step": 14050 }, { "epoch": 1.1, "learning_rate": 5.5035253899294924e-05, "loss": 0.0047, "step": 15455 }, { "epoch": 1.2, "learning_rate": 6.003845879923082e-05, "loss": 0.0044, "step": 16860 }, { "epoch": 1.3, "learning_rate": 6.504166369916673e-05, "loss": 0.0045, "step": 18265 }, { "epoch": 1.4, "learning_rate": 7.002990600968385e-05, "loss": 0.0046, "step": 19670 }, { "epoch": 1.5, "learning_rate": 7.50320421532327e-05, "loss": 0.0043, "step": 21075 }, { "epoch": 1.6, "learning_rate": 8.003417829678155e-05, "loss": 0.0042, "step": 22480 }, { "epoch": 1.7, "learning_rate": 8.503631444033039e-05, "loss": 0.0046, "step": 23885 }, { "epoch": 1.8, "learning_rate": 9.003845058387924e-05, "loss": 0.0043, "step": 25290 }, { "epoch": 1.9, "learning_rate": 9.504058672742809e-05, "loss": 0.0045, "step": 26695 }, { "epoch": 2.0, "eval_loss": 0.03272660821676254, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 12.7866, "eval_samples_per_second": 19.552, "eval_steps_per_second": 1.33, "step": 28088 }, { "epoch": 2.0, "learning_rate": 9.99952530143359e-05, "loss": 0.0046, "step": 28100 }, { "epoch": 2.1, "learning_rate": 9.943946010949714e-05, "loss": 0.0042, "step": 29505 }, { "epoch": 2.2, "learning_rate": 9.888366720465838e-05, "loss": 0.0043, "step": 30910 }, { "epoch": 2.3, "learning_rate": 9.832787429981962e-05, "loss": 0.0044, "step": 32315 }, { "epoch": 2.4, "learning_rate": 9.777208139498087e-05, "loss": 0.0045, "step": 33720 }, { "epoch": 2.5, "learning_rate": 9.721628849014209e-05, "loss": 0.0041, "step": 35125 }, { "epoch": 2.6, "learning_rate": 9.666049558530333e-05, "loss": 0.0043, "step": 36530 }, { "epoch": 2.7, "learning_rate": 9.610470268046457e-05, "loss": 0.0046, "step": 37935 }, { "epoch": 2.8, "learning_rate": 9.554890977562582e-05, "loss": 0.0043, "step": 39340 }, { "epoch": 2.9, "learning_rate": 9.499311687078706e-05, "loss": 0.0043, "step": 40745 }, { "epoch": 3.0, "eval_loss": 0.03169206902384758, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.3553, "eval_samples_per_second": 20.234, "eval_steps_per_second": 1.376, "step": 42132 }, { "epoch": 3.0, "learning_rate": 9.44373239659483e-05, "loss": 0.0048, "step": 42150 }, { "epoch": 3.1, "learning_rate": 9.388153106110953e-05, "loss": 0.0034, "step": 43555 }, { "epoch": 3.2, "learning_rate": 9.332573815627077e-05, "loss": 0.0036, "step": 44960 }, { "epoch": 3.3, "learning_rate": 9.276994525143201e-05, "loss": 0.0037, "step": 46365 }, { "epoch": 3.4, "learning_rate": 9.221415234659325e-05, "loss": 0.004, "step": 47770 }, { "epoch": 3.5, "learning_rate": 9.16583594417545e-05, "loss": 0.0041, "step": 49175 }, { "epoch": 3.6, "learning_rate": 9.110256653691574e-05, "loss": 0.0039, "step": 50580 }, { "epoch": 3.7, "learning_rate": 9.054677363207696e-05, "loss": 0.0039, "step": 51985 }, { "epoch": 3.8, "learning_rate": 8.99909807272382e-05, "loss": 0.0039, "step": 53390 }, { "epoch": 3.9, "learning_rate": 8.943518782239945e-05, "loss": 0.0042, "step": 54795 }, { "epoch": 4.0, "eval_loss": 0.031554438173770905, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.1943, "eval_samples_per_second": 20.501, "eval_steps_per_second": 1.394, "step": 56176 }, { "epoch": 4.0, "learning_rate": 8.887939491756069e-05, "loss": 0.004, "step": 56200 }, { "epoch": 4.1, "learning_rate": 8.832360201272193e-05, "loss": 0.0034, "step": 57605 }, { "epoch": 4.2, "learning_rate": 8.776780910788317e-05, "loss": 0.0033, "step": 59010 }, { "epoch": 4.3, "learning_rate": 8.72120162030444e-05, "loss": 0.0034, "step": 60415 }, { "epoch": 4.4, "learning_rate": 8.665622329820564e-05, "loss": 0.0034, "step": 61820 }, { "epoch": 4.5, "learning_rate": 8.610043039336688e-05, "loss": 0.0032, "step": 63225 }, { "epoch": 4.6, "learning_rate": 8.554463748852812e-05, "loss": 0.0035, "step": 64630 }, { "epoch": 4.7, "learning_rate": 8.498884458368937e-05, "loss": 0.0035, "step": 66035 }, { "epoch": 4.8, "learning_rate": 8.443305167885061e-05, "loss": 0.0035, "step": 67440 }, { "epoch": 4.9, "learning_rate": 8.387725877401183e-05, "loss": 0.0035, "step": 68845 }, { "epoch": 5.0, "eval_loss": 0.03568544238805771, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.3273, "eval_samples_per_second": 20.28, "eval_steps_per_second": 1.379, "step": 70220 }, { "epoch": 5.0, "learning_rate": 8.332146586917307e-05, "loss": 0.0037, "step": 70250 }, { "epoch": 5.1, "learning_rate": 8.276567296433432e-05, "loss": 0.0028, "step": 71655 }, { "epoch": 5.2, "learning_rate": 8.220988005949556e-05, "loss": 0.0029, "step": 73060 }, { "epoch": 5.3, "learning_rate": 8.16540871546568e-05, "loss": 0.003, "step": 74465 }, { "epoch": 5.4, "learning_rate": 8.109829424981804e-05, "loss": 0.0031, "step": 75870 }, { "epoch": 5.5, "learning_rate": 8.054250134497927e-05, "loss": 0.0032, "step": 77275 }, { "epoch": 5.6, "learning_rate": 7.998670844014051e-05, "loss": 0.003, "step": 78680 }, { "epoch": 5.7, "learning_rate": 7.943091553530175e-05, "loss": 0.0032, "step": 80085 }, { "epoch": 5.8, "learning_rate": 7.8875122630463e-05, "loss": 0.0031, "step": 81490 }, { "epoch": 5.9, "learning_rate": 7.831932972562424e-05, "loss": 0.0032, "step": 82895 }, { "epoch": 6.0, "eval_loss": 0.03648597374558449, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.1121, "eval_samples_per_second": 20.641, "eval_steps_per_second": 1.404, "step": 84264 }, { "epoch": 6.0, "learning_rate": 7.776353682078548e-05, "loss": 0.0032, "step": 84300 }, { "epoch": 6.1, "learning_rate": 7.72077439159467e-05, "loss": 0.0027, "step": 85705 }, { "epoch": 6.2, "learning_rate": 7.665195101110795e-05, "loss": 0.0026, "step": 87110 }, { "epoch": 6.3, "learning_rate": 7.609615810626919e-05, "loss": 0.0028, "step": 88515 }, { "epoch": 6.4, "learning_rate": 7.554036520143043e-05, "loss": 0.0029, "step": 89920 }, { "epoch": 6.5, "learning_rate": 7.498457229659167e-05, "loss": 0.0026, "step": 91325 }, { "epoch": 6.6, "learning_rate": 7.442877939175292e-05, "loss": 0.0028, "step": 92730 }, { "epoch": 6.7, "learning_rate": 7.387298648691414e-05, "loss": 0.0031, "step": 94135 }, { "epoch": 6.8, "learning_rate": 7.331719358207538e-05, "loss": 0.0027, "step": 95540 }, { "epoch": 6.9, "learning_rate": 7.276140067723662e-05, "loss": 0.0027, "step": 96945 }, { "epoch": 7.0, "eval_loss": 0.04028007388114929, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.1827, "eval_samples_per_second": 20.521, "eval_steps_per_second": 1.395, "step": 98308 }, { "epoch": 7.0, "learning_rate": 7.220560777239787e-05, "loss": 0.0028, "step": 98350 }, { "epoch": 7.1, "learning_rate": 7.164981486755911e-05, "loss": 0.0027, "step": 99755 }, { "epoch": 7.2, "learning_rate": 7.109402196272035e-05, "loss": 0.0025, "step": 101160 }, { "epoch": 7.3, "learning_rate": 7.053822905788158e-05, "loss": 0.0025, "step": 102565 }, { "epoch": 7.4, "learning_rate": 6.998243615304282e-05, "loss": 0.0024, "step": 103970 }, { "epoch": 7.5, "learning_rate": 6.942664324820406e-05, "loss": 0.0026, "step": 105375 }, { "epoch": 7.6, "learning_rate": 6.88708503433653e-05, "loss": 0.0026, "step": 106780 }, { "epoch": 7.7, "learning_rate": 6.831505743852654e-05, "loss": 0.0026, "step": 108185 }, { "epoch": 7.8, "learning_rate": 6.775926453368779e-05, "loss": 0.0025, "step": 109590 }, { "epoch": 7.9, "learning_rate": 6.720347162884901e-05, "loss": 0.0027, "step": 110995 }, { "epoch": 8.0, "eval_loss": 0.03977961093187332, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.1864, "eval_samples_per_second": 20.515, "eval_steps_per_second": 1.395, "step": 112352 }, { "epoch": 8.0, "learning_rate": 6.664767872401025e-05, "loss": 0.0028, "step": 112400 }, { "epoch": 8.1, "learning_rate": 6.60918858191715e-05, "loss": 0.0023, "step": 113805 }, { "epoch": 8.2, "learning_rate": 6.553609291433274e-05, "loss": 0.0023, "step": 115210 }, { "epoch": 8.3, "learning_rate": 6.498030000949398e-05, "loss": 0.0022, "step": 116615 }, { "epoch": 8.4, "learning_rate": 6.442450710465522e-05, "loss": 0.0023, "step": 118020 }, { "epoch": 8.5, "learning_rate": 6.386871419981645e-05, "loss": 0.0024, "step": 119425 }, { "epoch": 8.6, "learning_rate": 6.331292129497769e-05, "loss": 0.0025, "step": 120830 }, { "epoch": 8.7, "learning_rate": 6.275712839013893e-05, "loss": 0.0024, "step": 122235 }, { "epoch": 8.8, "learning_rate": 6.220133548530017e-05, "loss": 0.0022, "step": 123640 }, { "epoch": 8.9, "learning_rate": 6.164554258046142e-05, "loss": 0.0023, "step": 125045 }, { "epoch": 9.0, "eval_loss": 0.040445487946271896, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.3201, "eval_samples_per_second": 20.292, "eval_steps_per_second": 1.38, "step": 126396 }, { "epoch": 9.0, "learning_rate": 6.108974967562266e-05, "loss": 0.0024, "step": 126450 }, { "epoch": 9.1, "learning_rate": 6.053395677078388e-05, "loss": 0.002, "step": 127855 }, { "epoch": 9.2, "learning_rate": 5.9978163865945126e-05, "loss": 0.0021, "step": 129260 }, { "epoch": 9.3, "learning_rate": 5.9422370961106364e-05, "loss": 0.0022, "step": 130665 }, { "epoch": 9.4, "learning_rate": 5.886657805626761e-05, "loss": 0.0023, "step": 132070 }, { "epoch": 9.5, "learning_rate": 5.831078515142885e-05, "loss": 0.0022, "step": 133475 }, { "epoch": 9.6, "learning_rate": 5.775499224659009e-05, "loss": 0.0021, "step": 134880 }, { "epoch": 9.7, "learning_rate": 5.719919934175132e-05, "loss": 0.0021, "step": 136285 }, { "epoch": 9.8, "learning_rate": 5.664340643691256e-05, "loss": 0.0021, "step": 137690 }, { "epoch": 9.9, "learning_rate": 5.60876135320738e-05, "loss": 0.0023, "step": 139095 }, { "epoch": 10.0, "eval_loss": 0.03852245956659317, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.131, "eval_samples_per_second": 20.608, "eval_steps_per_second": 1.401, "step": 140440 }, { "epoch": 10.0, "learning_rate": 5.5531820627235044e-05, "loss": 0.0023, "step": 140500 }, { "epoch": 10.1, "learning_rate": 5.497602772239628e-05, "loss": 0.0018, "step": 141905 }, { "epoch": 10.2, "learning_rate": 5.4420234817557526e-05, "loss": 0.0021, "step": 143310 }, { "epoch": 10.3, "learning_rate": 5.386444191271876e-05, "loss": 0.0021, "step": 144715 }, { "epoch": 10.4, "learning_rate": 5.3308649007879995e-05, "loss": 0.0021, "step": 146120 }, { "epoch": 10.5, "learning_rate": 5.275285610304124e-05, "loss": 0.0021, "step": 147525 }, { "epoch": 10.6, "learning_rate": 5.219706319820248e-05, "loss": 0.002, "step": 148930 }, { "epoch": 10.7, "learning_rate": 5.1641270293363716e-05, "loss": 0.002, "step": 150335 }, { "epoch": 10.8, "learning_rate": 5.108547738852496e-05, "loss": 0.0019, "step": 151740 }, { "epoch": 10.9, "learning_rate": 5.052968448368619e-05, "loss": 0.002, "step": 153145 }, { "epoch": 11.0, "eval_loss": 0.040671207010746, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.2718, "eval_samples_per_second": 20.372, "eval_steps_per_second": 1.385, "step": 154484 }, { "epoch": 11.0, "learning_rate": 4.997389157884743e-05, "loss": 0.0018, "step": 154550 }, { "epoch": 11.1, "learning_rate": 4.9418098674008675e-05, "loss": 0.002, "step": 155955 }, { "epoch": 11.2, "learning_rate": 4.886230576916991e-05, "loss": 0.0019, "step": 157360 }, { "epoch": 11.3, "learning_rate": 4.830651286433115e-05, "loss": 0.0018, "step": 158765 }, { "epoch": 11.4, "learning_rate": 4.775071995949239e-05, "loss": 0.0018, "step": 160170 }, { "epoch": 11.5, "learning_rate": 4.7194927054653634e-05, "loss": 0.002, "step": 161575 }, { "epoch": 11.6, "learning_rate": 4.6639134149814865e-05, "loss": 0.0019, "step": 162980 }, { "epoch": 11.7, "learning_rate": 4.608334124497611e-05, "loss": 0.0017, "step": 164385 }, { "epoch": 11.81, "learning_rate": 4.552754834013735e-05, "loss": 0.002, "step": 165790 }, { "epoch": 11.91, "learning_rate": 4.4971755435298586e-05, "loss": 0.0018, "step": 167195 }, { "epoch": 12.0, "eval_loss": 0.04258317872881889, "eval_max_distance": 9, "eval_mean_distance": 0, "eval_runtime": 12.8224, "eval_samples_per_second": 19.497, "eval_steps_per_second": 1.326, "step": 168528 }, { "epoch": 12.01, "learning_rate": 4.4415962530459824e-05, "loss": 0.0017, "step": 168600 }, { "epoch": 12.11, "learning_rate": 4.386016962562107e-05, "loss": 0.0016, "step": 170005 }, { "epoch": 12.21, "learning_rate": 4.33043767207823e-05, "loss": 0.0018, "step": 171410 }, { "epoch": 12.31, "learning_rate": 4.2748583815943544e-05, "loss": 0.0016, "step": 172815 }, { "epoch": 12.41, "learning_rate": 4.219279091110478e-05, "loss": 0.0018, "step": 174220 }, { "epoch": 12.51, "learning_rate": 4.163699800626602e-05, "loss": 0.0018, "step": 175625 }, { "epoch": 12.61, "learning_rate": 4.108120510142726e-05, "loss": 0.0018, "step": 177030 }, { "epoch": 12.71, "learning_rate": 4.05254121965885e-05, "loss": 0.0016, "step": 178435 }, { "epoch": 12.81, "learning_rate": 3.996961929174974e-05, "loss": 0.002, "step": 179840 }, { "epoch": 12.91, "learning_rate": 3.941382638691098e-05, "loss": 0.0018, "step": 181245 }, { "epoch": 13.0, "eval_loss": 0.042234089225530624, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 13.9619, "eval_samples_per_second": 17.906, "eval_steps_per_second": 1.218, "step": 182572 }, { "epoch": 13.01, "learning_rate": 3.885803348207222e-05, "loss": 0.0018, "step": 182650 }, { "epoch": 13.11, "learning_rate": 3.8302240577233455e-05, "loss": 0.0017, "step": 184055 }, { "epoch": 13.21, "learning_rate": 3.77464476723947e-05, "loss": 0.0017, "step": 185460 }, { "epoch": 13.31, "learning_rate": 3.719065476755594e-05, "loss": 0.0016, "step": 186865 }, { "epoch": 13.41, "learning_rate": 3.6634861862717176e-05, "loss": 0.0016, "step": 188270 }, { "epoch": 13.51, "learning_rate": 3.6079068957878414e-05, "loss": 0.0016, "step": 189675 }, { "epoch": 13.61, "learning_rate": 3.552327605303966e-05, "loss": 0.0017, "step": 191080 }, { "epoch": 13.71, "learning_rate": 3.4967483148200896e-05, "loss": 0.0017, "step": 192485 }, { "epoch": 13.81, "learning_rate": 3.4411690243362134e-05, "loss": 0.0015, "step": 193890 }, { "epoch": 13.91, "learning_rate": 3.385589733852337e-05, "loss": 0.0016, "step": 195295 }, { "epoch": 14.0, "eval_loss": 0.04207869619131088, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.3358, "eval_samples_per_second": 20.266, "eval_steps_per_second": 1.378, "step": 196616 }, { "epoch": 14.01, "learning_rate": 3.330010443368462e-05, "loss": 0.0016, "step": 196700 }, { "epoch": 14.11, "learning_rate": 3.274431152884585e-05, "loss": 0.0016, "step": 198105 }, { "epoch": 14.21, "learning_rate": 3.218851862400709e-05, "loss": 0.0015, "step": 199510 }, { "epoch": 14.31, "learning_rate": 3.163272571916833e-05, "loss": 0.0016, "step": 200915 }, { "epoch": 14.41, "learning_rate": 3.107693281432957e-05, "loss": 0.0016, "step": 202320 }, { "epoch": 14.51, "learning_rate": 3.052113990949081e-05, "loss": 0.0016, "step": 203725 }, { "epoch": 14.61, "learning_rate": 2.9965347004652052e-05, "loss": 0.0016, "step": 205130 }, { "epoch": 14.71, "learning_rate": 2.9409554099813286e-05, "loss": 0.0015, "step": 206535 }, { "epoch": 14.81, "learning_rate": 2.8853761194974528e-05, "loss": 0.0016, "step": 207940 }, { "epoch": 14.91, "learning_rate": 2.829796829013577e-05, "loss": 0.0016, "step": 209345 }, { "epoch": 15.0, "eval_loss": 0.04015611857175827, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.1736, "eval_samples_per_second": 20.536, "eval_steps_per_second": 1.396, "step": 210660 }, { "epoch": 15.01, "learning_rate": 2.7742175385297004e-05, "loss": 0.0015, "step": 210750 }, { "epoch": 15.11, "learning_rate": 2.7186382480458245e-05, "loss": 0.0015, "step": 212155 }, { "epoch": 15.21, "learning_rate": 2.6630589575619486e-05, "loss": 0.0014, "step": 213560 }, { "epoch": 15.31, "learning_rate": 2.607479667078072e-05, "loss": 0.0016, "step": 214965 }, { "epoch": 15.41, "learning_rate": 2.5519003765941962e-05, "loss": 0.0014, "step": 216370 }, { "epoch": 15.51, "learning_rate": 2.49632108611032e-05, "loss": 0.0016, "step": 217775 }, { "epoch": 15.61, "learning_rate": 2.440741795626444e-05, "loss": 0.0014, "step": 219180 }, { "epoch": 15.71, "learning_rate": 2.385162505142568e-05, "loss": 0.0015, "step": 220585 }, { "epoch": 15.81, "learning_rate": 2.3295832146586918e-05, "loss": 0.0015, "step": 221990 }, { "epoch": 15.91, "learning_rate": 2.2740039241748156e-05, "loss": 0.0014, "step": 223395 }, { "epoch": 16.0, "eval_loss": 0.04069029539823532, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.2719, "eval_samples_per_second": 20.372, "eval_steps_per_second": 1.385, "step": 224704 }, { "epoch": 16.01, "learning_rate": 2.2184246336909397e-05, "loss": 0.0015, "step": 224800 }, { "epoch": 16.11, "learning_rate": 2.1628453432070635e-05, "loss": 0.0013, "step": 226205 }, { "epoch": 16.21, "learning_rate": 2.1072660527231873e-05, "loss": 0.0014, "step": 227610 }, { "epoch": 16.31, "learning_rate": 2.0516867622393114e-05, "loss": 0.0014, "step": 229015 }, { "epoch": 16.41, "learning_rate": 1.9961074717554352e-05, "loss": 0.0015, "step": 230420 }, { "epoch": 16.51, "learning_rate": 1.9405281812715594e-05, "loss": 0.0016, "step": 231825 }, { "epoch": 16.61, "learning_rate": 1.8849488907876832e-05, "loss": 0.0015, "step": 233230 }, { "epoch": 16.71, "learning_rate": 1.8293696003038073e-05, "loss": 0.0014, "step": 234635 }, { "epoch": 16.81, "learning_rate": 1.773790309819931e-05, "loss": 0.0013, "step": 236040 }, { "epoch": 16.91, "learning_rate": 1.7182110193360553e-05, "loss": 0.0014, "step": 237445 }, { "epoch": 17.0, "eval_loss": 0.042702946811914444, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.1336, "eval_samples_per_second": 20.604, "eval_steps_per_second": 1.401, "step": 238748 }, { "epoch": 17.01, "learning_rate": 1.662631728852179e-05, "loss": 0.0013, "step": 238850 }, { "epoch": 17.11, "learning_rate": 1.607052438368303e-05, "loss": 0.0014, "step": 240255 }, { "epoch": 17.21, "learning_rate": 1.551473147884427e-05, "loss": 0.0014, "step": 241660 }, { "epoch": 17.31, "learning_rate": 1.4958938574005508e-05, "loss": 0.0013, "step": 243065 }, { "epoch": 17.41, "learning_rate": 1.4403145669166746e-05, "loss": 0.0014, "step": 244470 }, { "epoch": 17.51, "learning_rate": 1.3847352764327987e-05, "loss": 0.0013, "step": 245875 }, { "epoch": 17.61, "learning_rate": 1.3291559859489225e-05, "loss": 0.0013, "step": 247280 }, { "epoch": 17.71, "learning_rate": 1.2735766954650463e-05, "loss": 0.0014, "step": 248685 }, { "epoch": 17.81, "learning_rate": 1.2179974049811703e-05, "loss": 0.0014, "step": 250090 }, { "epoch": 17.91, "learning_rate": 1.1624181144972943e-05, "loss": 0.0014, "step": 251495 }, { "epoch": 18.0, "eval_loss": 0.04110053926706314, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.0446, "eval_samples_per_second": 20.756, "eval_steps_per_second": 1.411, "step": 252792 }, { "epoch": 18.01, "learning_rate": 1.1068388240134182e-05, "loss": 0.0014, "step": 252900 }, { "epoch": 18.11, "learning_rate": 1.051259533529542e-05, "loss": 0.0014, "step": 254305 }, { "epoch": 18.21, "learning_rate": 9.95680243045666e-06, "loss": 0.0012, "step": 255710 }, { "epoch": 18.31, "learning_rate": 9.4010095256179e-06, "loss": 0.0012, "step": 257115 }, { "epoch": 18.41, "learning_rate": 8.84521662077914e-06, "loss": 0.0014, "step": 258520 }, { "epoch": 18.51, "learning_rate": 8.289423715940379e-06, "loss": 0.0012, "step": 259925 }, { "epoch": 18.61, "learning_rate": 7.733630811101619e-06, "loss": 0.0013, "step": 261330 }, { "epoch": 18.71, "learning_rate": 7.1778379062628565e-06, "loss": 0.0014, "step": 262735 }, { "epoch": 18.81, "learning_rate": 6.622045001424096e-06, "loss": 0.0012, "step": 264140 }, { "epoch": 18.91, "learning_rate": 6.066252096585336e-06, "loss": 0.0013, "step": 265545 }, { "epoch": 19.0, "eval_loss": 0.040625352412462234, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.0757, "eval_samples_per_second": 20.703, "eval_steps_per_second": 1.408, "step": 266836 }, { "epoch": 19.01, "learning_rate": 5.510459191746575e-06, "loss": 0.0013, "step": 266950 }, { "epoch": 19.11, "learning_rate": 4.9546662869078136e-06, "loss": 0.0014, "step": 268355 }, { "epoch": 19.21, "learning_rate": 4.398873382069053e-06, "loss": 0.0013, "step": 269760 }, { "epoch": 19.31, "learning_rate": 3.843080477230292e-06, "loss": 0.0013, "step": 271165 }, { "epoch": 19.41, "learning_rate": 3.2872875723915313e-06, "loss": 0.0013, "step": 272570 }, { "epoch": 19.51, "learning_rate": 2.7314946675527706e-06, "loss": 0.0012, "step": 273975 }, { "epoch": 19.61, "learning_rate": 2.1757017627140103e-06, "loss": 0.0012, "step": 275380 }, { "epoch": 19.71, "learning_rate": 1.619908857875249e-06, "loss": 0.0013, "step": 276785 }, { "epoch": 19.81, "learning_rate": 1.0641159530364886e-06, "loss": 0.0014, "step": 278190 }, { "epoch": 19.91, "learning_rate": 5.083230481977278e-07, "loss": 0.0013, "step": 279595 }, { "epoch": 20.0, "eval_loss": 0.040508754551410675, "eval_max_distance": 6, "eval_mean_distance": 0, "eval_runtime": 12.1336, "eval_samples_per_second": 20.604, "eval_steps_per_second": 1.401, "step": 280880 }, { "epoch": 20.0, "step": 280880, "total_flos": 7.269950341627085e+16, "train_loss": 0.0021448437322240947, "train_runtime": 20054.4611, "train_samples_per_second": 210.084, "train_steps_per_second": 14.006 } ], "logging_steps": 1405, "max_steps": 280880, "num_train_epochs": 20, "save_steps": 2809, "total_flos": 7.269950341627085e+16, "trial_name": null, "trial_params": null }