|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 280880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.5609999287800016e-09, |
|
"loss": 0.0011, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.003204899935903e-06, |
|
"loss": 0.0046, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.0006409799871805e-05, |
|
"loss": 0.005, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5009614699807706e-05, |
|
"loss": 0.0052, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.001281959974361e-05, |
|
"loss": 0.005, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5016024499679513e-05, |
|
"loss": 0.0048, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.001922939961541e-05, |
|
"loss": 0.0045, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.5022434299551316e-05, |
|
"loss": 0.0049, |
|
"step": 9835 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.002563919948722e-05, |
|
"loss": 0.0051, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.502884409942312e-05, |
|
"loss": 0.0052, |
|
"step": 12645 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.027173755690455437, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.945, |
|
"eval_samples_per_second": 17.928, |
|
"eval_steps_per_second": 1.219, |
|
"step": 14041 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.0032048999359025e-05, |
|
"loss": 0.0045, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.5035253899294924e-05, |
|
"loss": 0.0047, |
|
"step": 15455 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.003845879923082e-05, |
|
"loss": 0.0044, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.504166369916673e-05, |
|
"loss": 0.0045, |
|
"step": 18265 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.002990600968385e-05, |
|
"loss": 0.0046, |
|
"step": 19670 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.50320421532327e-05, |
|
"loss": 0.0043, |
|
"step": 21075 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.003417829678155e-05, |
|
"loss": 0.0042, |
|
"step": 22480 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.503631444033039e-05, |
|
"loss": 0.0046, |
|
"step": 23885 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.003845058387924e-05, |
|
"loss": 0.0043, |
|
"step": 25290 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.504058672742809e-05, |
|
"loss": 0.0045, |
|
"step": 26695 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.03272660821676254, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.7866, |
|
"eval_samples_per_second": 19.552, |
|
"eval_steps_per_second": 1.33, |
|
"step": 28088 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.99952530143359e-05, |
|
"loss": 0.0046, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.943946010949714e-05, |
|
"loss": 0.0042, |
|
"step": 29505 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.888366720465838e-05, |
|
"loss": 0.0043, |
|
"step": 30910 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 9.832787429981962e-05, |
|
"loss": 0.0044, |
|
"step": 32315 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.777208139498087e-05, |
|
"loss": 0.0045, |
|
"step": 33720 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.721628849014209e-05, |
|
"loss": 0.0041, |
|
"step": 35125 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.666049558530333e-05, |
|
"loss": 0.0043, |
|
"step": 36530 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.610470268046457e-05, |
|
"loss": 0.0046, |
|
"step": 37935 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 9.554890977562582e-05, |
|
"loss": 0.0043, |
|
"step": 39340 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.499311687078706e-05, |
|
"loss": 0.0043, |
|
"step": 40745 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.03169206902384758, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.3553, |
|
"eval_samples_per_second": 20.234, |
|
"eval_steps_per_second": 1.376, |
|
"step": 42132 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.44373239659483e-05, |
|
"loss": 0.0048, |
|
"step": 42150 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.388153106110953e-05, |
|
"loss": 0.0034, |
|
"step": 43555 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.332573815627077e-05, |
|
"loss": 0.0036, |
|
"step": 44960 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 9.276994525143201e-05, |
|
"loss": 0.0037, |
|
"step": 46365 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.221415234659325e-05, |
|
"loss": 0.004, |
|
"step": 47770 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 9.16583594417545e-05, |
|
"loss": 0.0041, |
|
"step": 49175 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.110256653691574e-05, |
|
"loss": 0.0039, |
|
"step": 50580 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.054677363207696e-05, |
|
"loss": 0.0039, |
|
"step": 51985 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.99909807272382e-05, |
|
"loss": 0.0039, |
|
"step": 53390 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.943518782239945e-05, |
|
"loss": 0.0042, |
|
"step": 54795 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.031554438173770905, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.1943, |
|
"eval_samples_per_second": 20.501, |
|
"eval_steps_per_second": 1.394, |
|
"step": 56176 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.887939491756069e-05, |
|
"loss": 0.004, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.832360201272193e-05, |
|
"loss": 0.0034, |
|
"step": 57605 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.776780910788317e-05, |
|
"loss": 0.0033, |
|
"step": 59010 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 8.72120162030444e-05, |
|
"loss": 0.0034, |
|
"step": 60415 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 8.665622329820564e-05, |
|
"loss": 0.0034, |
|
"step": 61820 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 8.610043039336688e-05, |
|
"loss": 0.0032, |
|
"step": 63225 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.554463748852812e-05, |
|
"loss": 0.0035, |
|
"step": 64630 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.498884458368937e-05, |
|
"loss": 0.0035, |
|
"step": 66035 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.443305167885061e-05, |
|
"loss": 0.0035, |
|
"step": 67440 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 8.387725877401183e-05, |
|
"loss": 0.0035, |
|
"step": 68845 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.03568544238805771, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.3273, |
|
"eval_samples_per_second": 20.28, |
|
"eval_steps_per_second": 1.379, |
|
"step": 70220 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 8.332146586917307e-05, |
|
"loss": 0.0037, |
|
"step": 70250 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 8.276567296433432e-05, |
|
"loss": 0.0028, |
|
"step": 71655 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 8.220988005949556e-05, |
|
"loss": 0.0029, |
|
"step": 73060 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.16540871546568e-05, |
|
"loss": 0.003, |
|
"step": 74465 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 8.109829424981804e-05, |
|
"loss": 0.0031, |
|
"step": 75870 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 8.054250134497927e-05, |
|
"loss": 0.0032, |
|
"step": 77275 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.998670844014051e-05, |
|
"loss": 0.003, |
|
"step": 78680 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 7.943091553530175e-05, |
|
"loss": 0.0032, |
|
"step": 80085 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 7.8875122630463e-05, |
|
"loss": 0.0031, |
|
"step": 81490 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.831932972562424e-05, |
|
"loss": 0.0032, |
|
"step": 82895 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.03648597374558449, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.1121, |
|
"eval_samples_per_second": 20.641, |
|
"eval_steps_per_second": 1.404, |
|
"step": 84264 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.776353682078548e-05, |
|
"loss": 0.0032, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.72077439159467e-05, |
|
"loss": 0.0027, |
|
"step": 85705 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.665195101110795e-05, |
|
"loss": 0.0026, |
|
"step": 87110 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.609615810626919e-05, |
|
"loss": 0.0028, |
|
"step": 88515 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.554036520143043e-05, |
|
"loss": 0.0029, |
|
"step": 89920 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7.498457229659167e-05, |
|
"loss": 0.0026, |
|
"step": 91325 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 7.442877939175292e-05, |
|
"loss": 0.0028, |
|
"step": 92730 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 7.387298648691414e-05, |
|
"loss": 0.0031, |
|
"step": 94135 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 7.331719358207538e-05, |
|
"loss": 0.0027, |
|
"step": 95540 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 7.276140067723662e-05, |
|
"loss": 0.0027, |
|
"step": 96945 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.04028007388114929, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.1827, |
|
"eval_samples_per_second": 20.521, |
|
"eval_steps_per_second": 1.395, |
|
"step": 98308 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 7.220560777239787e-05, |
|
"loss": 0.0028, |
|
"step": 98350 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 7.164981486755911e-05, |
|
"loss": 0.0027, |
|
"step": 99755 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 7.109402196272035e-05, |
|
"loss": 0.0025, |
|
"step": 101160 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 7.053822905788158e-05, |
|
"loss": 0.0025, |
|
"step": 102565 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 6.998243615304282e-05, |
|
"loss": 0.0024, |
|
"step": 103970 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 6.942664324820406e-05, |
|
"loss": 0.0026, |
|
"step": 105375 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 6.88708503433653e-05, |
|
"loss": 0.0026, |
|
"step": 106780 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.831505743852654e-05, |
|
"loss": 0.0026, |
|
"step": 108185 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 6.775926453368779e-05, |
|
"loss": 0.0025, |
|
"step": 109590 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.720347162884901e-05, |
|
"loss": 0.0027, |
|
"step": 110995 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.03977961093187332, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.1864, |
|
"eval_samples_per_second": 20.515, |
|
"eval_steps_per_second": 1.395, |
|
"step": 112352 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.664767872401025e-05, |
|
"loss": 0.0028, |
|
"step": 112400 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 6.60918858191715e-05, |
|
"loss": 0.0023, |
|
"step": 113805 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 6.553609291433274e-05, |
|
"loss": 0.0023, |
|
"step": 115210 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 6.498030000949398e-05, |
|
"loss": 0.0022, |
|
"step": 116615 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 6.442450710465522e-05, |
|
"loss": 0.0023, |
|
"step": 118020 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.386871419981645e-05, |
|
"loss": 0.0024, |
|
"step": 119425 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 6.331292129497769e-05, |
|
"loss": 0.0025, |
|
"step": 120830 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.275712839013893e-05, |
|
"loss": 0.0024, |
|
"step": 122235 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.220133548530017e-05, |
|
"loss": 0.0022, |
|
"step": 123640 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 6.164554258046142e-05, |
|
"loss": 0.0023, |
|
"step": 125045 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.040445487946271896, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.3201, |
|
"eval_samples_per_second": 20.292, |
|
"eval_steps_per_second": 1.38, |
|
"step": 126396 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 6.108974967562266e-05, |
|
"loss": 0.0024, |
|
"step": 126450 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 6.053395677078388e-05, |
|
"loss": 0.002, |
|
"step": 127855 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5.9978163865945126e-05, |
|
"loss": 0.0021, |
|
"step": 129260 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.9422370961106364e-05, |
|
"loss": 0.0022, |
|
"step": 130665 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 5.886657805626761e-05, |
|
"loss": 0.0023, |
|
"step": 132070 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.831078515142885e-05, |
|
"loss": 0.0022, |
|
"step": 133475 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.775499224659009e-05, |
|
"loss": 0.0021, |
|
"step": 134880 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.719919934175132e-05, |
|
"loss": 0.0021, |
|
"step": 136285 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 5.664340643691256e-05, |
|
"loss": 0.0021, |
|
"step": 137690 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 5.60876135320738e-05, |
|
"loss": 0.0023, |
|
"step": 139095 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.03852245956659317, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.131, |
|
"eval_samples_per_second": 20.608, |
|
"eval_steps_per_second": 1.401, |
|
"step": 140440 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.5531820627235044e-05, |
|
"loss": 0.0023, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 5.497602772239628e-05, |
|
"loss": 0.0018, |
|
"step": 141905 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 5.4420234817557526e-05, |
|
"loss": 0.0021, |
|
"step": 143310 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 5.386444191271876e-05, |
|
"loss": 0.0021, |
|
"step": 144715 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 5.3308649007879995e-05, |
|
"loss": 0.0021, |
|
"step": 146120 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 5.275285610304124e-05, |
|
"loss": 0.0021, |
|
"step": 147525 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 5.219706319820248e-05, |
|
"loss": 0.002, |
|
"step": 148930 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 5.1641270293363716e-05, |
|
"loss": 0.002, |
|
"step": 150335 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5.108547738852496e-05, |
|
"loss": 0.0019, |
|
"step": 151740 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 5.052968448368619e-05, |
|
"loss": 0.002, |
|
"step": 153145 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.040671207010746, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.2718, |
|
"eval_samples_per_second": 20.372, |
|
"eval_steps_per_second": 1.385, |
|
"step": 154484 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.997389157884743e-05, |
|
"loss": 0.0018, |
|
"step": 154550 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 4.9418098674008675e-05, |
|
"loss": 0.002, |
|
"step": 155955 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.886230576916991e-05, |
|
"loss": 0.0019, |
|
"step": 157360 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.830651286433115e-05, |
|
"loss": 0.0018, |
|
"step": 158765 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.775071995949239e-05, |
|
"loss": 0.0018, |
|
"step": 160170 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.7194927054653634e-05, |
|
"loss": 0.002, |
|
"step": 161575 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.6639134149814865e-05, |
|
"loss": 0.0019, |
|
"step": 162980 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 4.608334124497611e-05, |
|
"loss": 0.0017, |
|
"step": 164385 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 4.552754834013735e-05, |
|
"loss": 0.002, |
|
"step": 165790 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 4.4971755435298586e-05, |
|
"loss": 0.0018, |
|
"step": 167195 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.04258317872881889, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.8224, |
|
"eval_samples_per_second": 19.497, |
|
"eval_steps_per_second": 1.326, |
|
"step": 168528 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 4.4415962530459824e-05, |
|
"loss": 0.0017, |
|
"step": 168600 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 4.386016962562107e-05, |
|
"loss": 0.0016, |
|
"step": 170005 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 4.33043767207823e-05, |
|
"loss": 0.0018, |
|
"step": 171410 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 4.2748583815943544e-05, |
|
"loss": 0.0016, |
|
"step": 172815 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 4.219279091110478e-05, |
|
"loss": 0.0018, |
|
"step": 174220 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 4.163699800626602e-05, |
|
"loss": 0.0018, |
|
"step": 175625 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 4.108120510142726e-05, |
|
"loss": 0.0018, |
|
"step": 177030 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 4.05254121965885e-05, |
|
"loss": 0.0016, |
|
"step": 178435 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 3.996961929174974e-05, |
|
"loss": 0.002, |
|
"step": 179840 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 3.941382638691098e-05, |
|
"loss": 0.0018, |
|
"step": 181245 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.042234089225530624, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.9619, |
|
"eval_samples_per_second": 17.906, |
|
"eval_steps_per_second": 1.218, |
|
"step": 182572 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 3.885803348207222e-05, |
|
"loss": 0.0018, |
|
"step": 182650 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 3.8302240577233455e-05, |
|
"loss": 0.0017, |
|
"step": 184055 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 3.77464476723947e-05, |
|
"loss": 0.0017, |
|
"step": 185460 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 3.719065476755594e-05, |
|
"loss": 0.0016, |
|
"step": 186865 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 3.6634861862717176e-05, |
|
"loss": 0.0016, |
|
"step": 188270 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.6079068957878414e-05, |
|
"loss": 0.0016, |
|
"step": 189675 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 3.552327605303966e-05, |
|
"loss": 0.0017, |
|
"step": 191080 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 3.4967483148200896e-05, |
|
"loss": 0.0017, |
|
"step": 192485 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 3.4411690243362134e-05, |
|
"loss": 0.0015, |
|
"step": 193890 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 3.385589733852337e-05, |
|
"loss": 0.0016, |
|
"step": 195295 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.04207869619131088, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.3358, |
|
"eval_samples_per_second": 20.266, |
|
"eval_steps_per_second": 1.378, |
|
"step": 196616 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 3.330010443368462e-05, |
|
"loss": 0.0016, |
|
"step": 196700 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 3.274431152884585e-05, |
|
"loss": 0.0016, |
|
"step": 198105 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 3.218851862400709e-05, |
|
"loss": 0.0015, |
|
"step": 199510 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 3.163272571916833e-05, |
|
"loss": 0.0016, |
|
"step": 200915 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 3.107693281432957e-05, |
|
"loss": 0.0016, |
|
"step": 202320 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 3.052113990949081e-05, |
|
"loss": 0.0016, |
|
"step": 203725 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 2.9965347004652052e-05, |
|
"loss": 0.0016, |
|
"step": 205130 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 2.9409554099813286e-05, |
|
"loss": 0.0015, |
|
"step": 206535 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 2.8853761194974528e-05, |
|
"loss": 0.0016, |
|
"step": 207940 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 2.829796829013577e-05, |
|
"loss": 0.0016, |
|
"step": 209345 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.04015611857175827, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.1736, |
|
"eval_samples_per_second": 20.536, |
|
"eval_steps_per_second": 1.396, |
|
"step": 210660 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 2.7742175385297004e-05, |
|
"loss": 0.0015, |
|
"step": 210750 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 2.7186382480458245e-05, |
|
"loss": 0.0015, |
|
"step": 212155 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 2.6630589575619486e-05, |
|
"loss": 0.0014, |
|
"step": 213560 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 2.607479667078072e-05, |
|
"loss": 0.0016, |
|
"step": 214965 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 2.5519003765941962e-05, |
|
"loss": 0.0014, |
|
"step": 216370 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"learning_rate": 2.49632108611032e-05, |
|
"loss": 0.0016, |
|
"step": 217775 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 2.440741795626444e-05, |
|
"loss": 0.0014, |
|
"step": 219180 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 2.385162505142568e-05, |
|
"loss": 0.0015, |
|
"step": 220585 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 2.3295832146586918e-05, |
|
"loss": 0.0015, |
|
"step": 221990 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 2.2740039241748156e-05, |
|
"loss": 0.0014, |
|
"step": 223395 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.04069029539823532, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.2719, |
|
"eval_samples_per_second": 20.372, |
|
"eval_steps_per_second": 1.385, |
|
"step": 224704 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 2.2184246336909397e-05, |
|
"loss": 0.0015, |
|
"step": 224800 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 2.1628453432070635e-05, |
|
"loss": 0.0013, |
|
"step": 226205 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 2.1072660527231873e-05, |
|
"loss": 0.0014, |
|
"step": 227610 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 2.0516867622393114e-05, |
|
"loss": 0.0014, |
|
"step": 229015 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 1.9961074717554352e-05, |
|
"loss": 0.0015, |
|
"step": 230420 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 1.9405281812715594e-05, |
|
"loss": 0.0016, |
|
"step": 231825 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"learning_rate": 1.8849488907876832e-05, |
|
"loss": 0.0015, |
|
"step": 233230 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 1.8293696003038073e-05, |
|
"loss": 0.0014, |
|
"step": 234635 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 1.773790309819931e-05, |
|
"loss": 0.0013, |
|
"step": 236040 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 1.7182110193360553e-05, |
|
"loss": 0.0014, |
|
"step": 237445 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.042702946811914444, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.1336, |
|
"eval_samples_per_second": 20.604, |
|
"eval_steps_per_second": 1.401, |
|
"step": 238748 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 1.662631728852179e-05, |
|
"loss": 0.0013, |
|
"step": 238850 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 1.607052438368303e-05, |
|
"loss": 0.0014, |
|
"step": 240255 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 1.551473147884427e-05, |
|
"loss": 0.0014, |
|
"step": 241660 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 1.4958938574005508e-05, |
|
"loss": 0.0013, |
|
"step": 243065 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 1.4403145669166746e-05, |
|
"loss": 0.0014, |
|
"step": 244470 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 1.3847352764327987e-05, |
|
"loss": 0.0013, |
|
"step": 245875 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 1.3291559859489225e-05, |
|
"loss": 0.0013, |
|
"step": 247280 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 1.2735766954650463e-05, |
|
"loss": 0.0014, |
|
"step": 248685 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 1.2179974049811703e-05, |
|
"loss": 0.0014, |
|
"step": 250090 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 1.1624181144972943e-05, |
|
"loss": 0.0014, |
|
"step": 251495 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.04110053926706314, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.0446, |
|
"eval_samples_per_second": 20.756, |
|
"eval_steps_per_second": 1.411, |
|
"step": 252792 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 1.1068388240134182e-05, |
|
"loss": 0.0014, |
|
"step": 252900 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 1.051259533529542e-05, |
|
"loss": 0.0014, |
|
"step": 254305 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 9.95680243045666e-06, |
|
"loss": 0.0012, |
|
"step": 255710 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 9.4010095256179e-06, |
|
"loss": 0.0012, |
|
"step": 257115 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 8.84521662077914e-06, |
|
"loss": 0.0014, |
|
"step": 258520 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 8.289423715940379e-06, |
|
"loss": 0.0012, |
|
"step": 259925 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 7.733630811101619e-06, |
|
"loss": 0.0013, |
|
"step": 261330 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 7.1778379062628565e-06, |
|
"loss": 0.0014, |
|
"step": 262735 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 6.622045001424096e-06, |
|
"loss": 0.0012, |
|
"step": 264140 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 6.066252096585336e-06, |
|
"loss": 0.0013, |
|
"step": 265545 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.040625352412462234, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.0757, |
|
"eval_samples_per_second": 20.703, |
|
"eval_steps_per_second": 1.408, |
|
"step": 266836 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 5.510459191746575e-06, |
|
"loss": 0.0013, |
|
"step": 266950 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 4.9546662869078136e-06, |
|
"loss": 0.0014, |
|
"step": 268355 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 4.398873382069053e-06, |
|
"loss": 0.0013, |
|
"step": 269760 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 3.843080477230292e-06, |
|
"loss": 0.0013, |
|
"step": 271165 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 3.2872875723915313e-06, |
|
"loss": 0.0013, |
|
"step": 272570 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 2.7314946675527706e-06, |
|
"loss": 0.0012, |
|
"step": 273975 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 2.1757017627140103e-06, |
|
"loss": 0.0012, |
|
"step": 275380 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 1.619908857875249e-06, |
|
"loss": 0.0013, |
|
"step": 276785 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.0641159530364886e-06, |
|
"loss": 0.0014, |
|
"step": 278190 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 5.083230481977278e-07, |
|
"loss": 0.0013, |
|
"step": 279595 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.040508754551410675, |
|
"eval_max_distance": 6, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.1336, |
|
"eval_samples_per_second": 20.604, |
|
"eval_steps_per_second": 1.401, |
|
"step": 280880 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 280880, |
|
"total_flos": 7.269950341627085e+16, |
|
"train_loss": 0.0021448437322240947, |
|
"train_runtime": 20054.4611, |
|
"train_samples_per_second": 210.084, |
|
"train_steps_per_second": 14.006 |
|
} |
|
], |
|
"logging_steps": 1405, |
|
"max_steps": 280880, |
|
"num_train_epochs": 20, |
|
"save_steps": 2809, |
|
"total_flos": 7.269950341627085e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|