{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 40.0,
  "eval_steps": 500,
  "global_step": 106560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.384384384384385e-09, |
|
"loss": 30.0191, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5.001876876876877e-06, |
|
"loss": 28.4983, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.0003753753753754e-05, |
|
"loss": 21.5164, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.5005630630630632e-05, |
|
"loss": 7.132, |
|
"step": 1599 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.000750750750751e-05, |
|
"loss": 0.8541, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.3403850197792053, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.1154, |
|
"eval_samples_per_second": 320.968, |
|
"eval_steps_per_second": 21.517, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.500938438438439e-05, |
|
"loss": 0.3348, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.0011261261261263e-05, |
|
"loss": 0.5279, |
|
"step": 3198 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.501313813813814e-05, |
|
"loss": 0.2294, |
|
"step": 3731 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.001501501501502e-05, |
|
"loss": 0.0605, |
|
"step": 4264 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.5016891891891895e-05, |
|
"loss": 0.0451, |
|
"step": 4797 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.060470160096883774, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9182, |
|
"eval_samples_per_second": 389.888, |
|
"eval_steps_per_second": 26.138, |
|
"step": 5328 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.001876876876878e-05, |
|
"loss": 0.0605, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.502064564564565e-05, |
|
"loss": 0.025, |
|
"step": 5863 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.0022522522522526e-05, |
|
"loss": 0.0193, |
|
"step": 6396 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.502439939939941e-05, |
|
"loss": 0.0153, |
|
"step": 6929 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 7.002627627627628e-05, |
|
"loss": 0.0112, |
|
"step": 7462 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.04112406447529793, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9146, |
|
"eval_samples_per_second": 391.418, |
|
"eval_steps_per_second": 26.24, |
|
"step": 7992 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.502815315315315e-05, |
|
"loss": 0.0109, |
|
"step": 7995 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 8.003003003003004e-05, |
|
"loss": 0.0095, |
|
"step": 8528 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 8.50319069069069e-05, |
|
"loss": 0.012, |
|
"step": 9061 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.003378378378379e-05, |
|
"loss": 0.0223, |
|
"step": 9594 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 9.503566066066066e-05, |
|
"loss": 0.0068, |
|
"step": 10127 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.020507752895355225, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9253, |
|
"eval_samples_per_second": 386.912, |
|
"eval_steps_per_second": 25.938, |
|
"step": 10656 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 9.999582916249583e-05, |
|
"loss": 0.0077, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 9.944006506506507e-05, |
|
"loss": 0.0058, |
|
"step": 11193 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 9.88843009676343e-05, |
|
"loss": 0.0072, |
|
"step": 11726 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 9.832853687020355e-05, |
|
"loss": 0.0097, |
|
"step": 12259 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 9.777277277277279e-05, |
|
"loss": 0.007, |
|
"step": 12792 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.02420434169471264, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9141, |
|
"eval_samples_per_second": 391.658, |
|
"eval_steps_per_second": 26.256, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.721700867534201e-05, |
|
"loss": 0.0049, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 9.666124457791124e-05, |
|
"loss": 0.005, |
|
"step": 13858 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 9.610548048048048e-05, |
|
"loss": 0.0035, |
|
"step": 14391 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 9.554971638304973e-05, |
|
"loss": 0.0061, |
|
"step": 14924 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 9.499395228561896e-05, |
|
"loss": 0.0022, |
|
"step": 15457 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.027173461392521858, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9101, |
|
"eval_samples_per_second": 393.378, |
|
"eval_steps_per_second": 26.372, |
|
"step": 15984 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 9.44381881881882e-05, |
|
"loss": 0.0048, |
|
"step": 15990 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 9.388242409075743e-05, |
|
"loss": 0.0053, |
|
"step": 16523 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 9.332665999332665e-05, |
|
"loss": 0.005, |
|
"step": 17056 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 9.27708958958959e-05, |
|
"loss": 0.0034, |
|
"step": 17589 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 9.221513179846514e-05, |
|
"loss": 0.0054, |
|
"step": 18122 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.008011276833713055, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9187, |
|
"eval_samples_per_second": 389.697, |
|
"eval_steps_per_second": 26.125, |
|
"step": 18648 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.165936770103437e-05, |
|
"loss": 0.0036, |
|
"step": 18655 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 9.110360360360361e-05, |
|
"loss": 0.002, |
|
"step": 19188 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 9.054783950617284e-05, |
|
"loss": 0.0029, |
|
"step": 19721 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 8.999207540874208e-05, |
|
"loss": 0.0031, |
|
"step": 20254 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 8.943631131131131e-05, |
|
"loss": 0.0036, |
|
"step": 20787 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.025212394073605537, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9136, |
|
"eval_samples_per_second": 391.872, |
|
"eval_steps_per_second": 26.271, |
|
"step": 21312 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 8.888054721388055e-05, |
|
"loss": 0.0022, |
|
"step": 21320 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 8.832478311644978e-05, |
|
"loss": 0.0021, |
|
"step": 21853 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.776901901901903e-05, |
|
"loss": 0.0016, |
|
"step": 22386 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 8.721325492158827e-05, |
|
"loss": 0.0021, |
|
"step": 22919 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 8.665749082415749e-05, |
|
"loss": 0.0039, |
|
"step": 23452 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.020978303626179695, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9126, |
|
"eval_samples_per_second": 392.285, |
|
"eval_steps_per_second": 26.298, |
|
"step": 23976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 8.610172672672672e-05, |
|
"loss": 0.0032, |
|
"step": 23985 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 8.554596262929596e-05, |
|
"loss": 0.0027, |
|
"step": 24518 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 8.499019853186521e-05, |
|
"loss": 0.0022, |
|
"step": 25051 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 8.443443443443444e-05, |
|
"loss": 0.002, |
|
"step": 25584 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 8.387867033700368e-05, |
|
"loss": 0.0026, |
|
"step": 26117 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.017031751573085785, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9145, |
|
"eval_samples_per_second": 391.481, |
|
"eval_steps_per_second": 26.245, |
|
"step": 26640 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 8.332290623957291e-05, |
|
"loss": 0.0014, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 8.276714214214215e-05, |
|
"loss": 0.0016, |
|
"step": 27183 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 8.221137804471138e-05, |
|
"loss": 0.0045, |
|
"step": 27716 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 8.165561394728062e-05, |
|
"loss": 0.0017, |
|
"step": 28249 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 8.109984984984985e-05, |
|
"loss": 0.0026, |
|
"step": 28782 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.004335461650043726, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9104, |
|
"eval_samples_per_second": 393.245, |
|
"eval_steps_per_second": 26.363, |
|
"step": 29304 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 8.054408575241909e-05, |
|
"loss": 0.0016, |
|
"step": 29315 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 7.998832165498832e-05, |
|
"loss": 0.0006, |
|
"step": 29848 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 7.943255755755756e-05, |
|
"loss": 0.0011, |
|
"step": 30381 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 7.88767934601268e-05, |
|
"loss": 0.0009, |
|
"step": 30914 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 7.832102936269603e-05, |
|
"loss": 0.0029, |
|
"step": 31447 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.013542454689741135, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9125, |
|
"eval_samples_per_second": 392.323, |
|
"eval_steps_per_second": 26.301, |
|
"step": 31968 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 7.776526526526526e-05, |
|
"loss": 0.0029, |
|
"step": 31980 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 7.720950116783451e-05, |
|
"loss": 0.0008, |
|
"step": 32513 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 7.665373707040375e-05, |
|
"loss": 0.0027, |
|
"step": 33046 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 7.609797297297297e-05, |
|
"loss": 0.0016, |
|
"step": 33579 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 7.55422088755422e-05, |
|
"loss": 0.0011, |
|
"step": 34112 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.03128792718052864, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9117, |
|
"eval_samples_per_second": 392.662, |
|
"eval_steps_per_second": 26.324, |
|
"step": 34632 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 7.498644477811145e-05, |
|
"loss": 0.0007, |
|
"step": 34645 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 7.443068068068069e-05, |
|
"loss": 0.0014, |
|
"step": 35178 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 7.387491658324992e-05, |
|
"loss": 0.0067, |
|
"step": 35711 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 7.331915248581916e-05, |
|
"loss": 0.0008, |
|
"step": 36244 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 7.27633883883884e-05, |
|
"loss": 0.0017, |
|
"step": 36777 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.03530227765440941, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9178, |
|
"eval_samples_per_second": 390.083, |
|
"eval_steps_per_second": 26.151, |
|
"step": 37296 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 7.220762429095763e-05, |
|
"loss": 0.001, |
|
"step": 37310 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 7.165186019352686e-05, |
|
"loss": 0.001, |
|
"step": 37843 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 7.10960960960961e-05, |
|
"loss": 0.0012, |
|
"step": 38376 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 7.054033199866533e-05, |
|
"loss": 0.0011, |
|
"step": 38909 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 6.998456790123458e-05, |
|
"loss": 0.0014, |
|
"step": 39442 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.011675473302602768, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.916, |
|
"eval_samples_per_second": 390.823, |
|
"eval_steps_per_second": 26.2, |
|
"step": 39960 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 6.94288038038038e-05, |
|
"loss": 0.001, |
|
"step": 39975 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 6.887303970637304e-05, |
|
"loss": 0.0003, |
|
"step": 40508 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 6.831727560894227e-05, |
|
"loss": 0.002, |
|
"step": 41041 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 6.776151151151151e-05, |
|
"loss": 0.001, |
|
"step": 41574 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 6.720574741408076e-05, |
|
"loss": 0.0014, |
|
"step": 42107 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.0139808664098382, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9131, |
|
"eval_samples_per_second": 392.074, |
|
"eval_steps_per_second": 26.284, |
|
"step": 42624 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 6.664998331665e-05, |
|
"loss": 0.0007, |
|
"step": 42640 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 6.609421921921923e-05, |
|
"loss": 0.0004, |
|
"step": 43173 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 6.553845512178845e-05, |
|
"loss": 0.0006, |
|
"step": 43706 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"learning_rate": 6.498269102435769e-05, |
|
"loss": 0.0017, |
|
"step": 44239 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 6.442692692692693e-05, |
|
"loss": 0.0013, |
|
"step": 44772 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.022025227546691895, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9176, |
|
"eval_samples_per_second": 390.127, |
|
"eval_steps_per_second": 26.154, |
|
"step": 45288 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 6.387116282949617e-05, |
|
"loss": 0.0013, |
|
"step": 45305 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 6.33153987320654e-05, |
|
"loss": 0.0003, |
|
"step": 45838 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 6.275963463463464e-05, |
|
"loss": 0.0011, |
|
"step": 46371 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 6.220387053720387e-05, |
|
"loss": 0.0004, |
|
"step": 46904 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 6.164810643977311e-05, |
|
"loss": 0.0009, |
|
"step": 47437 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.024678541347384453, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.908, |
|
"eval_samples_per_second": 394.28, |
|
"eval_steps_per_second": 26.432, |
|
"step": 47952 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 6.109234234234234e-05, |
|
"loss": 0.0023, |
|
"step": 47970 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 6.053657824491158e-05, |
|
"loss": 0.0007, |
|
"step": 48503 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 5.9980814147480815e-05, |
|
"loss": 0.0015, |
|
"step": 49036 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 5.9425050050050057e-05, |
|
"loss": 0.0004, |
|
"step": 49569 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 5.886928595261929e-05, |
|
"loss": 0.0017, |
|
"step": 50102 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.03220739960670471, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9151, |
|
"eval_samples_per_second": 391.2, |
|
"eval_steps_per_second": 26.226, |
|
"step": 50616 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 5.831352185518853e-05, |
|
"loss": 0.0011, |
|
"step": 50635 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 5.7757757757757755e-05, |
|
"loss": 0.0009, |
|
"step": 51168 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 5.720199366032699e-05, |
|
"loss": 0.0005, |
|
"step": 51701 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 5.664622956289624e-05, |
|
"loss": 0.0011, |
|
"step": 52234 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 5.609046546546547e-05, |
|
"loss": 0.0022, |
|
"step": 52767 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.0314439982175827, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9074, |
|
"eval_samples_per_second": 394.526, |
|
"eval_steps_per_second": 26.449, |
|
"step": 53280 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 5.55347013680347e-05, |
|
"loss": 0.001, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 5.497893727060394e-05, |
|
"loss": 0.001, |
|
"step": 53833 |
|
}, |
|
{ |
|
"epoch": 20.41, |
|
"learning_rate": 5.442317317317318e-05, |
|
"loss": 0.0, |
|
"step": 54366 |
|
}, |
|
{ |
|
"epoch": 20.61, |
|
"learning_rate": 5.3867409075742415e-05, |
|
"loss": 0.0015, |
|
"step": 54899 |
|
}, |
|
{ |
|
"epoch": 20.81, |
|
"learning_rate": 5.331164497831165e-05, |
|
"loss": 0.0006, |
|
"step": 55432 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.030524656176567078, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9097, |
|
"eval_samples_per_second": 393.555, |
|
"eval_steps_per_second": 26.384, |
|
"step": 55944 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 5.275588088088088e-05, |
|
"loss": 0.0005, |
|
"step": 55965 |
|
}, |
|
{ |
|
"epoch": 21.21, |
|
"learning_rate": 5.220011678345011e-05, |
|
"loss": 0.0006, |
|
"step": 56498 |
|
}, |
|
{ |
|
"epoch": 21.41, |
|
"learning_rate": 5.164435268601936e-05, |
|
"loss": 0.0012, |
|
"step": 57031 |
|
}, |
|
{ |
|
"epoch": 21.61, |
|
"learning_rate": 5.108858858858859e-05, |
|
"loss": 0.0005, |
|
"step": 57564 |
|
}, |
|
{ |
|
"epoch": 21.81, |
|
"learning_rate": 5.0532824491157825e-05, |
|
"loss": 0.001, |
|
"step": 58097 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.029209736734628677, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9114, |
|
"eval_samples_per_second": 392.814, |
|
"eval_steps_per_second": 26.334, |
|
"step": 58608 |
|
}, |
|
{ |
|
"epoch": 22.01, |
|
"learning_rate": 4.997706039372707e-05, |
|
"loss": 0.0007, |
|
"step": 58630 |
|
}, |
|
{ |
|
"epoch": 22.21, |
|
"learning_rate": 4.94212962962963e-05, |
|
"loss": 0.0011, |
|
"step": 59163 |
|
}, |
|
{ |
|
"epoch": 22.41, |
|
"learning_rate": 4.886553219886553e-05, |
|
"loss": 0.0004, |
|
"step": 59696 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 4.830976810143477e-05, |
|
"loss": 0.001, |
|
"step": 60229 |
|
}, |
|
{ |
|
"epoch": 22.81, |
|
"learning_rate": 4.775400400400401e-05, |
|
"loss": 0.0008, |
|
"step": 60762 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.03728558123111725, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9095, |
|
"eval_samples_per_second": 393.613, |
|
"eval_steps_per_second": 26.387, |
|
"step": 61272 |
|
}, |
|
{ |
|
"epoch": 23.01, |
|
"learning_rate": 4.719823990657324e-05, |
|
"loss": 0.0001, |
|
"step": 61295 |
|
}, |
|
{ |
|
"epoch": 23.21, |
|
"learning_rate": 4.664247580914248e-05, |
|
"loss": 0.0006, |
|
"step": 61828 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"learning_rate": 4.608671171171172e-05, |
|
"loss": 0.0002, |
|
"step": 62361 |
|
}, |
|
{ |
|
"epoch": 23.61, |
|
"learning_rate": 4.553094761428095e-05, |
|
"loss": 0.0009, |
|
"step": 62894 |
|
}, |
|
{ |
|
"epoch": 23.81, |
|
"learning_rate": 4.497518351685018e-05, |
|
"loss": 0.0008, |
|
"step": 63427 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.030942877754569054, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9149, |
|
"eval_samples_per_second": 391.282, |
|
"eval_steps_per_second": 26.231, |
|
"step": 63936 |
|
}, |
|
{ |
|
"epoch": 24.01, |
|
"learning_rate": 4.4419419419419425e-05, |
|
"loss": 0.0003, |
|
"step": 63960 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 4.386365532198865e-05, |
|
"loss": 0.0007, |
|
"step": 64493 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"learning_rate": 4.3307891224557895e-05, |
|
"loss": 0.0004, |
|
"step": 65026 |
|
}, |
|
{ |
|
"epoch": 24.61, |
|
"learning_rate": 4.275212712712713e-05, |
|
"loss": 0.0001, |
|
"step": 65559 |
|
}, |
|
{ |
|
"epoch": 24.81, |
|
"learning_rate": 4.2196363029696365e-05, |
|
"loss": 0.0008, |
|
"step": 66092 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.038451410830020905, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9087, |
|
"eval_samples_per_second": 393.969, |
|
"eval_steps_per_second": 26.411, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 4.16405989322656e-05, |
|
"loss": 0.0006, |
|
"step": 66625 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"learning_rate": 4.1084834834834836e-05, |
|
"loss": 0.0, |
|
"step": 67158 |
|
}, |
|
{ |
|
"epoch": 25.41, |
|
"learning_rate": 4.052907073740407e-05, |
|
"loss": 0.0013, |
|
"step": 67691 |
|
}, |
|
{ |
|
"epoch": 25.61, |
|
"learning_rate": 3.9973306639973306e-05, |
|
"loss": 0.0001, |
|
"step": 68224 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 3.941754254254255e-05, |
|
"loss": 0.0014, |
|
"step": 68757 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.0133729362860322, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9209, |
|
"eval_samples_per_second": 388.755, |
|
"eval_steps_per_second": 26.062, |
|
"step": 69264 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 3.8861778445111776e-05, |
|
"loss": 0.0001, |
|
"step": 69290 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"learning_rate": 3.830601434768102e-05, |
|
"loss": 0.0008, |
|
"step": 69823 |
|
}, |
|
{ |
|
"epoch": 26.41, |
|
"learning_rate": 3.775025025025025e-05, |
|
"loss": 0.0008, |
|
"step": 70356 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"learning_rate": 3.719448615281949e-05, |
|
"loss": 0.0002, |
|
"step": 70889 |
|
}, |
|
{ |
|
"epoch": 26.81, |
|
"learning_rate": 3.663872205538872e-05, |
|
"loss": 0.0004, |
|
"step": 71422 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.023867754265666008, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9071, |
|
"eval_samples_per_second": 394.647, |
|
"eval_steps_per_second": 26.457, |
|
"step": 71928 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 3.608295795795796e-05, |
|
"loss": 0.0009, |
|
"step": 71955 |
|
}, |
|
{ |
|
"epoch": 27.21, |
|
"learning_rate": 3.55271938605272e-05, |
|
"loss": 0.0005, |
|
"step": 72488 |
|
}, |
|
{ |
|
"epoch": 27.41, |
|
"learning_rate": 3.497142976309643e-05, |
|
"loss": 0.0007, |
|
"step": 73021 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"learning_rate": 3.441566566566567e-05, |
|
"loss": 0.0001, |
|
"step": 73554 |
|
}, |
|
{ |
|
"epoch": 27.81, |
|
"learning_rate": 3.3859901568234906e-05, |
|
"loss": 0.0011, |
|
"step": 74087 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.01642591878771782, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9124, |
|
"eval_samples_per_second": 392.371, |
|
"eval_steps_per_second": 26.304, |
|
"step": 74592 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 3.3304137470804134e-05, |
|
"loss": 0.0006, |
|
"step": 74620 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"learning_rate": 3.2748373373373376e-05, |
|
"loss": 0.0008, |
|
"step": 75153 |
|
}, |
|
{ |
|
"epoch": 28.41, |
|
"learning_rate": 3.219260927594261e-05, |
|
"loss": 0.0005, |
|
"step": 75686 |
|
}, |
|
{ |
|
"epoch": 28.61, |
|
"learning_rate": 3.1636845178511846e-05, |
|
"loss": 0.0002, |
|
"step": 76219 |
|
}, |
|
{ |
|
"epoch": 28.81, |
|
"learning_rate": 3.108108108108108e-05, |
|
"loss": 0.0002, |
|
"step": 76752 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.018625039607286453, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9144, |
|
"eval_samples_per_second": 391.524, |
|
"eval_steps_per_second": 26.247, |
|
"step": 77256 |
|
}, |
|
{ |
|
"epoch": 29.01, |
|
"learning_rate": 3.052531698365032e-05, |
|
"loss": 0.0003, |
|
"step": 77285 |
|
}, |
|
{ |
|
"epoch": 29.21, |
|
"learning_rate": 2.9969552886219555e-05, |
|
"loss": 0.0002, |
|
"step": 77818 |
|
}, |
|
{ |
|
"epoch": 29.41, |
|
"learning_rate": 2.9413788788788787e-05, |
|
"loss": 0.0001, |
|
"step": 78351 |
|
}, |
|
{ |
|
"epoch": 29.61, |
|
"learning_rate": 2.8858024691358025e-05, |
|
"loss": 0.0013, |
|
"step": 78884 |
|
}, |
|
{ |
|
"epoch": 29.81, |
|
"learning_rate": 2.830226059392726e-05, |
|
"loss": 0.0001, |
|
"step": 79417 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.029812639579176903, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9075, |
|
"eval_samples_per_second": 394.481, |
|
"eval_steps_per_second": 26.446, |
|
"step": 79920 |
|
}, |
|
{ |
|
"epoch": 30.01, |
|
"learning_rate": 2.77464964964965e-05, |
|
"loss": 0.0012, |
|
"step": 79950 |
|
}, |
|
{ |
|
"epoch": 30.21, |
|
"learning_rate": 2.7190732399065734e-05, |
|
"loss": 0.0003, |
|
"step": 80483 |
|
}, |
|
{ |
|
"epoch": 30.41, |
|
"learning_rate": 2.6634968301634972e-05, |
|
"loss": 0.0001, |
|
"step": 81016 |
|
}, |
|
{ |
|
"epoch": 30.61, |
|
"learning_rate": 2.6079204204204204e-05, |
|
"loss": 0.0004, |
|
"step": 81549 |
|
}, |
|
{ |
|
"epoch": 30.81, |
|
"learning_rate": 2.5523440106773443e-05, |
|
"loss": 0.0008, |
|
"step": 82082 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 0.027695728465914726, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9096, |
|
"eval_samples_per_second": 393.574, |
|
"eval_steps_per_second": 26.385, |
|
"step": 82584 |
|
}, |
|
{ |
|
"epoch": 31.01, |
|
"learning_rate": 2.4967676009342678e-05, |
|
"loss": 0.0008, |
|
"step": 82615 |
|
}, |
|
{ |
|
"epoch": 31.21, |
|
"learning_rate": 2.4411911911911913e-05, |
|
"loss": 0.0003, |
|
"step": 83148 |
|
}, |
|
{ |
|
"epoch": 31.41, |
|
"learning_rate": 2.385614781448115e-05, |
|
"loss": 0.0003, |
|
"step": 83681 |
|
}, |
|
{ |
|
"epoch": 31.61, |
|
"learning_rate": 2.3300383717050383e-05, |
|
"loss": 0.0002, |
|
"step": 84214 |
|
}, |
|
{ |
|
"epoch": 31.81, |
|
"learning_rate": 2.2744619619619618e-05, |
|
"loss": 0.0003, |
|
"step": 84747 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.03773302584886551, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9096, |
|
"eval_samples_per_second": 393.585, |
|
"eval_steps_per_second": 26.386, |
|
"step": 85248 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 2.2188855522188857e-05, |
|
"loss": 0.0007, |
|
"step": 85280 |
|
}, |
|
{ |
|
"epoch": 32.21, |
|
"learning_rate": 2.1633091424758092e-05, |
|
"loss": 0.001, |
|
"step": 85813 |
|
}, |
|
{ |
|
"epoch": 32.41, |
|
"learning_rate": 2.107732732732733e-05, |
|
"loss": 0.0002, |
|
"step": 86346 |
|
}, |
|
{ |
|
"epoch": 32.61, |
|
"learning_rate": 2.0521563229896565e-05, |
|
"loss": 0.0002, |
|
"step": 86879 |
|
}, |
|
{ |
|
"epoch": 32.81, |
|
"learning_rate": 1.99657991324658e-05, |
|
"loss": 0.0003, |
|
"step": 87412 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 0.03536462038755417, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9089, |
|
"eval_samples_per_second": 393.87, |
|
"eval_steps_per_second": 26.405, |
|
"step": 87912 |
|
}, |
|
{ |
|
"epoch": 33.01, |
|
"learning_rate": 1.9410035035035036e-05, |
|
"loss": 0.0006, |
|
"step": 87945 |
|
}, |
|
{ |
|
"epoch": 33.21, |
|
"learning_rate": 1.885427093760427e-05, |
|
"loss": 0.0003, |
|
"step": 88478 |
|
}, |
|
{ |
|
"epoch": 33.41, |
|
"learning_rate": 1.8298506840173506e-05, |
|
"loss": 0.0003, |
|
"step": 89011 |
|
}, |
|
{ |
|
"epoch": 33.61, |
|
"learning_rate": 1.7742742742742744e-05, |
|
"loss": 0.0006, |
|
"step": 89544 |
|
}, |
|
{ |
|
"epoch": 33.81, |
|
"learning_rate": 1.718697864531198e-05, |
|
"loss": 0.0007, |
|
"step": 90077 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.05854496732354164, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9101, |
|
"eval_samples_per_second": 393.356, |
|
"eval_steps_per_second": 26.37, |
|
"step": 90576 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 1.6631214547881215e-05, |
|
"loss": 0.0002, |
|
"step": 90610 |
|
}, |
|
{ |
|
"epoch": 34.21, |
|
"learning_rate": 1.6075450450450453e-05, |
|
"loss": 0.0006, |
|
"step": 91143 |
|
}, |
|
{ |
|
"epoch": 34.41, |
|
"learning_rate": 1.5519686353019688e-05, |
|
"loss": 0.0003, |
|
"step": 91676 |
|
}, |
|
{ |
|
"epoch": 34.61, |
|
"learning_rate": 1.4963922255588922e-05, |
|
"loss": 0.0006, |
|
"step": 92209 |
|
}, |
|
{ |
|
"epoch": 34.81, |
|
"learning_rate": 1.4408158158158158e-05, |
|
"loss": 0.0005, |
|
"step": 92742 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 0.05680559575557709, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9087, |
|
"eval_samples_per_second": 393.956, |
|
"eval_steps_per_second": 26.41, |
|
"step": 93240 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 1.3852394060727395e-05, |
|
"loss": 0.0, |
|
"step": 93275 |
|
}, |
|
{ |
|
"epoch": 35.21, |
|
"learning_rate": 1.329662996329663e-05, |
|
"loss": 0.0005, |
|
"step": 93808 |
|
}, |
|
{ |
|
"epoch": 35.41, |
|
"learning_rate": 1.2740865865865867e-05, |
|
"loss": 0.0002, |
|
"step": 94341 |
|
}, |
|
{ |
|
"epoch": 35.61, |
|
"learning_rate": 1.2185101768435102e-05, |
|
"loss": 0.0007, |
|
"step": 94874 |
|
}, |
|
{ |
|
"epoch": 35.81, |
|
"learning_rate": 1.1629337671004337e-05, |
|
"loss": 0.0001, |
|
"step": 95407 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.05670797452330589, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9196, |
|
"eval_samples_per_second": 389.279, |
|
"eval_steps_per_second": 26.097, |
|
"step": 95904 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 1.1073573573573574e-05, |
|
"loss": 0.0003, |
|
"step": 95940 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"learning_rate": 1.0517809476142811e-05, |
|
"loss": 0.0004, |
|
"step": 96473 |
|
}, |
|
{ |
|
"epoch": 36.41, |
|
"learning_rate": 9.962045378712046e-06, |
|
"loss": 0.0002, |
|
"step": 97006 |
|
}, |
|
{ |
|
"epoch": 36.61, |
|
"learning_rate": 9.406281281281281e-06, |
|
"loss": 0.0006, |
|
"step": 97539 |
|
}, |
|
{ |
|
"epoch": 36.81, |
|
"learning_rate": 8.850517183850518e-06, |
|
"loss": 0.0009, |
|
"step": 98072 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 0.060491062700748444, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9347, |
|
"eval_samples_per_second": 383.022, |
|
"eval_steps_per_second": 25.677, |
|
"step": 98568 |
|
}, |
|
{ |
|
"epoch": 37.01, |
|
"learning_rate": 8.294753086419753e-06, |
|
"loss": 0.0002, |
|
"step": 98605 |
|
}, |
|
{ |
|
"epoch": 37.21, |
|
"learning_rate": 7.738988988988988e-06, |
|
"loss": 0.001, |
|
"step": 99138 |
|
}, |
|
{ |
|
"epoch": 37.41, |
|
"learning_rate": 7.183224891558225e-06, |
|
"loss": 0.0003, |
|
"step": 99671 |
|
}, |
|
{ |
|
"epoch": 37.61, |
|
"learning_rate": 6.627460794127462e-06, |
|
"loss": 0.0001, |
|
"step": 100204 |
|
}, |
|
{ |
|
"epoch": 37.81, |
|
"learning_rate": 6.071696696696697e-06, |
|
"loss": 0.0002, |
|
"step": 100737 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.06128498166799545, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9116, |
|
"eval_samples_per_second": 392.708, |
|
"eval_steps_per_second": 26.327, |
|
"step": 101232 |
|
}, |
|
{ |
|
"epoch": 38.01, |
|
"learning_rate": 5.515932599265933e-06, |
|
"loss": 0.0002, |
|
"step": 101270 |
|
}, |
|
{ |
|
"epoch": 38.21, |
|
"learning_rate": 4.960168501835169e-06, |
|
"loss": 0.0002, |
|
"step": 101803 |
|
}, |
|
{ |
|
"epoch": 38.41, |
|
"learning_rate": 4.404404404404405e-06, |
|
"loss": 0.0001, |
|
"step": 102336 |
|
}, |
|
{ |
|
"epoch": 38.61, |
|
"learning_rate": 3.848640306973641e-06, |
|
"loss": 0.0006, |
|
"step": 102869 |
|
}, |
|
{ |
|
"epoch": 38.81, |
|
"learning_rate": 3.2928762095428764e-06, |
|
"loss": 0.0002, |
|
"step": 103402 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 0.05631242319941521, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9146, |
|
"eval_samples_per_second": 391.433, |
|
"eval_steps_per_second": 26.241, |
|
"step": 103896 |
|
}, |
|
{ |
|
"epoch": 39.01, |
|
"learning_rate": 2.7371121121121123e-06, |
|
"loss": 0.0004, |
|
"step": 103935 |
|
}, |
|
{ |
|
"epoch": 39.21, |
|
"learning_rate": 2.1813480146813483e-06, |
|
"loss": 0.0004, |
|
"step": 104468 |
|
}, |
|
{ |
|
"epoch": 39.41, |
|
"learning_rate": 1.625583917250584e-06, |
|
"loss": 0.0001, |
|
"step": 105001 |
|
}, |
|
{ |
|
"epoch": 39.61, |
|
"learning_rate": 1.0698198198198198e-06, |
|
"loss": 0.0013, |
|
"step": 105534 |
|
}, |
|
{ |
|
"epoch": 39.81, |
|
"learning_rate": 5.140557223890558e-07, |
|
"loss": 0.0002, |
|
"step": 106067 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.06320372968912125, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9155, |
|
"eval_samples_per_second": 391.055, |
|
"eval_steps_per_second": 26.216, |
|
"step": 106560 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 106560, |
|
"total_flos": 6.005678715251712e+16, |
|
"train_loss": 0.2983123329788039, |
|
"train_runtime": 9788.1362, |
|
"train_samples_per_second": 163.251, |
|
"train_steps_per_second": 10.887 |
|
} |
|
  ],
  "logging_steps": 533,
  "max_steps": 106560,
  "num_train_epochs": 40,
  "save_steps": 1066,
  "total_flos": 6.005678715251712e+16,
  "trial_name": null,
  "trial_params": null
}