|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 274520, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.6427218417601638e-09, |
|
"loss": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.001457088736705e-06, |
|
"loss": 0.0052, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.000291417747341e-05, |
|
"loss": 0.0041, |
|
"step": 2746 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5004371266210113e-05, |
|
"loss": 0.0025, |
|
"step": 4119 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.000582835494682e-05, |
|
"loss": 0.002, |
|
"step": 5492 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.500728544368352e-05, |
|
"loss": 0.0018, |
|
"step": 6865 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.0008742532420226e-05, |
|
"loss": 0.0014, |
|
"step": 8238 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.501019962115693e-05, |
|
"loss": 0.0015, |
|
"step": 9611 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.001165670989364e-05, |
|
"loss": 0.0012, |
|
"step": 10984 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.501311379863034e-05, |
|
"loss": 0.001, |
|
"step": 12357 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.011379459872841835, |
|
"eval_max_distance": 5, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.5607, |
|
"eval_samples_per_second": 19.763, |
|
"eval_steps_per_second": 1.327, |
|
"step": 13726 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.001457088736704e-05, |
|
"loss": 0.0009, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.501602797610374e-05, |
|
"loss": 0.0008, |
|
"step": 15103 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.001748506484045e-05, |
|
"loss": 0.0009, |
|
"step": 16476 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.501894215357715e-05, |
|
"loss": 0.0008, |
|
"step": 17849 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.002039924231386e-05, |
|
"loss": 0.0007, |
|
"step": 19222 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.502185633105057e-05, |
|
"loss": 0.0008, |
|
"step": 20595 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.002331341978728e-05, |
|
"loss": 0.0008, |
|
"step": 21968 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.502477050852397e-05, |
|
"loss": 0.0009, |
|
"step": 23341 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.002622759726068e-05, |
|
"loss": 0.0008, |
|
"step": 24714 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.502768468599739e-05, |
|
"loss": 0.0009, |
|
"step": 26087 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.01430370844900608, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 14.233, |
|
"eval_samples_per_second": 18.829, |
|
"eval_steps_per_second": 1.265, |
|
"step": 27452 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.999676202502955e-05, |
|
"loss": 0.0008, |
|
"step": 27460 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.944104457072547e-05, |
|
"loss": 0.0008, |
|
"step": 28833 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.88853271164214e-05, |
|
"loss": 0.0008, |
|
"step": 30206 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 9.832960966211731e-05, |
|
"loss": 0.0008, |
|
"step": 31579 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.777389220781323e-05, |
|
"loss": 0.0009, |
|
"step": 32952 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.721817475350917e-05, |
|
"loss": 0.0008, |
|
"step": 34325 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.666245729920507e-05, |
|
"loss": 0.0009, |
|
"step": 35698 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.6106739844901e-05, |
|
"loss": 0.0008, |
|
"step": 37071 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 9.555102239059693e-05, |
|
"loss": 0.0008, |
|
"step": 38444 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.499530493629284e-05, |
|
"loss": 0.0008, |
|
"step": 39817 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.014528523199260235, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.815, |
|
"eval_samples_per_second": 20.913, |
|
"eval_steps_per_second": 1.405, |
|
"step": 41178 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.443958748198876e-05, |
|
"loss": 0.0009, |
|
"step": 41190 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.38838700276847e-05, |
|
"loss": 0.0008, |
|
"step": 42563 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.332815257338062e-05, |
|
"loss": 0.0008, |
|
"step": 43936 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 9.277243511907654e-05, |
|
"loss": 0.0008, |
|
"step": 45309 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.221671766477246e-05, |
|
"loss": 0.0008, |
|
"step": 46682 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 9.166100021046838e-05, |
|
"loss": 0.0008, |
|
"step": 48055 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.11052827561643e-05, |
|
"loss": 0.0008, |
|
"step": 49428 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.054956530186022e-05, |
|
"loss": 0.0008, |
|
"step": 50801 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.999384784755615e-05, |
|
"loss": 0.0008, |
|
"step": 52174 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.943813039325206e-05, |
|
"loss": 0.0009, |
|
"step": 53547 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.009767626412212849, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.1075, |
|
"eval_samples_per_second": 20.446, |
|
"eval_steps_per_second": 1.373, |
|
"step": 54904 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.888241293894799e-05, |
|
"loss": 0.0009, |
|
"step": 54920 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.832669548464391e-05, |
|
"loss": 0.0008, |
|
"step": 56293 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.777097803033983e-05, |
|
"loss": 0.0008, |
|
"step": 57666 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 8.721526057603575e-05, |
|
"loss": 0.0007, |
|
"step": 59039 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 8.665954312173167e-05, |
|
"loss": 0.0008, |
|
"step": 60412 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 8.61038256674276e-05, |
|
"loss": 0.0008, |
|
"step": 61785 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.554810821312351e-05, |
|
"loss": 0.0008, |
|
"step": 63158 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.499239075881944e-05, |
|
"loss": 0.0008, |
|
"step": 64531 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.443667330451536e-05, |
|
"loss": 0.0008, |
|
"step": 65904 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 8.388095585021128e-05, |
|
"loss": 0.0007, |
|
"step": 67277 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.012975725345313549, |
|
"eval_max_distance": 2, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.0896, |
|
"eval_samples_per_second": 20.474, |
|
"eval_steps_per_second": 1.375, |
|
"step": 68630 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 8.33252383959072e-05, |
|
"loss": 0.0008, |
|
"step": 68650 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 8.276952094160313e-05, |
|
"loss": 0.0008, |
|
"step": 70023 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 8.221380348729904e-05, |
|
"loss": 0.0007, |
|
"step": 71396 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.165808603299497e-05, |
|
"loss": 0.0008, |
|
"step": 72769 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 8.11023685786909e-05, |
|
"loss": 0.0008, |
|
"step": 74142 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 8.054665112438681e-05, |
|
"loss": 0.0007, |
|
"step": 75515 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.999093367008273e-05, |
|
"loss": 0.0008, |
|
"step": 76888 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 7.943521621577865e-05, |
|
"loss": 0.0007, |
|
"step": 78261 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 7.887949876147457e-05, |
|
"loss": 0.0008, |
|
"step": 79634 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.832378130717049e-05, |
|
"loss": 0.0008, |
|
"step": 81007 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.015315458178520203, |
|
"eval_max_distance": 2, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.1351, |
|
"eval_samples_per_second": 20.403, |
|
"eval_steps_per_second": 1.37, |
|
"step": 82356 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.776806385286643e-05, |
|
"loss": 0.0008, |
|
"step": 82380 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.721234639856235e-05, |
|
"loss": 0.0007, |
|
"step": 83753 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.665662894425827e-05, |
|
"loss": 0.0007, |
|
"step": 85126 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.610091148995419e-05, |
|
"loss": 0.0006, |
|
"step": 86499 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.554519403565012e-05, |
|
"loss": 0.0007, |
|
"step": 87872 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7.498947658134603e-05, |
|
"loss": 0.0007, |
|
"step": 89245 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 7.443375912704195e-05, |
|
"loss": 0.0007, |
|
"step": 90618 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 7.387804167273788e-05, |
|
"loss": 0.0007, |
|
"step": 91991 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 7.332232421843378e-05, |
|
"loss": 0.0007, |
|
"step": 93364 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 7.276660676412972e-05, |
|
"loss": 0.0007, |
|
"step": 94737 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.01277723629027605, |
|
"eval_max_distance": 2, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.169, |
|
"eval_samples_per_second": 20.351, |
|
"eval_steps_per_second": 1.367, |
|
"step": 96082 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 7.221088930982564e-05, |
|
"loss": 0.0007, |
|
"step": 96110 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 7.165517185552156e-05, |
|
"loss": 0.0007, |
|
"step": 97483 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 7.109945440121748e-05, |
|
"loss": 0.0006, |
|
"step": 98856 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 7.054373694691341e-05, |
|
"loss": 0.0006, |
|
"step": 100229 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 6.998801949260933e-05, |
|
"loss": 0.0007, |
|
"step": 101602 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 6.943230203830525e-05, |
|
"loss": 0.0006, |
|
"step": 102975 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 6.887658458400117e-05, |
|
"loss": 0.0006, |
|
"step": 104348 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.832086712969709e-05, |
|
"loss": 0.0007, |
|
"step": 105721 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 6.776514967539301e-05, |
|
"loss": 0.0006, |
|
"step": 107094 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.720943222108893e-05, |
|
"loss": 0.0006, |
|
"step": 108467 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.013012989424169064, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.4679, |
|
"eval_samples_per_second": 21.495, |
|
"eval_steps_per_second": 1.444, |
|
"step": 109808 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.665371476678486e-05, |
|
"loss": 0.0007, |
|
"step": 109840 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 6.609799731248077e-05, |
|
"loss": 0.0007, |
|
"step": 111213 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 6.55422798581767e-05, |
|
"loss": 0.0005, |
|
"step": 112586 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 6.498656240387262e-05, |
|
"loss": 0.0007, |
|
"step": 113959 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 6.443084494956854e-05, |
|
"loss": 0.0006, |
|
"step": 115332 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.387512749526446e-05, |
|
"loss": 0.0006, |
|
"step": 116705 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 6.33194100409604e-05, |
|
"loss": 0.0007, |
|
"step": 118078 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.276369258665632e-05, |
|
"loss": 0.0006, |
|
"step": 119451 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.220797513235222e-05, |
|
"loss": 0.0006, |
|
"step": 120824 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 6.165225767804815e-05, |
|
"loss": 0.0006, |
|
"step": 122197 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.015804793685674667, |
|
"eval_max_distance": 5, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.8641, |
|
"eval_samples_per_second": 20.833, |
|
"eval_steps_per_second": 1.399, |
|
"step": 123534 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 6.109654022374407e-05, |
|
"loss": 0.0005, |
|
"step": 123570 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 6.0540822769439994e-05, |
|
"loss": 0.0005, |
|
"step": 124943 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5.9985105315135914e-05, |
|
"loss": 0.0006, |
|
"step": 126316 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.942938786083184e-05, |
|
"loss": 0.0005, |
|
"step": 127689 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 5.887367040652776e-05, |
|
"loss": 0.0006, |
|
"step": 129062 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.831795295222369e-05, |
|
"loss": 0.0006, |
|
"step": 130435 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.77622354979196e-05, |
|
"loss": 0.0006, |
|
"step": 131808 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.720651804361552e-05, |
|
"loss": 0.0007, |
|
"step": 133181 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 5.6650800589311447e-05, |
|
"loss": 0.0006, |
|
"step": 134554 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 5.6095083135007366e-05, |
|
"loss": 0.0006, |
|
"step": 135927 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.015074091032147408, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.5962, |
|
"eval_samples_per_second": 21.276, |
|
"eval_steps_per_second": 1.429, |
|
"step": 137260 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.553936568070329e-05, |
|
"loss": 0.0006, |
|
"step": 137300 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 5.4983648226399206e-05, |
|
"loss": 0.0006, |
|
"step": 138673 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 5.442793077209514e-05, |
|
"loss": 0.0006, |
|
"step": 140046 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 5.387221331779105e-05, |
|
"loss": 0.0006, |
|
"step": 141419 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 5.331649586348698e-05, |
|
"loss": 0.0008, |
|
"step": 142792 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 5.27607784091829e-05, |
|
"loss": 0.0005, |
|
"step": 144165 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 5.2205060954878825e-05, |
|
"loss": 0.0006, |
|
"step": 145538 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 5.1649343500574745e-05, |
|
"loss": 0.0006, |
|
"step": 146911 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5.109362604627066e-05, |
|
"loss": 0.0006, |
|
"step": 148284 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 5.0537908591966585e-05, |
|
"loss": 0.0005, |
|
"step": 149657 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.016692565754055977, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.5906, |
|
"eval_samples_per_second": 21.286, |
|
"eval_steps_per_second": 1.43, |
|
"step": 150986 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.998219113766251e-05, |
|
"loss": 0.0006, |
|
"step": 151030 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 4.942647368335843e-05, |
|
"loss": 0.0005, |
|
"step": 152403 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.887075622905435e-05, |
|
"loss": 0.0005, |
|
"step": 153776 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.831503877475027e-05, |
|
"loss": 0.0005, |
|
"step": 155149 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.775932132044619e-05, |
|
"loss": 0.0005, |
|
"step": 156522 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.720360386614212e-05, |
|
"loss": 0.0005, |
|
"step": 157895 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.664788641183804e-05, |
|
"loss": 0.0005, |
|
"step": 159268 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 4.6092168957533957e-05, |
|
"loss": 0.0005, |
|
"step": 160641 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 4.553645150322988e-05, |
|
"loss": 0.0005, |
|
"step": 162014 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 4.49807340489258e-05, |
|
"loss": 0.0005, |
|
"step": 163387 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.014475121162831783, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.6623, |
|
"eval_samples_per_second": 21.165, |
|
"eval_steps_per_second": 1.422, |
|
"step": 164712 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.442501659462173e-05, |
|
"loss": 0.0005, |
|
"step": 164760 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 4.386929914031765e-05, |
|
"loss": 0.0005, |
|
"step": 166133 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 4.331358168601357e-05, |
|
"loss": 0.0005, |
|
"step": 167506 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 4.2757864231709496e-05, |
|
"loss": 0.0005, |
|
"step": 168879 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.220214677740541e-05, |
|
"loss": 0.0005, |
|
"step": 170252 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 4.1646429323101335e-05, |
|
"loss": 0.0004, |
|
"step": 171625 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.1090711868797255e-05, |
|
"loss": 0.0006, |
|
"step": 172998 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 4.0534994414493175e-05, |
|
"loss": 0.0006, |
|
"step": 174371 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3.99792769601891e-05, |
|
"loss": 0.0006, |
|
"step": 175744 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.942355950588502e-05, |
|
"loss": 0.0005, |
|
"step": 177117 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.016049271449446678, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.7374, |
|
"eval_samples_per_second": 21.04, |
|
"eval_steps_per_second": 1.413, |
|
"step": 178438 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.886784205158094e-05, |
|
"loss": 0.0005, |
|
"step": 178490 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 3.831212459727687e-05, |
|
"loss": 0.0005, |
|
"step": 179863 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 3.775640714297279e-05, |
|
"loss": 0.0005, |
|
"step": 181236 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 3.720068968866871e-05, |
|
"loss": 0.0005, |
|
"step": 182609 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 3.6644972234364634e-05, |
|
"loss": 0.0005, |
|
"step": 183982 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 3.608925478006055e-05, |
|
"loss": 0.0004, |
|
"step": 185355 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 3.5533537325756473e-05, |
|
"loss": 0.0005, |
|
"step": 186728 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 3.497781987145239e-05, |
|
"loss": 0.0005, |
|
"step": 188101 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 3.442210241714832e-05, |
|
"loss": 0.0005, |
|
"step": 189474 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 3.386638496284424e-05, |
|
"loss": 0.0005, |
|
"step": 190847 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.015727248042821884, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.6356, |
|
"eval_samples_per_second": 21.21, |
|
"eval_steps_per_second": 1.425, |
|
"step": 192164 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.331066750854016e-05, |
|
"loss": 0.0004, |
|
"step": 192220 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 3.2754950054236086e-05, |
|
"loss": 0.0005, |
|
"step": 193593 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 3.2199232599932006e-05, |
|
"loss": 0.0005, |
|
"step": 194966 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 3.1643515145627926e-05, |
|
"loss": 0.0004, |
|
"step": 196339 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 3.108779769132385e-05, |
|
"loss": 0.0004, |
|
"step": 197712 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 3.0532080237019765e-05, |
|
"loss": 0.0005, |
|
"step": 199085 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 2.997636278271569e-05, |
|
"loss": 0.0005, |
|
"step": 200458 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 2.942064532841161e-05, |
|
"loss": 0.0004, |
|
"step": 201831 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 2.8864927874107535e-05, |
|
"loss": 0.0004, |
|
"step": 203204 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 2.8309210419803455e-05, |
|
"loss": 0.0004, |
|
"step": 204577 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.015685711055994034, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.7509, |
|
"eval_samples_per_second": 21.018, |
|
"eval_steps_per_second": 1.412, |
|
"step": 205890 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.7753492965499378e-05, |
|
"loss": 0.0004, |
|
"step": 205950 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 2.71977755111953e-05, |
|
"loss": 0.0004, |
|
"step": 207323 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 2.6642058056891224e-05, |
|
"loss": 0.0004, |
|
"step": 208696 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 2.6086340602587144e-05, |
|
"loss": 0.0004, |
|
"step": 210069 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 2.5530623148283067e-05, |
|
"loss": 0.0004, |
|
"step": 211442 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 2.4974905693978987e-05, |
|
"loss": 0.0005, |
|
"step": 212815 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 2.441918823967491e-05, |
|
"loss": 0.0004, |
|
"step": 214188 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 2.386347078537083e-05, |
|
"loss": 0.0004, |
|
"step": 215561 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 2.330775333106675e-05, |
|
"loss": 0.0005, |
|
"step": 216934 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 2.2752035876762673e-05, |
|
"loss": 0.0004, |
|
"step": 218307 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.017339130863547325, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.871, |
|
"eval_samples_per_second": 20.822, |
|
"eval_steps_per_second": 1.398, |
|
"step": 219616 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.2196318422458596e-05, |
|
"loss": 0.0004, |
|
"step": 219680 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 2.1640600968154516e-05, |
|
"loss": 0.0004, |
|
"step": 221053 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 2.108488351385044e-05, |
|
"loss": 0.0004, |
|
"step": 222426 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 2.0529166059546362e-05, |
|
"loss": 0.0004, |
|
"step": 223799 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 1.9973448605242282e-05, |
|
"loss": 0.0004, |
|
"step": 225172 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 1.9417731150938205e-05, |
|
"loss": 0.0004, |
|
"step": 226545 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 1.8862013696634125e-05, |
|
"loss": 0.0004, |
|
"step": 227918 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 1.8306296242330048e-05, |
|
"loss": 0.0004, |
|
"step": 229291 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 1.775057878802597e-05, |
|
"loss": 0.0004, |
|
"step": 230664 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 1.719486133372189e-05, |
|
"loss": 0.0004, |
|
"step": 232037 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.015850957483053207, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.7459, |
|
"eval_samples_per_second": 21.026, |
|
"eval_steps_per_second": 1.412, |
|
"step": 233342 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.663914387941781e-05, |
|
"loss": 0.0004, |
|
"step": 233410 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 1.6083426425113734e-05, |
|
"loss": 0.0004, |
|
"step": 234783 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 1.5527708970809657e-05, |
|
"loss": 0.0004, |
|
"step": 236156 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 1.4971991516505579e-05, |
|
"loss": 0.0004, |
|
"step": 237529 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 1.4416274062201499e-05, |
|
"loss": 0.0005, |
|
"step": 238902 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 1.386055660789742e-05, |
|
"loss": 0.0004, |
|
"step": 240275 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 1.3304839153593343e-05, |
|
"loss": 0.0004, |
|
"step": 241648 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 1.2749121699289265e-05, |
|
"loss": 0.0004, |
|
"step": 243021 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 1.2193404244985186e-05, |
|
"loss": 0.0004, |
|
"step": 244394 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 1.1637686790681108e-05, |
|
"loss": 0.0004, |
|
"step": 245767 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.016846980899572372, |
|
"eval_max_distance": 2, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 14.3322, |
|
"eval_samples_per_second": 18.699, |
|
"eval_steps_per_second": 1.256, |
|
"step": 247068 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 1.1081969336377031e-05, |
|
"loss": 0.0004, |
|
"step": 247140 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 1.0526251882072952e-05, |
|
"loss": 0.0004, |
|
"step": 248513 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 9.970534427768874e-06, |
|
"loss": 0.0004, |
|
"step": 249886 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 9.414816973464795e-06, |
|
"loss": 0.0003, |
|
"step": 251259 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 8.859099519160717e-06, |
|
"loss": 0.0004, |
|
"step": 252632 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 8.30338206485664e-06, |
|
"loss": 0.0004, |
|
"step": 254005 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 7.74766461055256e-06, |
|
"loss": 0.0004, |
|
"step": 255378 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 7.191947156248482e-06, |
|
"loss": 0.0004, |
|
"step": 256751 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 6.636229701944405e-06, |
|
"loss": 0.0003, |
|
"step": 258124 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 6.080512247640326e-06, |
|
"loss": 0.0004, |
|
"step": 259497 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.01681215688586235, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.5068, |
|
"eval_samples_per_second": 21.428, |
|
"eval_steps_per_second": 1.439, |
|
"step": 260794 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 5.524794793336248e-06, |
|
"loss": 0.0004, |
|
"step": 260870 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 4.969077339032169e-06, |
|
"loss": 0.0004, |
|
"step": 262243 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 4.4133598847280914e-06, |
|
"loss": 0.0004, |
|
"step": 263616 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 3.857642430424013e-06, |
|
"loss": 0.0004, |
|
"step": 264989 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 3.301924976119935e-06, |
|
"loss": 0.0004, |
|
"step": 266362 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 2.7462075218158563e-06, |
|
"loss": 0.0004, |
|
"step": 267735 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 2.1904900675117783e-06, |
|
"loss": 0.0003, |
|
"step": 269108 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 1.6347726132077e-06, |
|
"loss": 0.0004, |
|
"step": 270481 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.0790551589036217e-06, |
|
"loss": 0.0003, |
|
"step": 271854 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 5.233377045995435e-07, |
|
"loss": 0.0004, |
|
"step": 273227 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.016863718628883362, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 12.4816, |
|
"eval_samples_per_second": 21.472, |
|
"eval_steps_per_second": 1.442, |
|
"step": 274520 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 274520, |
|
"total_flos": 7.204011364904141e+16, |
|
"train_loss": 2.2036947758330143e-05, |
|
"train_runtime": 1601.2933, |
|
"train_samples_per_second": 2571.434, |
|
"train_steps_per_second": 171.436 |
|
} |
|
], |
|
"logging_steps": 1373, |
|
"max_steps": 274520, |
|
"num_train_epochs": 20, |
|
"save_steps": 2746, |
|
"total_flos": 7.204011364904141e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|