|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 343740, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.909175539652063e-09, |
|
"loss": 0.0085, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.000872752661896e-06, |
|
"loss": 0.0066, |
|
"step": 1719 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.0001745505323792e-05, |
|
"loss": 0.0061, |
|
"step": 3438 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5002618257985687e-05, |
|
"loss": 0.0059, |
|
"step": 5157 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.0003491010647585e-05, |
|
"loss": 0.0056, |
|
"step": 6876 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.500436376330948e-05, |
|
"loss": 0.0052, |
|
"step": 8595 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.0005236515971374e-05, |
|
"loss": 0.0052, |
|
"step": 10314 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3.500610926863327e-05, |
|
"loss": 0.0049, |
|
"step": 12033 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.000698202129517e-05, |
|
"loss": 0.0049, |
|
"step": 13752 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.5007854773957064e-05, |
|
"loss": 0.0052, |
|
"step": 15471 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.000872752661896e-05, |
|
"loss": 0.0051, |
|
"step": 17190 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.500960027928086e-05, |
|
"loss": 0.0052, |
|
"step": 18909 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.001047303194275e-05, |
|
"loss": 0.0048, |
|
"step": 20628 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.501134578460465e-05, |
|
"loss": 0.0052, |
|
"step": 22347 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.027106985449790955, |
|
"eval_max_distance": 9, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 14.4629, |
|
"eval_samples_per_second": 17.355, |
|
"eval_steps_per_second": 1.175, |
|
"step": 22916 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.001221853726654e-05, |
|
"loss": 0.0045, |
|
"step": 24066 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 7.501309128992844e-05, |
|
"loss": 0.0044, |
|
"step": 25785 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.001396404259034e-05, |
|
"loss": 0.0048, |
|
"step": 27504 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.501483679525223e-05, |
|
"loss": 0.0045, |
|
"step": 29223 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 9.001570954791413e-05, |
|
"loss": 0.0044, |
|
"step": 30942 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.501658230057602e-05, |
|
"loss": 0.0047, |
|
"step": 32661 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 9.999806054964024e-05, |
|
"loss": 0.005, |
|
"step": 34380 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.944240802156669e-05, |
|
"loss": 0.0049, |
|
"step": 36099 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.888675549349314e-05, |
|
"loss": 0.0053, |
|
"step": 37818 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.83311029654196e-05, |
|
"loss": 0.0047, |
|
"step": 39537 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.777545043734606e-05, |
|
"loss": 0.005, |
|
"step": 41256 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.721979790927251e-05, |
|
"loss": 0.0048, |
|
"step": 42975 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 9.666414538119898e-05, |
|
"loss": 0.0051, |
|
"step": 44694 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.026126669719815254, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.5883, |
|
"eval_samples_per_second": 18.472, |
|
"eval_steps_per_second": 1.251, |
|
"step": 45832 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.610849285312543e-05, |
|
"loss": 0.0047, |
|
"step": 46413 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.555284032505189e-05, |
|
"loss": 0.004, |
|
"step": 48132 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.499718779697834e-05, |
|
"loss": 0.004, |
|
"step": 49851 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 9.44415352689048e-05, |
|
"loss": 0.0042, |
|
"step": 51570 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 9.388588274083125e-05, |
|
"loss": 0.004, |
|
"step": 53289 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.333023021275771e-05, |
|
"loss": 0.0043, |
|
"step": 55008 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.277457768468416e-05, |
|
"loss": 0.0042, |
|
"step": 56727 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.221892515661063e-05, |
|
"loss": 0.004, |
|
"step": 58446 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 9.166327262853708e-05, |
|
"loss": 0.0045, |
|
"step": 60165 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.110762010046352e-05, |
|
"loss": 0.0044, |
|
"step": 61884 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 9.055196757238999e-05, |
|
"loss": 0.0044, |
|
"step": 63603 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.999631504431645e-05, |
|
"loss": 0.0044, |
|
"step": 65322 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.94406625162429e-05, |
|
"loss": 0.0043, |
|
"step": 67041 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.03130079433321953, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4308, |
|
"eval_samples_per_second": 18.688, |
|
"eval_steps_per_second": 1.266, |
|
"step": 68748 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.888500998816935e-05, |
|
"loss": 0.0046, |
|
"step": 68760 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 8.832935746009581e-05, |
|
"loss": 0.0036, |
|
"step": 70479 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.777370493202228e-05, |
|
"loss": 0.0036, |
|
"step": 72198 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 8.721805240394872e-05, |
|
"loss": 0.0038, |
|
"step": 73917 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 8.666239987587517e-05, |
|
"loss": 0.0036, |
|
"step": 75636 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 8.610674734780164e-05, |
|
"loss": 0.0038, |
|
"step": 77355 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 8.55510948197281e-05, |
|
"loss": 0.0038, |
|
"step": 79074 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 8.499544229165455e-05, |
|
"loss": 0.0038, |
|
"step": 80793 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 8.4439789763581e-05, |
|
"loss": 0.004, |
|
"step": 82512 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 8.388413723550746e-05, |
|
"loss": 0.0037, |
|
"step": 84231 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 8.332848470743392e-05, |
|
"loss": 0.0038, |
|
"step": 85950 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 8.277283217936037e-05, |
|
"loss": 0.0039, |
|
"step": 87669 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.221717965128682e-05, |
|
"loss": 0.0039, |
|
"step": 89388 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 8.166152712321329e-05, |
|
"loss": 0.0041, |
|
"step": 91107 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.02780107595026493, |
|
"eval_max_distance": 10, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.351, |
|
"eval_samples_per_second": 18.8, |
|
"eval_steps_per_second": 1.273, |
|
"step": 91664 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 8.110587459513974e-05, |
|
"loss": 0.0037, |
|
"step": 92826 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 8.05502220670662e-05, |
|
"loss": 0.0032, |
|
"step": 94545 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 7.999456953899266e-05, |
|
"loss": 0.0034, |
|
"step": 96264 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 7.943891701091911e-05, |
|
"loss": 0.0034, |
|
"step": 97983 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 7.888326448284556e-05, |
|
"loss": 0.0035, |
|
"step": 99702 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 7.832761195477202e-05, |
|
"loss": 0.0034, |
|
"step": 101421 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 7.777195942669849e-05, |
|
"loss": 0.0036, |
|
"step": 103140 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 7.721630689862494e-05, |
|
"loss": 0.0035, |
|
"step": 104859 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 7.666065437055139e-05, |
|
"loss": 0.0034, |
|
"step": 106578 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 7.610500184247783e-05, |
|
"loss": 0.0034, |
|
"step": 108297 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 7.554934931440431e-05, |
|
"loss": 0.0036, |
|
"step": 110016 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 7.499369678633076e-05, |
|
"loss": 0.0034, |
|
"step": 111735 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 7.443804425825721e-05, |
|
"loss": 0.0037, |
|
"step": 113454 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.028013188391923904, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4584, |
|
"eval_samples_per_second": 18.65, |
|
"eval_steps_per_second": 1.263, |
|
"step": 114580 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 7.388239173018366e-05, |
|
"loss": 0.0033, |
|
"step": 115173 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 7.332673920211012e-05, |
|
"loss": 0.0031, |
|
"step": 116892 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 7.277108667403659e-05, |
|
"loss": 0.0031, |
|
"step": 118611 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 7.221543414596303e-05, |
|
"loss": 0.0032, |
|
"step": 120330 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 7.16597816178895e-05, |
|
"loss": 0.0031, |
|
"step": 122049 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 7.110412908981595e-05, |
|
"loss": 0.0032, |
|
"step": 123768 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 7.054847656174241e-05, |
|
"loss": 0.0031, |
|
"step": 125487 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 6.999282403366886e-05, |
|
"loss": 0.0032, |
|
"step": 127206 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 6.943717150559532e-05, |
|
"loss": 0.0032, |
|
"step": 128925 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 6.888151897752177e-05, |
|
"loss": 0.0032, |
|
"step": 130644 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 6.832586644944823e-05, |
|
"loss": 0.0031, |
|
"step": 132363 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 6.777021392137468e-05, |
|
"loss": 0.0031, |
|
"step": 134082 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 6.721456139330115e-05, |
|
"loss": 0.0032, |
|
"step": 135801 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.028835317119956017, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4137, |
|
"eval_samples_per_second": 18.712, |
|
"eval_steps_per_second": 1.267, |
|
"step": 137496 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 6.66589088652276e-05, |
|
"loss": 0.0033, |
|
"step": 137520 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 6.610325633715405e-05, |
|
"loss": 0.0028, |
|
"step": 139239 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 6.554760380908051e-05, |
|
"loss": 0.0026, |
|
"step": 140958 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 6.499195128100697e-05, |
|
"loss": 0.0027, |
|
"step": 142677 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 6.443629875293342e-05, |
|
"loss": 0.0029, |
|
"step": 144396 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 6.388064622485987e-05, |
|
"loss": 0.0029, |
|
"step": 146115 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 6.332499369678633e-05, |
|
"loss": 0.0029, |
|
"step": 147834 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 6.27693411687128e-05, |
|
"loss": 0.0028, |
|
"step": 149553 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 6.221368864063925e-05, |
|
"loss": 0.0029, |
|
"step": 151272 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 6.16580361125657e-05, |
|
"loss": 0.0029, |
|
"step": 152991 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 6.110238358449216e-05, |
|
"loss": 0.0029, |
|
"step": 154710 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 6.0546731056418614e-05, |
|
"loss": 0.0028, |
|
"step": 156429 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 5.999107852834507e-05, |
|
"loss": 0.0029, |
|
"step": 158148 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 5.943542600027152e-05, |
|
"loss": 0.003, |
|
"step": 159867 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.030847659334540367, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4895, |
|
"eval_samples_per_second": 18.607, |
|
"eval_steps_per_second": 1.26, |
|
"step": 160412 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 5.887977347219798e-05, |
|
"loss": 0.0027, |
|
"step": 161586 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 5.832412094412444e-05, |
|
"loss": 0.0025, |
|
"step": 163305 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 5.7768468416050895e-05, |
|
"loss": 0.0026, |
|
"step": 165024 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 5.7212815887977344e-05, |
|
"loss": 0.0027, |
|
"step": 166743 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 5.665716335990381e-05, |
|
"loss": 0.0025, |
|
"step": 168462 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 5.610151083183026e-05, |
|
"loss": 0.0026, |
|
"step": 170181 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 5.554585830375671e-05, |
|
"loss": 0.0026, |
|
"step": 171900 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 5.499020577568318e-05, |
|
"loss": 0.0026, |
|
"step": 173619 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 5.443455324760963e-05, |
|
"loss": 0.0026, |
|
"step": 175338 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 5.387890071953609e-05, |
|
"loss": 0.0025, |
|
"step": 177057 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 5.332324819146254e-05, |
|
"loss": 0.0027, |
|
"step": 178776 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 5.2767595663389e-05, |
|
"loss": 0.0028, |
|
"step": 180495 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 5.2211943135315456e-05, |
|
"loss": 0.0025, |
|
"step": 182214 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.03048335202038288, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.5077, |
|
"eval_samples_per_second": 18.582, |
|
"eval_steps_per_second": 1.259, |
|
"step": 183328 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 5.1656290607241906e-05, |
|
"loss": 0.0026, |
|
"step": 183933 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 5.110063807916836e-05, |
|
"loss": 0.0023, |
|
"step": 185652 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 5.0544985551094825e-05, |
|
"loss": 0.0024, |
|
"step": 187371 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 4.998933302302128e-05, |
|
"loss": 0.0023, |
|
"step": 189090 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.943368049494773e-05, |
|
"loss": 0.0024, |
|
"step": 190809 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 4.887802796687419e-05, |
|
"loss": 0.0024, |
|
"step": 192528 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 4.832237543880065e-05, |
|
"loss": 0.0025, |
|
"step": 194247 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 4.77667229107271e-05, |
|
"loss": 0.0024, |
|
"step": 195966 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 4.721107038265356e-05, |
|
"loss": 0.0026, |
|
"step": 197685 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 4.665541785458001e-05, |
|
"loss": 0.0023, |
|
"step": 199404 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 4.6099765326506474e-05, |
|
"loss": 0.0024, |
|
"step": 201123 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 4.5544112798432924e-05, |
|
"loss": 0.0024, |
|
"step": 202842 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 4.4988460270359386e-05, |
|
"loss": 0.0025, |
|
"step": 204561 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.030335595831274986, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4109, |
|
"eval_samples_per_second": 18.716, |
|
"eval_steps_per_second": 1.268, |
|
"step": 206244 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.4432807742285836e-05, |
|
"loss": 0.0026, |
|
"step": 206280 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 4.38771552142123e-05, |
|
"loss": 0.0021, |
|
"step": 207999 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 4.332150268613875e-05, |
|
"loss": 0.0022, |
|
"step": 209718 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 4.2765850158065204e-05, |
|
"loss": 0.0022, |
|
"step": 211437 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 4.221019762999166e-05, |
|
"loss": 0.0023, |
|
"step": 213156 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 4.165454510191812e-05, |
|
"loss": 0.0023, |
|
"step": 214875 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 4.109889257384457e-05, |
|
"loss": 0.0023, |
|
"step": 216594 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 4.054324004577103e-05, |
|
"loss": 0.0023, |
|
"step": 218313 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 3.998758751769749e-05, |
|
"loss": 0.0024, |
|
"step": 220032 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 3.943193498962394e-05, |
|
"loss": 0.0024, |
|
"step": 221751 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 3.88762824615504e-05, |
|
"loss": 0.0022, |
|
"step": 223470 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 3.8320629933476854e-05, |
|
"loss": 0.0023, |
|
"step": 225189 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 3.776497740540331e-05, |
|
"loss": 0.0024, |
|
"step": 226908 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 3.7209324877329766e-05, |
|
"loss": 0.0023, |
|
"step": 228627 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.034065987914800644, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4726, |
|
"eval_samples_per_second": 18.63, |
|
"eval_steps_per_second": 1.262, |
|
"step": 229160 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 3.665367234925622e-05, |
|
"loss": 0.0021, |
|
"step": 230346 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 3.609801982118268e-05, |
|
"loss": 0.0021, |
|
"step": 232065 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 3.5542367293109135e-05, |
|
"loss": 0.0021, |
|
"step": 233784 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 3.498671476503559e-05, |
|
"loss": 0.0022, |
|
"step": 235503 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 3.443106223696205e-05, |
|
"loss": 0.0021, |
|
"step": 237222 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 3.38754097088885e-05, |
|
"loss": 0.0022, |
|
"step": 238941 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 3.331975718081496e-05, |
|
"loss": 0.0021, |
|
"step": 240660 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 3.276410465274141e-05, |
|
"loss": 0.0021, |
|
"step": 242379 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 3.220845212466787e-05, |
|
"loss": 0.0021, |
|
"step": 244098 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 3.165279959659433e-05, |
|
"loss": 0.0022, |
|
"step": 245817 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 3.1097147068520784e-05, |
|
"loss": 0.0022, |
|
"step": 247536 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 3.054149454044724e-05, |
|
"loss": 0.002, |
|
"step": 249255 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 2.9985842012373693e-05, |
|
"loss": 0.0022, |
|
"step": 250974 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.03288768604397774, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.3832, |
|
"eval_samples_per_second": 18.755, |
|
"eval_steps_per_second": 1.27, |
|
"step": 252076 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 2.9430189484300152e-05, |
|
"loss": 0.0022, |
|
"step": 252693 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 2.8874536956226605e-05, |
|
"loss": 0.002, |
|
"step": 254412 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 2.8318884428153065e-05, |
|
"loss": 0.002, |
|
"step": 256131 |
|
}, |
|
{ |
|
"epoch": 11.25, |
|
"learning_rate": 2.7763231900079517e-05, |
|
"loss": 0.002, |
|
"step": 257850 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 2.7207579372005977e-05, |
|
"loss": 0.0021, |
|
"step": 259569 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 2.665192684393243e-05, |
|
"loss": 0.0019, |
|
"step": 261288 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 2.6096274315858886e-05, |
|
"loss": 0.002, |
|
"step": 263007 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 2.5540621787785342e-05, |
|
"loss": 0.002, |
|
"step": 264726 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 2.4984969259711798e-05, |
|
"loss": 0.002, |
|
"step": 266445 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 2.4429316731638254e-05, |
|
"loss": 0.0019, |
|
"step": 268164 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 2.387366420356471e-05, |
|
"loss": 0.0021, |
|
"step": 269883 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 2.3318011675491167e-05, |
|
"loss": 0.0021, |
|
"step": 271602 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 2.2762359147417623e-05, |
|
"loss": 0.0019, |
|
"step": 273321 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.03355114161968231, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4567, |
|
"eval_samples_per_second": 18.652, |
|
"eval_steps_per_second": 1.263, |
|
"step": 274992 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 2.220670661934408e-05, |
|
"loss": 0.0021, |
|
"step": 275040 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 2.1651054091270535e-05, |
|
"loss": 0.002, |
|
"step": 276759 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 2.109540156319699e-05, |
|
"loss": 0.002, |
|
"step": 278478 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 2.0539749035123444e-05, |
|
"loss": 0.0018, |
|
"step": 280197 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 1.9984096507049904e-05, |
|
"loss": 0.002, |
|
"step": 281916 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 1.942844397897636e-05, |
|
"loss": 0.0019, |
|
"step": 283635 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 1.8872791450902816e-05, |
|
"loss": 0.0018, |
|
"step": 285354 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 1.8317138922829272e-05, |
|
"loss": 0.0019, |
|
"step": 287073 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 1.776148639475573e-05, |
|
"loss": 0.0018, |
|
"step": 288792 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 1.7205833866682185e-05, |
|
"loss": 0.0019, |
|
"step": 290511 |
|
}, |
|
{ |
|
"epoch": 12.75, |
|
"learning_rate": 1.665018133860864e-05, |
|
"loss": 0.0019, |
|
"step": 292230 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 1.6094528810535094e-05, |
|
"loss": 0.0021, |
|
"step": 293949 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 1.553887628246155e-05, |
|
"loss": 0.0019, |
|
"step": 295668 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 1.4983223754388006e-05, |
|
"loss": 0.002, |
|
"step": 297387 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.035788267850875854, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4958, |
|
"eval_samples_per_second": 18.598, |
|
"eval_steps_per_second": 1.26, |
|
"step": 297908 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 1.4427571226314462e-05, |
|
"loss": 0.0019, |
|
"step": 299106 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 1.3871918698240918e-05, |
|
"loss": 0.0018, |
|
"step": 300825 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 1.3316266170167374e-05, |
|
"loss": 0.0018, |
|
"step": 302544 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 1.276061364209383e-05, |
|
"loss": 0.0018, |
|
"step": 304263 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 1.2204961114020287e-05, |
|
"loss": 0.0018, |
|
"step": 305982 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 1.1649308585946743e-05, |
|
"loss": 0.0018, |
|
"step": 307701 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 1.1093656057873199e-05, |
|
"loss": 0.0019, |
|
"step": 309420 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 1.0538003529799655e-05, |
|
"loss": 0.0018, |
|
"step": 311139 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 9.982351001726111e-06, |
|
"loss": 0.0017, |
|
"step": 312858 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 9.426698473652567e-06, |
|
"loss": 0.0018, |
|
"step": 314577 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 8.871045945579024e-06, |
|
"loss": 0.002, |
|
"step": 316296 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 8.31539341750548e-06, |
|
"loss": 0.0018, |
|
"step": 318015 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 7.759740889431934e-06, |
|
"loss": 0.0018, |
|
"step": 319734 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.03550655022263527, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.4713, |
|
"eval_samples_per_second": 18.632, |
|
"eval_steps_per_second": 1.262, |
|
"step": 320824 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 7.204088361358391e-06, |
|
"loss": 0.0017, |
|
"step": 321453 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 6.648435833284847e-06, |
|
"loss": 0.0018, |
|
"step": 323172 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 6.092783305211304e-06, |
|
"loss": 0.0018, |
|
"step": 324891 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 5.537130777137759e-06, |
|
"loss": 0.0017, |
|
"step": 326610 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 4.981478249064216e-06, |
|
"loss": 0.0018, |
|
"step": 328329 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 4.425825720990671e-06, |
|
"loss": 0.0018, |
|
"step": 330048 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 3.8701731929171274e-06, |
|
"loss": 0.0018, |
|
"step": 331767 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 3.3145206648435836e-06, |
|
"loss": 0.0019, |
|
"step": 333486 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 2.7588681367700398e-06, |
|
"loss": 0.0017, |
|
"step": 335205 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 2.2032156086964955e-06, |
|
"loss": 0.0018, |
|
"step": 336924 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 1.6475630806229517e-06, |
|
"loss": 0.0017, |
|
"step": 338643 |
|
}, |
|
{ |
|
"epoch": 14.85, |
|
"learning_rate": 1.0919105525494076e-06, |
|
"loss": 0.0018, |
|
"step": 340362 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 5.362580244758636e-07, |
|
"loss": 0.0019, |
|
"step": 342081 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.03661360964179039, |
|
"eval_max_distance": 8, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 13.3536, |
|
"eval_samples_per_second": 18.796, |
|
"eval_steps_per_second": 1.273, |
|
"step": 343740 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 343740, |
|
"total_flos": 8.727792619277722e+16, |
|
"train_loss": 0.0029792904397642345, |
|
"train_runtime": 24306.5697, |
|
"train_samples_per_second": 212.119, |
|
"train_steps_per_second": 14.142 |
|
} |
|
], |
|
"logging_steps": 1719, |
|
"max_steps": 343740, |
|
"num_train_epochs": 15, |
|
"save_steps": 3438, |
|
"total_flos": 8.727792619277722e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|