|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 69180, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.4455044810638915e-08, |
|
"loss": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.001445504481065e-06, |
|
"loss": 0.0054, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.000289100896213e-05, |
|
"loss": 0.0038, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5004336513443193e-05, |
|
"loss": 0.0027, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.000578201792426e-05, |
|
"loss": 0.0019, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5007227522405318e-05, |
|
"loss": 0.0012, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.0008673026886387e-05, |
|
"loss": 0.0011, |
|
"step": 2076 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.5010118531367445e-05, |
|
"loss": 0.0012, |
|
"step": 2422 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.001156403584852e-05, |
|
"loss": 0.0011, |
|
"step": 2768 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.5013009540329577e-05, |
|
"loss": 0.001, |
|
"step": 3114 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.0005217896541580558, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1674, |
|
"eval_samples_per_second": 23.069, |
|
"eval_steps_per_second": 0.923, |
|
"step": 3459 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.0014455044810635e-05, |
|
"loss": 0.001, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.501590054929171e-05, |
|
"loss": 0.001, |
|
"step": 3806 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.001734605377277e-05, |
|
"loss": 0.0008, |
|
"step": 4152 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.501879155825383e-05, |
|
"loss": 0.0009, |
|
"step": 4498 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.002023706273489e-05, |
|
"loss": 0.0011, |
|
"step": 4844 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.502168256721596e-05, |
|
"loss": 0.001, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.002312807169704e-05, |
|
"loss": 0.0008, |
|
"step": 5536 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.50245735761781e-05, |
|
"loss": 0.0009, |
|
"step": 5882 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.002601908065915e-05, |
|
"loss": 0.0011, |
|
"step": 6228 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.502746458514021e-05, |
|
"loss": 0.0007, |
|
"step": 6574 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.0005328759434632957, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1588, |
|
"eval_samples_per_second": 23.161, |
|
"eval_steps_per_second": 0.926, |
|
"step": 6918 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.999678776781987e-05, |
|
"loss": 0.001, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.94410716006553e-05, |
|
"loss": 0.0007, |
|
"step": 7266 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.888535543349073e-05, |
|
"loss": 0.0009, |
|
"step": 7612 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 9.832963926632618e-05, |
|
"loss": 0.0008, |
|
"step": 7958 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.777392309916162e-05, |
|
"loss": 0.0009, |
|
"step": 8304 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.721820693199704e-05, |
|
"loss": 0.0008, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.666249076483249e-05, |
|
"loss": 0.0008, |
|
"step": 8996 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.610677459766793e-05, |
|
"loss": 0.0009, |
|
"step": 9342 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 9.555105843050336e-05, |
|
"loss": 0.0008, |
|
"step": 9688 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.49953422633388e-05, |
|
"loss": 0.0008, |
|
"step": 10034 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.0005345833487808704, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1807, |
|
"eval_samples_per_second": 22.928, |
|
"eval_steps_per_second": 0.917, |
|
"step": 10377 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.443962609617424e-05, |
|
"loss": 0.0012, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 9.388390992900967e-05, |
|
"loss": 0.0008, |
|
"step": 10726 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.332819376184511e-05, |
|
"loss": 0.0006, |
|
"step": 11072 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 9.277247759468054e-05, |
|
"loss": 0.0009, |
|
"step": 11418 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.221676142751598e-05, |
|
"loss": 0.0007, |
|
"step": 11764 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 9.166104526035142e-05, |
|
"loss": 0.0006, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 9.110532909318687e-05, |
|
"loss": 0.0007, |
|
"step": 12456 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 9.05496129260223e-05, |
|
"loss": 0.0008, |
|
"step": 12802 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.999389675885773e-05, |
|
"loss": 0.0008, |
|
"step": 13148 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.943818059169318e-05, |
|
"loss": 0.0007, |
|
"step": 13494 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.0006046278867870569, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1455, |
|
"eval_samples_per_second": 23.305, |
|
"eval_steps_per_second": 0.932, |
|
"step": 13836 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.888246442452861e-05, |
|
"loss": 0.0008, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.832674825736405e-05, |
|
"loss": 0.0005, |
|
"step": 14186 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.777103209019947e-05, |
|
"loss": 0.0006, |
|
"step": 14532 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 8.721531592303492e-05, |
|
"loss": 0.0009, |
|
"step": 14878 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 8.665959975587036e-05, |
|
"loss": 0.0005, |
|
"step": 15224 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 8.61038835887058e-05, |
|
"loss": 0.0006, |
|
"step": 15570 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.554816742154123e-05, |
|
"loss": 0.0007, |
|
"step": 15916 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.499245125437667e-05, |
|
"loss": 0.0006, |
|
"step": 16262 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.44367350872121e-05, |
|
"loss": 0.0008, |
|
"step": 16608 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 8.388101892004755e-05, |
|
"loss": 0.0006, |
|
"step": 16954 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.0002887483569793403, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1519, |
|
"eval_samples_per_second": 23.235, |
|
"eval_steps_per_second": 0.929, |
|
"step": 17295 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 8.332530275288298e-05, |
|
"loss": 0.0007, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 8.276958658571841e-05, |
|
"loss": 0.0006, |
|
"step": 17646 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 8.221387041855386e-05, |
|
"loss": 0.0006, |
|
"step": 17992 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.16581542513893e-05, |
|
"loss": 0.0006, |
|
"step": 18338 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 8.110243808422474e-05, |
|
"loss": 0.0006, |
|
"step": 18684 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 8.054672191706016e-05, |
|
"loss": 0.0007, |
|
"step": 19030 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.999100574989561e-05, |
|
"loss": 0.0007, |
|
"step": 19376 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 7.943528958273105e-05, |
|
"loss": 0.0005, |
|
"step": 19722 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 7.887957341556648e-05, |
|
"loss": 0.0007, |
|
"step": 20068 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.832385724840192e-05, |
|
"loss": 0.0006, |
|
"step": 20414 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.00028923238278366625, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.139, |
|
"eval_samples_per_second": 23.376, |
|
"eval_steps_per_second": 0.935, |
|
"step": 20754 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.776814108123735e-05, |
|
"loss": 0.0007, |
|
"step": 20760 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.721242491407279e-05, |
|
"loss": 0.0006, |
|
"step": 21106 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.665670874690823e-05, |
|
"loss": 0.0007, |
|
"step": 21452 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.610099257974366e-05, |
|
"loss": 0.0006, |
|
"step": 21798 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.55452764125791e-05, |
|
"loss": 0.0005, |
|
"step": 22144 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7.498956024541455e-05, |
|
"loss": 0.0006, |
|
"step": 22490 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 7.443384407824999e-05, |
|
"loss": 0.0005, |
|
"step": 22836 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 7.387812791108541e-05, |
|
"loss": 0.0006, |
|
"step": 23182 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 7.332241174392085e-05, |
|
"loss": 0.0006, |
|
"step": 23528 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 7.27666955767563e-05, |
|
"loss": 0.0005, |
|
"step": 23874 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.00023660251463297755, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1425, |
|
"eval_samples_per_second": 23.338, |
|
"eval_steps_per_second": 0.934, |
|
"step": 24213 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 7.221097940959173e-05, |
|
"loss": 0.0006, |
|
"step": 24220 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 7.165526324242716e-05, |
|
"loss": 0.0005, |
|
"step": 24566 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 7.10995470752626e-05, |
|
"loss": 0.0006, |
|
"step": 24912 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 7.054383090809804e-05, |
|
"loss": 0.0006, |
|
"step": 25258 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 6.998811474093348e-05, |
|
"loss": 0.0005, |
|
"step": 25604 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 6.943239857376891e-05, |
|
"loss": 0.0006, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 6.887668240660435e-05, |
|
"loss": 0.0004, |
|
"step": 26296 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 6.832096623943979e-05, |
|
"loss": 0.0007, |
|
"step": 26642 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 6.776525007227524e-05, |
|
"loss": 0.0007, |
|
"step": 26988 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 6.720953390511066e-05, |
|
"loss": 0.0006, |
|
"step": 27334 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.0004639440739993006, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1108, |
|
"eval_samples_per_second": 23.687, |
|
"eval_steps_per_second": 0.947, |
|
"step": 27672 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.66538177379461e-05, |
|
"loss": 0.0006, |
|
"step": 27680 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 6.609810157078153e-05, |
|
"loss": 0.0005, |
|
"step": 28026 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 6.554238540361698e-05, |
|
"loss": 0.0005, |
|
"step": 28372 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 6.498666923645242e-05, |
|
"loss": 0.0004, |
|
"step": 28718 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 6.443095306928784e-05, |
|
"loss": 0.0004, |
|
"step": 29064 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.387523690212329e-05, |
|
"loss": 0.0006, |
|
"step": 29410 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 6.331952073495873e-05, |
|
"loss": 0.0008, |
|
"step": 29756 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.276380456779417e-05, |
|
"loss": 0.0006, |
|
"step": 30102 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.22080884006296e-05, |
|
"loss": 0.0005, |
|
"step": 30448 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 6.165237223346504e-05, |
|
"loss": 0.0006, |
|
"step": 30794 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.0005358799826353788, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1129, |
|
"eval_samples_per_second": 23.664, |
|
"eval_steps_per_second": 0.947, |
|
"step": 31131 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 6.109665606630047e-05, |
|
"loss": 0.0004, |
|
"step": 31140 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 6.054093989913592e-05, |
|
"loss": 0.0005, |
|
"step": 31486 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 5.9985223731971354e-05, |
|
"loss": 0.0006, |
|
"step": 31832 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.9429507564806783e-05, |
|
"loss": 0.0004, |
|
"step": 32178 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 5.887379139764222e-05, |
|
"loss": 0.0007, |
|
"step": 32524 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.831807523047766e-05, |
|
"loss": 0.0005, |
|
"step": 32870 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 5.77623590633131e-05, |
|
"loss": 0.0004, |
|
"step": 33216 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.720664289614853e-05, |
|
"loss": 0.0006, |
|
"step": 33562 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 5.665092672898398e-05, |
|
"loss": 0.0005, |
|
"step": 33908 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 5.609521056181941e-05, |
|
"loss": 0.0005, |
|
"step": 34254 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.0005827790591865778, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.188, |
|
"eval_samples_per_second": 22.852, |
|
"eval_steps_per_second": 0.914, |
|
"step": 34590 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5.5539494394654845e-05, |
|
"loss": 0.0005, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 5.498377822749029e-05, |
|
"loss": 0.0005, |
|
"step": 34946 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 5.4428062060325725e-05, |
|
"loss": 0.0004, |
|
"step": 35292 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 5.387234589316116e-05, |
|
"loss": 0.0004, |
|
"step": 35638 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 5.3316629725996604e-05, |
|
"loss": 0.0004, |
|
"step": 35984 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 5.2760913558832034e-05, |
|
"loss": 0.0005, |
|
"step": 36330 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 5.220519739166747e-05, |
|
"loss": 0.0006, |
|
"step": 36676 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 5.164948122450291e-05, |
|
"loss": 0.0005, |
|
"step": 37022 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5.109376505733835e-05, |
|
"loss": 0.0004, |
|
"step": 37368 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 5.0538048890173786e-05, |
|
"loss": 0.0004, |
|
"step": 37714 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.0005017376388423145, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1154, |
|
"eval_samples_per_second": 23.636, |
|
"eval_steps_per_second": 0.945, |
|
"step": 38049 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 4.998233272300922e-05, |
|
"loss": 0.0005, |
|
"step": 38060 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 4.942661655584466e-05, |
|
"loss": 0.0004, |
|
"step": 38406 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.8870900388680096e-05, |
|
"loss": 0.0004, |
|
"step": 38752 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.831518422151554e-05, |
|
"loss": 0.0004, |
|
"step": 39098 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.775946805435097e-05, |
|
"loss": 0.0007, |
|
"step": 39444 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 4.720375188718641e-05, |
|
"loss": 0.0005, |
|
"step": 39790 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.664803572002184e-05, |
|
"loss": 0.0005, |
|
"step": 40136 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 4.6092319552857284e-05, |
|
"loss": 0.0007, |
|
"step": 40482 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 4.553660338569272e-05, |
|
"loss": 0.0006, |
|
"step": 40828 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 4.498088721852816e-05, |
|
"loss": 0.0005, |
|
"step": 41174 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.0003966529038734734, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1167, |
|
"eval_samples_per_second": 23.622, |
|
"eval_steps_per_second": 0.945, |
|
"step": 41508 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.4425171051363594e-05, |
|
"loss": 0.0005, |
|
"step": 41520 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 4.386945488419903e-05, |
|
"loss": 0.0005, |
|
"step": 41866 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 4.3313738717034466e-05, |
|
"loss": 0.0005, |
|
"step": 42212 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 4.275802254986991e-05, |
|
"loss": 0.0004, |
|
"step": 42558 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.220230638270534e-05, |
|
"loss": 0.0004, |
|
"step": 42904 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 4.164659021554078e-05, |
|
"loss": 0.0005, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 4.109087404837622e-05, |
|
"loss": 0.0005, |
|
"step": 43596 |
|
}, |
|
{ |
|
"epoch": 12.7, |
|
"learning_rate": 4.0535157881211655e-05, |
|
"loss": 0.0004, |
|
"step": 43942 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 3.997944171404709e-05, |
|
"loss": 0.0004, |
|
"step": 44288 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.942372554688253e-05, |
|
"loss": 0.0004, |
|
"step": 44634 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.0004018562030978501, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1157, |
|
"eval_samples_per_second": 23.633, |
|
"eval_steps_per_second": 0.945, |
|
"step": 44967 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 3.886800937971797e-05, |
|
"loss": 0.0005, |
|
"step": 44980 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 3.831229321255341e-05, |
|
"loss": 0.0006, |
|
"step": 45326 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 3.7756577045388844e-05, |
|
"loss": 0.0005, |
|
"step": 45672 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 3.720086087822428e-05, |
|
"loss": 0.0004, |
|
"step": 46018 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 3.664514471105972e-05, |
|
"loss": 0.0004, |
|
"step": 46364 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 3.608942854389515e-05, |
|
"loss": 0.0004, |
|
"step": 46710 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 3.5533712376730596e-05, |
|
"loss": 0.0004, |
|
"step": 47056 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 3.4977996209566026e-05, |
|
"loss": 0.0003, |
|
"step": 47402 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 3.442228004240147e-05, |
|
"loss": 0.0006, |
|
"step": 47748 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 3.3866563875236906e-05, |
|
"loss": 0.0004, |
|
"step": 48094 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.00045212701661512256, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.109, |
|
"eval_samples_per_second": 23.708, |
|
"eval_steps_per_second": 0.948, |
|
"step": 48426 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.331084770807234e-05, |
|
"loss": 0.0004, |
|
"step": 48440 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 3.275513154090778e-05, |
|
"loss": 0.0004, |
|
"step": 48786 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 3.2199415373743215e-05, |
|
"loss": 0.0005, |
|
"step": 49132 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 3.164369920657865e-05, |
|
"loss": 0.0004, |
|
"step": 49478 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 3.1087983039414094e-05, |
|
"loss": 0.0004, |
|
"step": 49824 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 3.0532266872249524e-05, |
|
"loss": 0.0004, |
|
"step": 50170 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 2.9976550705084967e-05, |
|
"loss": 0.0005, |
|
"step": 50516 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 2.94208345379204e-05, |
|
"loss": 0.0003, |
|
"step": 50862 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 2.886511837075584e-05, |
|
"loss": 0.0004, |
|
"step": 51208 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 2.830940220359128e-05, |
|
"loss": 0.0004, |
|
"step": 51554 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.0004186382284387946, |
|
"eval_max_distance": 0, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1163, |
|
"eval_samples_per_second": 23.626, |
|
"eval_steps_per_second": 0.945, |
|
"step": 51885 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.7753686036426713e-05, |
|
"loss": 0.0004, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 2.7197969869262153e-05, |
|
"loss": 0.0004, |
|
"step": 52246 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 2.6642253702097592e-05, |
|
"loss": 0.0005, |
|
"step": 52592 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 2.6086537534933025e-05, |
|
"loss": 0.0004, |
|
"step": 52938 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 2.5530821367768465e-05, |
|
"loss": 0.0004, |
|
"step": 53284 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 2.49751052006039e-05, |
|
"loss": 0.0005, |
|
"step": 53630 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 2.4419389033439338e-05, |
|
"loss": 0.0004, |
|
"step": 53976 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 2.3863672866274774e-05, |
|
"loss": 0.0004, |
|
"step": 54322 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 2.330795669911021e-05, |
|
"loss": 0.0004, |
|
"step": 54668 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 2.275224053194565e-05, |
|
"loss": 0.0004, |
|
"step": 55014 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.00046242817188613117, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1198, |
|
"eval_samples_per_second": 23.587, |
|
"eval_steps_per_second": 0.943, |
|
"step": 55344 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.2196524364781087e-05, |
|
"loss": 0.0003, |
|
"step": 55360 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 2.1640808197616523e-05, |
|
"loss": 0.0004, |
|
"step": 55706 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 2.108509203045196e-05, |
|
"loss": 0.0004, |
|
"step": 56052 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 2.05293758632874e-05, |
|
"loss": 0.0003, |
|
"step": 56398 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 1.9973659696122836e-05, |
|
"loss": 0.0005, |
|
"step": 56744 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 1.9417943528958272e-05, |
|
"loss": 0.0003, |
|
"step": 57090 |
|
}, |
|
{ |
|
"epoch": 16.6, |
|
"learning_rate": 1.8862227361793712e-05, |
|
"loss": 0.0003, |
|
"step": 57436 |
|
}, |
|
{ |
|
"epoch": 16.7, |
|
"learning_rate": 1.830651119462915e-05, |
|
"loss": 0.0003, |
|
"step": 57782 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 1.7750795027464585e-05, |
|
"loss": 0.0006, |
|
"step": 58128 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"learning_rate": 1.7195078860300025e-05, |
|
"loss": 0.0004, |
|
"step": 58474 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.0004974314360879362, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1095, |
|
"eval_samples_per_second": 23.702, |
|
"eval_steps_per_second": 0.948, |
|
"step": 58803 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 1.663936269313546e-05, |
|
"loss": 0.0005, |
|
"step": 58820 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 1.6083646525970898e-05, |
|
"loss": 0.0004, |
|
"step": 59166 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 1.5527930358806337e-05, |
|
"loss": 0.0003, |
|
"step": 59512 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 1.4972214191641772e-05, |
|
"loss": 0.0003, |
|
"step": 59858 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 1.441649802447721e-05, |
|
"loss": 0.0005, |
|
"step": 60204 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 1.3860781857312647e-05, |
|
"loss": 0.0004, |
|
"step": 60550 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 1.3305065690148087e-05, |
|
"loss": 0.0003, |
|
"step": 60896 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 1.2749349522983523e-05, |
|
"loss": 0.0004, |
|
"step": 61242 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 1.219363335581896e-05, |
|
"loss": 0.0005, |
|
"step": 61588 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 1.1637917188654396e-05, |
|
"loss": 0.0004, |
|
"step": 61934 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.0004895007587037981, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1247, |
|
"eval_samples_per_second": 23.533, |
|
"eval_steps_per_second": 0.941, |
|
"step": 62262 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 1.1082201021489834e-05, |
|
"loss": 0.0003, |
|
"step": 62280 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 1.052648485432527e-05, |
|
"loss": 0.0003, |
|
"step": 62626 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 9.970768687160708e-06, |
|
"loss": 0.0005, |
|
"step": 62972 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 9.415052519996145e-06, |
|
"loss": 0.0004, |
|
"step": 63318 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 8.859336352831583e-06, |
|
"loss": 0.0004, |
|
"step": 63664 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 8.303620185667021e-06, |
|
"loss": 0.0004, |
|
"step": 64010 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 7.747904018502457e-06, |
|
"loss": 0.0005, |
|
"step": 64356 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 7.192187851337895e-06, |
|
"loss": 0.0004, |
|
"step": 64702 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 6.636471684173333e-06, |
|
"loss": 0.0004, |
|
"step": 65048 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 6.08075551700877e-06, |
|
"loss": 0.0004, |
|
"step": 65394 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.00047712118248455226, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1311, |
|
"eval_samples_per_second": 23.462, |
|
"eval_steps_per_second": 0.938, |
|
"step": 65721 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 5.525039349844207e-06, |
|
"loss": 0.0003, |
|
"step": 65740 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 4.9693231826796444e-06, |
|
"loss": 0.0003, |
|
"step": 66086 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 4.413607015515082e-06, |
|
"loss": 0.0003, |
|
"step": 66432 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 3.857890848350519e-06, |
|
"loss": 0.0004, |
|
"step": 66778 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 3.3021746811859566e-06, |
|
"loss": 0.0005, |
|
"step": 67124 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 2.7464585140213935e-06, |
|
"loss": 0.0003, |
|
"step": 67470 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 2.1907423468568307e-06, |
|
"loss": 0.0003, |
|
"step": 67816 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 1.6350261796922682e-06, |
|
"loss": 0.0003, |
|
"step": 68162 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.0793100125277054e-06, |
|
"loss": 0.0004, |
|
"step": 68508 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 5.235938453631429e-07, |
|
"loss": 0.0004, |
|
"step": 68854 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.00047323742182925344, |
|
"eval_max_distance": 3, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.107, |
|
"eval_samples_per_second": 23.73, |
|
"eval_steps_per_second": 0.949, |
|
"step": 69180 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 69180, |
|
"total_flos": 4.132028143466496e+16, |
|
"train_loss": 0.0006141960998232213, |
|
"train_runtime": 5700.2515, |
|
"train_samples_per_second": 364.026, |
|
"train_steps_per_second": 12.136 |
|
} |
|
], |
|
"logging_steps": 346, |
|
"max_steps": 69180, |
|
"num_train_epochs": 20, |
|
"save_steps": 692, |
|
"total_flos": 4.132028143466496e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|