|
{ |
|
"best_metric": 0.8666666666666667, |
|
"best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-100-iN\\checkpoint-85", |
|
"epoch": 90.3225806451613, |
|
"eval_steps": 500, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.3812448978424072, |
|
"eval_runtime": 0.9939, |
|
"eval_samples_per_second": 60.369, |
|
"eval_steps_per_second": 2.012, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 1.3848, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.5, |
|
"eval_loss": 1.3605996370315552, |
|
"eval_runtime": 1.0777, |
|
"eval_samples_per_second": 55.675, |
|
"eval_steps_per_second": 1.856, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 1.3686, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.3074811697006226, |
|
"eval_runtime": 1.0069, |
|
"eval_samples_per_second": 59.587, |
|
"eval_steps_per_second": 1.986, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 1.2965, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.2370479106903076, |
|
"eval_runtime": 1.023, |
|
"eval_samples_per_second": 58.649, |
|
"eval_steps_per_second": 1.955, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.5333333333333333, |
|
"eval_loss": 1.1167830228805542, |
|
"eval_runtime": 1.0537, |
|
"eval_samples_per_second": 56.944, |
|
"eval_steps_per_second": 1.898, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.9624060150375936e-05, |
|
"loss": 1.1753, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.5666666666666667, |
|
"eval_loss": 1.0309817790985107, |
|
"eval_runtime": 0.9955, |
|
"eval_samples_per_second": 60.27, |
|
"eval_steps_per_second": 2.009, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 4.887218045112782e-05, |
|
"loss": 1.0294, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.9315768480300903, |
|
"eval_runtime": 1.0112, |
|
"eval_samples_per_second": 59.337, |
|
"eval_steps_per_second": 1.978, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.81203007518797e-05, |
|
"loss": 0.902, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6833333333333333, |
|
"eval_loss": 0.8727719187736511, |
|
"eval_runtime": 1.0259, |
|
"eval_samples_per_second": 58.484, |
|
"eval_steps_per_second": 1.949, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.8128864765167236, |
|
"eval_runtime": 1.0791, |
|
"eval_samples_per_second": 55.601, |
|
"eval_steps_per_second": 1.853, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.736842105263158e-05, |
|
"loss": 0.7812, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7006206512451172, |
|
"eval_runtime": 1.0876, |
|
"eval_samples_per_second": 55.167, |
|
"eval_steps_per_second": 1.839, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 4.6616541353383456e-05, |
|
"loss": 0.6419, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.6381439566612244, |
|
"eval_runtime": 1.0437, |
|
"eval_samples_per_second": 57.486, |
|
"eval_steps_per_second": 1.916, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 4.586466165413534e-05, |
|
"loss": 0.5109, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.6327125430107117, |
|
"eval_runtime": 1.0397, |
|
"eval_samples_per_second": 57.707, |
|
"eval_steps_per_second": 1.924, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 4.511278195488722e-05, |
|
"loss": 0.3838, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.8666666666666667, |
|
"eval_loss": 0.5441552996635437, |
|
"eval_runtime": 1.0509, |
|
"eval_samples_per_second": 57.092, |
|
"eval_steps_per_second": 1.903, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6755495667457581, |
|
"eval_runtime": 1.041, |
|
"eval_samples_per_second": 57.635, |
|
"eval_steps_per_second": 1.921, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 4.43609022556391e-05, |
|
"loss": 0.285, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7755728960037231, |
|
"eval_runtime": 1.0199, |
|
"eval_samples_per_second": 58.828, |
|
"eval_steps_per_second": 1.961, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 4.3609022556390975e-05, |
|
"loss": 0.2672, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.8106668591499329, |
|
"eval_runtime": 1.0429, |
|
"eval_samples_per_second": 57.531, |
|
"eval_steps_per_second": 1.918, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 0.2466, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5219495892524719, |
|
"eval_runtime": 1.0179, |
|
"eval_samples_per_second": 58.943, |
|
"eval_steps_per_second": 1.965, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.7040973901748657, |
|
"eval_runtime": 1.0682, |
|
"eval_samples_per_second": 56.169, |
|
"eval_steps_per_second": 1.872, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 4.210526315789474e-05, |
|
"loss": 0.2312, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.7878958582878113, |
|
"eval_runtime": 0.9838, |
|
"eval_samples_per_second": 60.989, |
|
"eval_steps_per_second": 2.033, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 4.135338345864662e-05, |
|
"loss": 0.1933, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.709021806716919, |
|
"eval_runtime": 1.0518, |
|
"eval_samples_per_second": 57.046, |
|
"eval_steps_per_second": 1.902, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 4.0601503759398494e-05, |
|
"loss": 0.1692, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5394979119300842, |
|
"eval_runtime": 1.0183, |
|
"eval_samples_per_second": 58.921, |
|
"eval_steps_per_second": 1.964, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"learning_rate": 3.9849624060150376e-05, |
|
"loss": 0.1578, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.6418856382369995, |
|
"eval_runtime": 1.0177, |
|
"eval_samples_per_second": 58.959, |
|
"eval_steps_per_second": 1.965, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.5736449956893921, |
|
"eval_runtime": 1.0279, |
|
"eval_samples_per_second": 58.369, |
|
"eval_steps_per_second": 1.946, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 3.909774436090226e-05, |
|
"loss": 0.1321, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.747129499912262, |
|
"eval_runtime": 1.021, |
|
"eval_samples_per_second": 58.768, |
|
"eval_steps_per_second": 1.959, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 3.834586466165413e-05, |
|
"loss": 0.1114, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 0.6447189450263977, |
|
"eval_runtime": 1.0464, |
|
"eval_samples_per_second": 57.341, |
|
"eval_steps_per_second": 1.911, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 3.759398496240601e-05, |
|
"loss": 0.1385, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.61579829454422, |
|
"eval_runtime": 0.9804, |
|
"eval_samples_per_second": 61.197, |
|
"eval_steps_per_second": 2.04, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.6467480063438416, |
|
"eval_runtime": 1.0425, |
|
"eval_samples_per_second": 57.556, |
|
"eval_steps_per_second": 1.919, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 3.6842105263157895e-05, |
|
"loss": 0.1136, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.85, |
|
"eval_loss": 0.6180290579795837, |
|
"eval_runtime": 1.0892, |
|
"eval_samples_per_second": 55.084, |
|
"eval_steps_per_second": 1.836, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 3.6090225563909776e-05, |
|
"loss": 0.0997, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.8578386902809143, |
|
"eval_runtime": 0.9851, |
|
"eval_samples_per_second": 60.908, |
|
"eval_steps_per_second": 2.03, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 3.533834586466165e-05, |
|
"loss": 0.1064, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.6778053641319275, |
|
"eval_runtime": 0.9651, |
|
"eval_samples_per_second": 62.17, |
|
"eval_steps_per_second": 2.072, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 3.458646616541353e-05, |
|
"loss": 0.0775, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8123681545257568, |
|
"eval_runtime": 0.9886, |
|
"eval_samples_per_second": 60.693, |
|
"eval_steps_per_second": 2.023, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7782633900642395, |
|
"eval_runtime": 1.0238, |
|
"eval_samples_per_second": 58.605, |
|
"eval_steps_per_second": 1.953, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 3.3834586466165414e-05, |
|
"loss": 0.0921, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.8320418000221252, |
|
"eval_runtime": 1.0125, |
|
"eval_samples_per_second": 59.258, |
|
"eval_steps_per_second": 1.975, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.55, |
|
"learning_rate": 3.3082706766917295e-05, |
|
"loss": 0.0919, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.8309503197669983, |
|
"eval_runtime": 0.9631, |
|
"eval_samples_per_second": 62.3, |
|
"eval_steps_per_second": 2.077, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 3.233082706766917e-05, |
|
"loss": 0.0888, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.85, |
|
"eval_loss": 0.6575559377670288, |
|
"eval_runtime": 0.9897, |
|
"eval_samples_per_second": 60.621, |
|
"eval_steps_per_second": 2.021, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7043873071670532, |
|
"eval_runtime": 0.9919, |
|
"eval_samples_per_second": 60.489, |
|
"eval_steps_per_second": 2.016, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"learning_rate": 3.157894736842105e-05, |
|
"loss": 0.0693, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.7607947587966919, |
|
"eval_runtime": 0.9426, |
|
"eval_samples_per_second": 63.653, |
|
"eval_steps_per_second": 2.122, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.42, |
|
"learning_rate": 3.082706766917293e-05, |
|
"loss": 0.061, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7802255153656006, |
|
"eval_runtime": 0.9812, |
|
"eval_samples_per_second": 61.149, |
|
"eval_steps_per_second": 2.038, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 38.71, |
|
"learning_rate": 3.007518796992481e-05, |
|
"loss": 0.0699, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 38.97, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.7761934995651245, |
|
"eval_runtime": 0.9881, |
|
"eval_samples_per_second": 60.722, |
|
"eval_steps_per_second": 2.024, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 2.9323308270676693e-05, |
|
"loss": 0.0652, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7579043507575989, |
|
"eval_runtime": 1.0374, |
|
"eval_samples_per_second": 57.837, |
|
"eval_steps_per_second": 1.928, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 40.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.998461902141571, |
|
"eval_runtime": 0.9702, |
|
"eval_samples_per_second": 61.842, |
|
"eval_steps_per_second": 2.061, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 41.29, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 0.0562, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 41.94, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.8027188777923584, |
|
"eval_runtime": 1.0453, |
|
"eval_samples_per_second": 57.397, |
|
"eval_steps_per_second": 1.913, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 42.58, |
|
"learning_rate": 2.7819548872180452e-05, |
|
"loss": 0.0534, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 42.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9704654812812805, |
|
"eval_runtime": 1.0187, |
|
"eval_samples_per_second": 58.899, |
|
"eval_steps_per_second": 1.963, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 43.87, |
|
"learning_rate": 2.706766917293233e-05, |
|
"loss": 0.0519, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7301105260848999, |
|
"eval_runtime": 0.998, |
|
"eval_samples_per_second": 60.123, |
|
"eval_steps_per_second": 2.004, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 44.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8432899713516235, |
|
"eval_runtime": 1.0546, |
|
"eval_samples_per_second": 56.895, |
|
"eval_steps_per_second": 1.897, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 45.16, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.0529, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 45.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8533557653427124, |
|
"eval_runtime": 1.0201, |
|
"eval_samples_per_second": 58.815, |
|
"eval_steps_per_second": 1.961, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"learning_rate": 2.556390977443609e-05, |
|
"loss": 0.0772, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 46.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8561839461326599, |
|
"eval_runtime": 1.0725, |
|
"eval_samples_per_second": 55.942, |
|
"eval_steps_per_second": 1.865, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 47.74, |
|
"learning_rate": 2.4812030075187968e-05, |
|
"loss": 0.0644, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8419451117515564, |
|
"eval_runtime": 1.0154, |
|
"eval_samples_per_second": 59.091, |
|
"eval_steps_per_second": 1.97, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 48.9, |
|
"eval_accuracy": 0.7666666666666667, |
|
"eval_loss": 1.125100016593933, |
|
"eval_runtime": 1.3342, |
|
"eval_samples_per_second": 44.969, |
|
"eval_steps_per_second": 1.499, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 49.03, |
|
"learning_rate": 2.406015037593985e-05, |
|
"loss": 0.0467, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 49.94, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7536706924438477, |
|
"eval_runtime": 1.6625, |
|
"eval_samples_per_second": 36.09, |
|
"eval_steps_per_second": 1.203, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 50.32, |
|
"learning_rate": 2.3308270676691728e-05, |
|
"loss": 0.0576, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 50.97, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.7517222166061401, |
|
"eval_runtime": 1.0574, |
|
"eval_samples_per_second": 56.741, |
|
"eval_steps_per_second": 1.891, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 51.61, |
|
"learning_rate": 2.255639097744361e-05, |
|
"loss": 0.0344, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8343304395675659, |
|
"eval_runtime": 1.0509, |
|
"eval_samples_per_second": 57.096, |
|
"eval_steps_per_second": 1.903, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 52.9, |
|
"learning_rate": 2.1804511278195487e-05, |
|
"loss": 0.0663, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 52.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.7636159062385559, |
|
"eval_runtime": 0.952, |
|
"eval_samples_per_second": 63.026, |
|
"eval_steps_per_second": 2.101, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 53.94, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.8253324031829834, |
|
"eval_runtime": 0.9847, |
|
"eval_samples_per_second": 60.932, |
|
"eval_steps_per_second": 2.031, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 54.19, |
|
"learning_rate": 2.105263157894737e-05, |
|
"loss": 0.0353, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 54.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.934796154499054, |
|
"eval_runtime": 1.0691, |
|
"eval_samples_per_second": 56.12, |
|
"eval_steps_per_second": 1.871, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 55.48, |
|
"learning_rate": 2.0300751879699247e-05, |
|
"loss": 0.0524, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.8216551542282104, |
|
"eval_runtime": 0.9842, |
|
"eval_samples_per_second": 60.966, |
|
"eval_steps_per_second": 2.032, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 56.77, |
|
"learning_rate": 1.954887218045113e-05, |
|
"loss": 0.0479, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 56.9, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.758594810962677, |
|
"eval_runtime": 1.0267, |
|
"eval_samples_per_second": 58.439, |
|
"eval_steps_per_second": 1.948, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 57.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8146753311157227, |
|
"eval_runtime": 0.965, |
|
"eval_samples_per_second": 62.179, |
|
"eval_steps_per_second": 2.073, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 58.06, |
|
"learning_rate": 1.8796992481203007e-05, |
|
"loss": 0.0595, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9999889135360718, |
|
"eval_runtime": 1.0193, |
|
"eval_samples_per_second": 58.862, |
|
"eval_steps_per_second": 1.962, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 59.35, |
|
"learning_rate": 1.8045112781954888e-05, |
|
"loss": 0.0475, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9291054010391235, |
|
"eval_runtime": 0.9846, |
|
"eval_samples_per_second": 60.941, |
|
"eval_steps_per_second": 2.031, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 60.65, |
|
"learning_rate": 1.7293233082706766e-05, |
|
"loss": 0.049, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 60.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9588444828987122, |
|
"eval_runtime": 1.0072, |
|
"eval_samples_per_second": 59.57, |
|
"eval_steps_per_second": 1.986, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 61.94, |
|
"learning_rate": 1.6541353383458648e-05, |
|
"loss": 0.0398, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 61.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9500763416290283, |
|
"eval_runtime": 1.1967, |
|
"eval_samples_per_second": 50.137, |
|
"eval_steps_per_second": 1.671, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 62.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9498623013496399, |
|
"eval_runtime": 1.0197, |
|
"eval_samples_per_second": 58.839, |
|
"eval_steps_per_second": 1.961, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 63.23, |
|
"learning_rate": 1.5789473684210526e-05, |
|
"loss": 0.0496, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9279070496559143, |
|
"eval_runtime": 1.006, |
|
"eval_samples_per_second": 59.643, |
|
"eval_steps_per_second": 1.988, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 64.52, |
|
"learning_rate": 1.5037593984962406e-05, |
|
"loss": 0.0354, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 64.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.9677076935768127, |
|
"eval_runtime": 0.9989, |
|
"eval_samples_per_second": 60.065, |
|
"eval_steps_per_second": 2.002, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 65.81, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 0.0325, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 65.94, |
|
"eval_accuracy": 0.8333333333333334, |
|
"eval_loss": 0.8371397852897644, |
|
"eval_runtime": 0.9656, |
|
"eval_samples_per_second": 62.134, |
|
"eval_steps_per_second": 2.071, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 66.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9682708978652954, |
|
"eval_runtime": 0.956, |
|
"eval_samples_per_second": 62.76, |
|
"eval_steps_per_second": 2.092, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 67.1, |
|
"learning_rate": 1.3533834586466165e-05, |
|
"loss": 0.0335, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 1.0455142259597778, |
|
"eval_runtime": 0.9995, |
|
"eval_samples_per_second": 60.033, |
|
"eval_steps_per_second": 2.001, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 68.39, |
|
"learning_rate": 1.2781954887218045e-05, |
|
"loss": 0.0375, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 68.9, |
|
"eval_accuracy": 0.8166666666666667, |
|
"eval_loss": 0.9026990532875061, |
|
"eval_runtime": 1.016, |
|
"eval_samples_per_second": 59.058, |
|
"eval_steps_per_second": 1.969, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 69.68, |
|
"learning_rate": 1.2030075187969925e-05, |
|
"loss": 0.0424, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 69.94, |
|
"eval_accuracy": 0.85, |
|
"eval_loss": 0.8043019771575928, |
|
"eval_runtime": 0.9715, |
|
"eval_samples_per_second": 61.76, |
|
"eval_steps_per_second": 2.059, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 70.97, |
|
"learning_rate": 1.1278195488721805e-05, |
|
"loss": 0.0383, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 70.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9034760594367981, |
|
"eval_runtime": 0.9985, |
|
"eval_samples_per_second": 60.088, |
|
"eval_steps_per_second": 2.003, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9360395073890686, |
|
"eval_runtime": 1.0923, |
|
"eval_samples_per_second": 54.931, |
|
"eval_steps_per_second": 1.831, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 72.26, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.0295, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 72.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9840652346611023, |
|
"eval_runtime": 1.1084, |
|
"eval_samples_per_second": 54.132, |
|
"eval_steps_per_second": 1.804, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 73.55, |
|
"learning_rate": 9.774436090225564e-06, |
|
"loss": 0.0307, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 73.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9300164580345154, |
|
"eval_runtime": 1.152, |
|
"eval_samples_per_second": 52.082, |
|
"eval_steps_per_second": 1.736, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 74.84, |
|
"learning_rate": 9.022556390977444e-06, |
|
"loss": 0.0376, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 74.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9629533290863037, |
|
"eval_runtime": 1.3504, |
|
"eval_samples_per_second": 44.432, |
|
"eval_steps_per_second": 1.481, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.977742612361908, |
|
"eval_runtime": 1.044, |
|
"eval_samples_per_second": 57.469, |
|
"eval_steps_per_second": 1.916, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 76.13, |
|
"learning_rate": 8.270676691729324e-06, |
|
"loss": 0.0259, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 76.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9322593808174133, |
|
"eval_runtime": 1.0858, |
|
"eval_samples_per_second": 55.256, |
|
"eval_steps_per_second": 1.842, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 77.42, |
|
"learning_rate": 7.518796992481203e-06, |
|
"loss": 0.0345, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 77.94, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9075172543525696, |
|
"eval_runtime": 1.1281, |
|
"eval_samples_per_second": 53.185, |
|
"eval_steps_per_second": 1.773, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 78.71, |
|
"learning_rate": 6.766917293233083e-06, |
|
"loss": 0.0346, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 78.97, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.8950561881065369, |
|
"eval_runtime": 0.9932, |
|
"eval_samples_per_second": 60.412, |
|
"eval_steps_per_second": 2.014, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 6.015037593984962e-06, |
|
"loss": 0.0319, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.9676039814949036, |
|
"eval_runtime": 0.9844, |
|
"eval_samples_per_second": 60.95, |
|
"eval_steps_per_second": 2.032, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 80.9, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.988412618637085, |
|
"eval_runtime": 0.9973, |
|
"eval_samples_per_second": 60.161, |
|
"eval_steps_per_second": 2.005, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 81.29, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 0.0226, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 81.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9850680232048035, |
|
"eval_runtime": 1.0518, |
|
"eval_samples_per_second": 57.046, |
|
"eval_steps_per_second": 1.902, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 82.58, |
|
"learning_rate": 4.511278195488722e-06, |
|
"loss": 0.033, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 82.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9710198640823364, |
|
"eval_runtime": 1.0106, |
|
"eval_samples_per_second": 59.371, |
|
"eval_steps_per_second": 1.979, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 83.87, |
|
"learning_rate": 3.7593984962406014e-06, |
|
"loss": 0.0262, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.985089123249054, |
|
"eval_runtime": 1.0015, |
|
"eval_samples_per_second": 59.912, |
|
"eval_steps_per_second": 1.997, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 84.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9868298172950745, |
|
"eval_runtime": 1.0224, |
|
"eval_samples_per_second": 58.683, |
|
"eval_steps_per_second": 1.956, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 85.16, |
|
"learning_rate": 3.007518796992481e-06, |
|
"loss": 0.0345, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 85.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9702271819114685, |
|
"eval_runtime": 1.0333, |
|
"eval_samples_per_second": 58.065, |
|
"eval_steps_per_second": 1.936, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 86.45, |
|
"learning_rate": 2.255639097744361e-06, |
|
"loss": 0.0299, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 86.97, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9888620376586914, |
|
"eval_runtime": 0.9649, |
|
"eval_samples_per_second": 62.184, |
|
"eval_steps_per_second": 2.073, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"learning_rate": 1.5037593984962406e-06, |
|
"loss": 0.0347, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 1.000300645828247, |
|
"eval_runtime": 1.0185, |
|
"eval_samples_per_second": 58.91, |
|
"eval_steps_per_second": 1.964, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 88.9, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9913238286972046, |
|
"eval_runtime": 1.0979, |
|
"eval_samples_per_second": 54.647, |
|
"eval_steps_per_second": 1.822, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 89.03, |
|
"learning_rate": 7.518796992481203e-07, |
|
"loss": 0.0288, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 89.94, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9858866333961487, |
|
"eval_runtime": 1.063, |
|
"eval_samples_per_second": 56.445, |
|
"eval_steps_per_second": 1.882, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 90.32, |
|
"learning_rate": 0.0, |
|
"loss": 0.0198, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 90.32, |
|
"eval_accuracy": 0.7833333333333333, |
|
"eval_loss": 0.9857978820800781, |
|
"eval_runtime": 1.01, |
|
"eval_samples_per_second": 59.406, |
|
"eval_steps_per_second": 1.98, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 90.32, |
|
"step": 700, |
|
"total_flos": 6.817867456880148e+18, |
|
"train_loss": 0.19939448043704033, |
|
"train_runtime": 1530.1698, |
|
"train_samples_per_second": 63.653, |
|
"train_steps_per_second": 0.457 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 6.817867456880148e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|