{ "best_metric": 0.8666666666666667, "best_model_checkpoint": "vit-base-patch16-224-dmae-va-U5-100-iN\\checkpoint-85", "epoch": 90.3225806451613, "eval_steps": 500, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_accuracy": 0.45, "eval_loss": 1.3812448978424072, "eval_runtime": 0.9939, "eval_samples_per_second": 60.369, "eval_steps_per_second": 2.012, "step": 7 }, { "epoch": 1.29, "learning_rate": 1.4285714285714285e-05, "loss": 1.3848, "step": 10 }, { "epoch": 1.94, "eval_accuracy": 0.5, "eval_loss": 1.3605996370315552, "eval_runtime": 1.0777, "eval_samples_per_second": 55.675, "eval_steps_per_second": 1.856, "step": 15 }, { "epoch": 2.58, "learning_rate": 2.857142857142857e-05, "loss": 1.3686, "step": 20 }, { "epoch": 2.97, "eval_accuracy": 0.5333333333333333, "eval_loss": 1.3074811697006226, "eval_runtime": 1.0069, "eval_samples_per_second": 59.587, "eval_steps_per_second": 1.986, "step": 23 }, { "epoch": 3.87, "learning_rate": 4.2857142857142856e-05, "loss": 1.2965, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.2370479106903076, "eval_runtime": 1.023, "eval_samples_per_second": 58.649, "eval_steps_per_second": 1.955, "step": 31 }, { "epoch": 4.9, "eval_accuracy": 0.5333333333333333, "eval_loss": 1.1167830228805542, "eval_runtime": 1.0537, "eval_samples_per_second": 56.944, "eval_steps_per_second": 1.898, "step": 38 }, { "epoch": 5.16, "learning_rate": 4.9624060150375936e-05, "loss": 1.1753, "step": 40 }, { "epoch": 5.94, "eval_accuracy": 0.5666666666666667, "eval_loss": 1.0309817790985107, "eval_runtime": 0.9955, "eval_samples_per_second": 60.27, "eval_steps_per_second": 2.009, "step": 46 }, { "epoch": 6.45, "learning_rate": 4.887218045112782e-05, "loss": 1.0294, "step": 50 }, { "epoch": 6.97, "eval_accuracy": 0.6, "eval_loss": 0.9315768480300903, "eval_runtime": 1.0112, "eval_samples_per_second": 59.337, "eval_steps_per_second": 1.978, "step": 54 }, { "epoch": 7.74, "learning_rate": 4.81203007518797e-05, "loss": 0.902, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.8727719187736511, "eval_runtime": 1.0259, "eval_samples_per_second": 58.484, "eval_steps_per_second": 1.949, "step": 62 }, { "epoch": 8.9, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.8128864765167236, "eval_runtime": 1.0791, "eval_samples_per_second": 55.601, "eval_steps_per_second": 1.853, "step": 69 }, { "epoch": 9.03, "learning_rate": 4.736842105263158e-05, "loss": 0.7812, "step": 70 }, { "epoch": 9.94, "eval_accuracy": 0.8, "eval_loss": 0.7006206512451172, "eval_runtime": 1.0876, "eval_samples_per_second": 55.167, "eval_steps_per_second": 1.839, "step": 77 }, { "epoch": 10.32, "learning_rate": 4.6616541353383456e-05, "loss": 0.6419, "step": 80 }, { "epoch": 10.97, "eval_accuracy": 0.8666666666666667, "eval_loss": 0.6381439566612244, "eval_runtime": 1.0437, "eval_samples_per_second": 57.486, "eval_steps_per_second": 1.916, "step": 85 }, { "epoch": 11.61, "learning_rate": 4.586466165413534e-05, "loss": 0.5109, "step": 90 }, { "epoch": 12.0, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.6327125430107117, "eval_runtime": 1.0397, "eval_samples_per_second": 57.707, "eval_steps_per_second": 1.924, "step": 93 }, { "epoch": 12.9, "learning_rate": 4.511278195488722e-05, "loss": 0.3838, "step": 100 }, { "epoch": 12.9, "eval_accuracy": 0.8666666666666667, "eval_loss": 0.5441552996635437, "eval_runtime": 1.0509, "eval_samples_per_second": 57.092, "eval_steps_per_second": 1.903, "step": 100 }, { "epoch": 13.94, "eval_accuracy": 0.75, "eval_loss": 0.6755495667457581, "eval_runtime": 1.041, "eval_samples_per_second": 57.635, "eval_steps_per_second": 1.921, "step": 108 }, { "epoch": 14.19, "learning_rate": 4.43609022556391e-05, "loss": 0.285, "step": 110 }, { "epoch": 14.97, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7755728960037231, "eval_runtime": 1.0199, "eval_samples_per_second": 58.828, "eval_steps_per_second": 1.961, "step": 116 }, { "epoch": 15.48, "learning_rate": 4.3609022556390975e-05, "loss": 0.2672, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.8106668591499329, "eval_runtime": 1.0429, "eval_samples_per_second": 57.531, "eval_steps_per_second": 1.918, "step": 124 }, { "epoch": 16.77, "learning_rate": 4.2857142857142856e-05, "loss": 0.2466, "step": 130 }, { "epoch": 16.9, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.5219495892524719, "eval_runtime": 1.0179, "eval_samples_per_second": 58.943, "eval_steps_per_second": 1.965, "step": 131 }, { "epoch": 17.94, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.7040973901748657, "eval_runtime": 1.0682, "eval_samples_per_second": 56.169, "eval_steps_per_second": 1.872, "step": 139 }, { "epoch": 18.06, "learning_rate": 4.210526315789474e-05, "loss": 0.2312, "step": 140 }, { "epoch": 18.97, "eval_accuracy": 0.75, "eval_loss": 0.7878958582878113, "eval_runtime": 0.9838, "eval_samples_per_second": 60.989, "eval_steps_per_second": 2.033, "step": 147 }, { "epoch": 19.35, "learning_rate": 4.135338345864662e-05, "loss": 0.1933, "step": 150 }, { "epoch": 20.0, "eval_accuracy": 0.8, "eval_loss": 0.709021806716919, "eval_runtime": 1.0518, "eval_samples_per_second": 57.046, "eval_steps_per_second": 1.902, "step": 155 }, { "epoch": 20.65, "learning_rate": 4.0601503759398494e-05, "loss": 0.1692, "step": 160 }, { "epoch": 20.9, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.5394979119300842, "eval_runtime": 1.0183, "eval_samples_per_second": 58.921, "eval_steps_per_second": 1.964, "step": 162 }, { "epoch": 21.94, "learning_rate": 3.9849624060150376e-05, "loss": 0.1578, "step": 170 }, { "epoch": 21.94, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.6418856382369995, "eval_runtime": 1.0177, "eval_samples_per_second": 58.959, "eval_steps_per_second": 1.965, "step": 170 }, { "epoch": 22.97, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.5736449956893921, "eval_runtime": 1.0279, "eval_samples_per_second": 58.369, "eval_steps_per_second": 1.946, "step": 178 }, { "epoch": 23.23, "learning_rate": 3.909774436090226e-05, "loss": 0.1321, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.75, "eval_loss": 0.747129499912262, "eval_runtime": 1.021, "eval_samples_per_second": 58.768, "eval_steps_per_second": 1.959, "step": 186 }, { "epoch": 24.52, "learning_rate": 3.834586466165413e-05, "loss": 0.1114, "step": 190 }, { "epoch": 24.9, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.6447189450263977, "eval_runtime": 1.0464, "eval_samples_per_second": 57.341, "eval_steps_per_second": 1.911, "step": 193 }, { "epoch": 25.81, "learning_rate": 3.759398496240601e-05, "loss": 0.1385, "step": 200 }, { "epoch": 25.94, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.61579829454422, "eval_runtime": 0.9804, "eval_samples_per_second": 61.197, "eval_steps_per_second": 2.04, "step": 201 }, { "epoch": 26.97, "eval_accuracy": 0.8, "eval_loss": 0.6467480063438416, "eval_runtime": 1.0425, "eval_samples_per_second": 57.556, "eval_steps_per_second": 1.919, "step": 209 }, { "epoch": 27.1, "learning_rate": 3.6842105263157895e-05, "loss": 0.1136, "step": 210 }, { "epoch": 28.0, "eval_accuracy": 0.85, "eval_loss": 0.6180290579795837, "eval_runtime": 1.0892, "eval_samples_per_second": 55.084, "eval_steps_per_second": 1.836, "step": 217 }, { "epoch": 28.39, "learning_rate": 3.6090225563909776e-05, "loss": 0.0997, "step": 220 }, { "epoch": 28.9, "eval_accuracy": 0.75, "eval_loss": 0.8578386902809143, "eval_runtime": 0.9851, "eval_samples_per_second": 60.908, "eval_steps_per_second": 2.03, "step": 224 }, { "epoch": 29.68, "learning_rate": 3.533834586466165e-05, "loss": 0.1064, "step": 230 }, { "epoch": 29.94, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.6778053641319275, "eval_runtime": 0.9651, "eval_samples_per_second": 62.17, "eval_steps_per_second": 2.072, "step": 232 }, { "epoch": 30.97, "learning_rate": 3.458646616541353e-05, "loss": 0.0775, "step": 240 }, { "epoch": 30.97, "eval_accuracy": 0.8, "eval_loss": 0.8123681545257568, "eval_runtime": 0.9886, "eval_samples_per_second": 60.693, "eval_steps_per_second": 2.023, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.8, "eval_loss": 0.7782633900642395, "eval_runtime": 1.0238, "eval_samples_per_second": 58.605, "eval_steps_per_second": 1.953, "step": 248 }, { "epoch": 32.26, "learning_rate": 3.3834586466165414e-05, "loss": 0.0921, "step": 250 }, { "epoch": 32.9, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.8320418000221252, "eval_runtime": 1.0125, "eval_samples_per_second": 59.258, "eval_steps_per_second": 1.975, "step": 255 }, { "epoch": 33.55, "learning_rate": 3.3082706766917295e-05, "loss": 0.0919, "step": 260 }, { "epoch": 33.94, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.8309503197669983, "eval_runtime": 0.9631, "eval_samples_per_second": 62.3, "eval_steps_per_second": 2.077, "step": 263 }, { "epoch": 34.84, "learning_rate": 3.233082706766917e-05, "loss": 0.0888, "step": 270 }, { "epoch": 34.97, "eval_accuracy": 0.85, "eval_loss": 0.6575559377670288, "eval_runtime": 0.9897, "eval_samples_per_second": 60.621, "eval_steps_per_second": 2.021, "step": 271 }, { "epoch": 36.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.7043873071670532, "eval_runtime": 0.9919, "eval_samples_per_second": 60.489, "eval_steps_per_second": 2.016, "step": 279 }, { "epoch": 36.13, "learning_rate": 3.157894736842105e-05, "loss": 0.0693, "step": 280 }, { "epoch": 36.9, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.7607947587966919, "eval_runtime": 0.9426, "eval_samples_per_second": 63.653, "eval_steps_per_second": 2.122, "step": 286 }, { "epoch": 37.42, "learning_rate": 3.082706766917293e-05, "loss": 0.061, "step": 290 }, { "epoch": 37.94, "eval_accuracy": 0.8, "eval_loss": 0.7802255153656006, "eval_runtime": 0.9812, "eval_samples_per_second": 61.149, "eval_steps_per_second": 2.038, "step": 294 }, { "epoch": 38.71, "learning_rate": 3.007518796992481e-05, "loss": 0.0699, "step": 300 }, { "epoch": 38.97, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.7761934995651245, "eval_runtime": 0.9881, "eval_samples_per_second": 60.722, "eval_steps_per_second": 2.024, "step": 302 }, { "epoch": 40.0, "learning_rate": 2.9323308270676693e-05, "loss": 0.0652, "step": 310 }, { "epoch": 40.0, "eval_accuracy": 0.8, "eval_loss": 0.7579043507575989, "eval_runtime": 1.0374, "eval_samples_per_second": 57.837, "eval_steps_per_second": 1.928, "step": 310 }, { "epoch": 40.9, "eval_accuracy": 0.75, "eval_loss": 0.998461902141571, "eval_runtime": 0.9702, "eval_samples_per_second": 61.842, "eval_steps_per_second": 2.061, "step": 317 }, { "epoch": 41.29, "learning_rate": 2.857142857142857e-05, "loss": 0.0562, "step": 320 }, { "epoch": 41.94, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8027188777923584, "eval_runtime": 1.0453, "eval_samples_per_second": 57.397, "eval_steps_per_second": 1.913, "step": 325 }, { "epoch": 42.58, "learning_rate": 2.7819548872180452e-05, "loss": 0.0534, "step": 330 }, { "epoch": 42.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9704654812812805, "eval_runtime": 1.0187, "eval_samples_per_second": 58.899, "eval_steps_per_second": 1.963, "step": 333 }, { "epoch": 43.87, "learning_rate": 2.706766917293233e-05, "loss": 0.0519, "step": 340 }, { "epoch": 44.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.7301105260848999, "eval_runtime": 0.998, "eval_samples_per_second": 60.123, "eval_steps_per_second": 2.004, "step": 341 }, { "epoch": 44.9, "eval_accuracy": 0.8, "eval_loss": 0.8432899713516235, "eval_runtime": 1.0546, "eval_samples_per_second": 56.895, "eval_steps_per_second": 1.897, "step": 348 }, { "epoch": 45.16, "learning_rate": 2.6315789473684212e-05, "loss": 0.0529, "step": 350 }, { "epoch": 45.94, "eval_accuracy": 0.8, "eval_loss": 0.8533557653427124, "eval_runtime": 1.0201, "eval_samples_per_second": 58.815, "eval_steps_per_second": 1.961, "step": 356 }, { "epoch": 46.45, "learning_rate": 2.556390977443609e-05, "loss": 0.0772, "step": 360 }, { "epoch": 46.97, "eval_accuracy": 0.8, "eval_loss": 0.8561839461326599, "eval_runtime": 1.0725, "eval_samples_per_second": 55.942, "eval_steps_per_second": 1.865, "step": 364 }, { "epoch": 47.74, "learning_rate": 2.4812030075187968e-05, "loss": 0.0644, "step": 370 }, { "epoch": 48.0, "eval_accuracy": 0.8, "eval_loss": 0.8419451117515564, "eval_runtime": 1.0154, "eval_samples_per_second": 59.091, "eval_steps_per_second": 1.97, "step": 372 }, { "epoch": 48.9, "eval_accuracy": 0.7666666666666667, "eval_loss": 1.125100016593933, "eval_runtime": 1.3342, "eval_samples_per_second": 44.969, "eval_steps_per_second": 1.499, "step": 379 }, { "epoch": 49.03, "learning_rate": 2.406015037593985e-05, "loss": 0.0467, "step": 380 }, { "epoch": 49.94, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.7536706924438477, "eval_runtime": 1.6625, "eval_samples_per_second": 36.09, "eval_steps_per_second": 1.203, "step": 387 }, { "epoch": 50.32, "learning_rate": 2.3308270676691728e-05, "loss": 0.0576, "step": 390 }, { "epoch": 50.97, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.7517222166061401, "eval_runtime": 1.0574, "eval_samples_per_second": 56.741, "eval_steps_per_second": 1.891, "step": 395 }, { "epoch": 51.61, "learning_rate": 2.255639097744361e-05, "loss": 0.0344, "step": 400 }, { "epoch": 52.0, "eval_accuracy": 0.8, "eval_loss": 0.8343304395675659, "eval_runtime": 1.0509, "eval_samples_per_second": 57.096, "eval_steps_per_second": 1.903, "step": 403 }, { "epoch": 52.9, "learning_rate": 2.1804511278195487e-05, "loss": 0.0663, "step": 410 }, { "epoch": 52.9, "eval_accuracy": 0.8, "eval_loss": 0.7636159062385559, "eval_runtime": 0.952, "eval_samples_per_second": 63.026, "eval_steps_per_second": 2.101, "step": 410 }, { "epoch": 53.94, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8253324031829834, "eval_runtime": 0.9847, "eval_samples_per_second": 60.932, "eval_steps_per_second": 2.031, "step": 418 }, { "epoch": 54.19, "learning_rate": 2.105263157894737e-05, "loss": 0.0353, "step": 420 }, { "epoch": 54.97, "eval_accuracy": 0.8, "eval_loss": 0.934796154499054, "eval_runtime": 1.0691, "eval_samples_per_second": 56.12, "eval_steps_per_second": 1.871, "step": 426 }, { "epoch": 55.48, "learning_rate": 2.0300751879699247e-05, "loss": 0.0524, "step": 430 }, { "epoch": 56.0, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.8216551542282104, "eval_runtime": 0.9842, "eval_samples_per_second": 60.966, "eval_steps_per_second": 2.032, "step": 434 }, { "epoch": 56.77, "learning_rate": 1.954887218045113e-05, "loss": 0.0479, "step": 440 }, { "epoch": 56.9, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.758594810962677, "eval_runtime": 1.0267, "eval_samples_per_second": 58.439, "eval_steps_per_second": 1.948, "step": 441 }, { "epoch": 57.94, "eval_accuracy": 0.8, "eval_loss": 0.8146753311157227, "eval_runtime": 0.965, "eval_samples_per_second": 62.179, "eval_steps_per_second": 2.073, "step": 449 }, { "epoch": 58.06, "learning_rate": 1.8796992481203007e-05, "loss": 0.0595, "step": 450 }, { "epoch": 58.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9999889135360718, "eval_runtime": 1.0193, "eval_samples_per_second": 58.862, "eval_steps_per_second": 1.962, "step": 457 }, { "epoch": 59.35, "learning_rate": 1.8045112781954888e-05, "loss": 0.0475, "step": 460 }, { "epoch": 60.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9291054010391235, "eval_runtime": 0.9846, "eval_samples_per_second": 60.941, "eval_steps_per_second": 2.031, "step": 465 }, { "epoch": 60.65, "learning_rate": 1.7293233082706766e-05, "loss": 0.049, "step": 470 }, { "epoch": 60.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9588444828987122, "eval_runtime": 1.0072, "eval_samples_per_second": 59.57, "eval_steps_per_second": 1.986, "step": 472 }, { "epoch": 61.94, "learning_rate": 1.6541353383458648e-05, "loss": 0.0398, "step": 480 }, { "epoch": 61.94, "eval_accuracy": 0.8, "eval_loss": 0.9500763416290283, "eval_runtime": 1.1967, "eval_samples_per_second": 50.137, "eval_steps_per_second": 1.671, "step": 480 }, { "epoch": 62.97, "eval_accuracy": 0.8, "eval_loss": 0.9498623013496399, "eval_runtime": 1.0197, "eval_samples_per_second": 58.839, "eval_steps_per_second": 1.961, "step": 488 }, { "epoch": 63.23, "learning_rate": 1.5789473684210526e-05, "loss": 0.0496, "step": 490 }, { "epoch": 64.0, "eval_accuracy": 0.8, "eval_loss": 0.9279070496559143, "eval_runtime": 1.006, "eval_samples_per_second": 59.643, "eval_steps_per_second": 1.988, "step": 496 }, { "epoch": 64.52, "learning_rate": 1.5037593984962406e-05, "loss": 0.0354, "step": 500 }, { "epoch": 64.9, "eval_accuracy": 0.75, "eval_loss": 0.9677076935768127, "eval_runtime": 0.9989, "eval_samples_per_second": 60.065, "eval_steps_per_second": 2.002, "step": 503 }, { "epoch": 65.81, "learning_rate": 1.4285714285714285e-05, "loss": 0.0325, "step": 510 }, { "epoch": 65.94, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.8371397852897644, "eval_runtime": 0.9656, "eval_samples_per_second": 62.134, "eval_steps_per_second": 2.071, "step": 511 }, { "epoch": 66.97, "eval_accuracy": 0.8, "eval_loss": 0.9682708978652954, "eval_runtime": 0.956, "eval_samples_per_second": 62.76, "eval_steps_per_second": 2.092, "step": 519 }, { "epoch": 67.1, "learning_rate": 1.3533834586466165e-05, "loss": 0.0335, "step": 520 }, { "epoch": 68.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 1.0455142259597778, "eval_runtime": 0.9995, "eval_samples_per_second": 60.033, "eval_steps_per_second": 2.001, "step": 527 }, { "epoch": 68.39, "learning_rate": 1.2781954887218045e-05, "loss": 0.0375, "step": 530 }, { "epoch": 68.9, "eval_accuracy": 0.8166666666666667, "eval_loss": 0.9026990532875061, "eval_runtime": 1.016, "eval_samples_per_second": 59.058, "eval_steps_per_second": 1.969, "step": 534 }, { "epoch": 69.68, "learning_rate": 1.2030075187969925e-05, "loss": 0.0424, "step": 540 }, { "epoch": 69.94, "eval_accuracy": 0.85, "eval_loss": 0.8043019771575928, "eval_runtime": 0.9715, "eval_samples_per_second": 61.76, "eval_steps_per_second": 2.059, "step": 542 }, { "epoch": 70.97, "learning_rate": 1.1278195488721805e-05, "loss": 0.0383, "step": 550 }, { "epoch": 70.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9034760594367981, "eval_runtime": 0.9985, "eval_samples_per_second": 60.088, "eval_steps_per_second": 2.003, "step": 550 }, { "epoch": 72.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9360395073890686, "eval_runtime": 1.0923, "eval_samples_per_second": 54.931, "eval_steps_per_second": 1.831, "step": 558 }, { "epoch": 72.26, "learning_rate": 1.0526315789473684e-05, "loss": 0.0295, "step": 560 }, { "epoch": 72.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9840652346611023, "eval_runtime": 1.1084, "eval_samples_per_second": 54.132, "eval_steps_per_second": 1.804, "step": 565 }, { "epoch": 73.55, "learning_rate": 9.774436090225564e-06, "loss": 0.0307, "step": 570 }, { "epoch": 73.94, "eval_accuracy": 0.8, "eval_loss": 0.9300164580345154, "eval_runtime": 1.152, "eval_samples_per_second": 52.082, "eval_steps_per_second": 1.736, "step": 573 }, { "epoch": 74.84, "learning_rate": 9.022556390977444e-06, "loss": 0.0376, "step": 580 }, { "epoch": 74.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9629533290863037, "eval_runtime": 1.3504, "eval_samples_per_second": 44.432, "eval_steps_per_second": 1.481, "step": 581 }, { "epoch": 76.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.977742612361908, "eval_runtime": 1.044, "eval_samples_per_second": 57.469, "eval_steps_per_second": 1.916, "step": 589 }, { "epoch": 76.13, "learning_rate": 8.270676691729324e-06, "loss": 0.0259, "step": 590 }, { "epoch": 76.9, "eval_accuracy": 0.8, "eval_loss": 0.9322593808174133, "eval_runtime": 1.0858, "eval_samples_per_second": 55.256, "eval_steps_per_second": 1.842, "step": 596 }, { "epoch": 77.42, "learning_rate": 7.518796992481203e-06, "loss": 0.0345, "step": 600 }, { "epoch": 77.94, "eval_accuracy": 0.8, "eval_loss": 0.9075172543525696, "eval_runtime": 1.1281, "eval_samples_per_second": 53.185, "eval_steps_per_second": 1.773, "step": 604 }, { "epoch": 78.71, "learning_rate": 6.766917293233083e-06, "loss": 0.0346, "step": 610 }, { "epoch": 78.97, "eval_accuracy": 0.8, "eval_loss": 0.8950561881065369, "eval_runtime": 0.9932, "eval_samples_per_second": 60.412, "eval_steps_per_second": 2.014, "step": 612 }, { "epoch": 80.0, "learning_rate": 6.015037593984962e-06, "loss": 0.0319, "step": 620 }, { "epoch": 80.0, "eval_accuracy": 0.8, "eval_loss": 0.9676039814949036, "eval_runtime": 0.9844, "eval_samples_per_second": 60.95, "eval_steps_per_second": 2.032, "step": 620 }, { "epoch": 80.9, "eval_accuracy": 0.8, "eval_loss": 0.988412618637085, "eval_runtime": 0.9973, "eval_samples_per_second": 60.161, "eval_steps_per_second": 2.005, "step": 627 }, { "epoch": 81.29, "learning_rate": 5.263157894736842e-06, "loss": 0.0226, "step": 630 }, { "epoch": 81.94, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9850680232048035, "eval_runtime": 1.0518, "eval_samples_per_second": 57.046, "eval_steps_per_second": 1.902, "step": 635 }, { "epoch": 82.58, "learning_rate": 4.511278195488722e-06, "loss": 0.033, "step": 640 }, { "epoch": 82.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9710198640823364, "eval_runtime": 1.0106, "eval_samples_per_second": 59.371, "eval_steps_per_second": 1.979, "step": 643 }, { "epoch": 83.87, "learning_rate": 3.7593984962406014e-06, "loss": 0.0262, "step": 650 }, { "epoch": 84.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.985089123249054, "eval_runtime": 1.0015, "eval_samples_per_second": 59.912, "eval_steps_per_second": 1.997, "step": 651 }, { "epoch": 84.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9868298172950745, "eval_runtime": 1.0224, "eval_samples_per_second": 58.683, "eval_steps_per_second": 1.956, "step": 658 }, { "epoch": 85.16, "learning_rate": 3.007518796992481e-06, "loss": 0.0345, "step": 660 }, { "epoch": 85.94, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9702271819114685, "eval_runtime": 1.0333, "eval_samples_per_second": 58.065, "eval_steps_per_second": 1.936, "step": 666 }, { "epoch": 86.45, "learning_rate": 2.255639097744361e-06, "loss": 0.0299, "step": 670 }, { "epoch": 86.97, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9888620376586914, "eval_runtime": 0.9649, "eval_samples_per_second": 62.184, "eval_steps_per_second": 2.073, "step": 674 }, { "epoch": 87.74, "learning_rate": 1.5037593984962406e-06, "loss": 0.0347, "step": 680 }, { "epoch": 88.0, "eval_accuracy": 0.7833333333333333, "eval_loss": 1.000300645828247, "eval_runtime": 1.0185, "eval_samples_per_second": 58.91, "eval_steps_per_second": 1.964, "step": 682 }, { "epoch": 88.9, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9913238286972046, "eval_runtime": 1.0979, "eval_samples_per_second": 54.647, "eval_steps_per_second": 1.822, "step": 689 }, { "epoch": 89.03, "learning_rate": 7.518796992481203e-07, "loss": 0.0288, "step": 690 }, { "epoch": 89.94, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9858866333961487, "eval_runtime": 1.063, "eval_samples_per_second": 56.445, "eval_steps_per_second": 1.882, "step": 697 }, { "epoch": 90.32, "learning_rate": 0.0, "loss": 0.0198, "step": 700 }, { "epoch": 90.32, "eval_accuracy": 0.7833333333333333, "eval_loss": 0.9857978820800781, "eval_runtime": 1.01, "eval_samples_per_second": 59.406, "eval_steps_per_second": 1.98, "step": 700 }, { "epoch": 90.32, "step": 700, "total_flos": 6.817867456880148e+18, "train_loss": 0.19939448043704033, "train_runtime": 1530.1698, "train_samples_per_second": 63.653, "train_steps_per_second": 0.457 } ], "logging_steps": 10, "max_steps": 700, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 6.817867456880148e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }