{ "best_metric": 0.782608695652174, "best_model_checkpoint": "swin-tiny-patch4-window7-224-ve-U13-b-80\\checkpoint-240", "epoch": 73.84615384615384, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.92, "eval_accuracy": 0.13043478260869565, "eval_loss": 1.385940432548523, "eval_runtime": 0.6339, "eval_samples_per_second": 72.571, "eval_steps_per_second": 3.155, "step": 6 }, { "epoch": 1.54, "learning_rate": 1.0416666666666668e-05, "loss": 1.3859, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.2826086956521739, "eval_loss": 1.382836103439331, "eval_runtime": 0.653, "eval_samples_per_second": 70.443, "eval_steps_per_second": 3.063, "step": 13 }, { "epoch": 2.92, "eval_accuracy": 0.32608695652173914, "eval_loss": 1.3768701553344727, "eval_runtime": 0.65, "eval_samples_per_second": 70.768, "eval_steps_per_second": 3.077, "step": 19 }, { "epoch": 3.08, "learning_rate": 2.0833333333333336e-05, "loss": 1.379, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.2826086956521739, "eval_loss": 1.3565824031829834, "eval_runtime": 0.6532, "eval_samples_per_second": 70.417, "eval_steps_per_second": 3.062, "step": 26 }, { "epoch": 4.62, "learning_rate": 3.125e-05, "loss": 1.3356, "step": 30 }, { "epoch": 4.92, "eval_accuracy": 0.2391304347826087, "eval_loss": 1.316235065460205, "eval_runtime": 0.6545, "eval_samples_per_second": 70.281, "eval_steps_per_second": 3.056, "step": 32 }, { "epoch": 6.0, "eval_accuracy": 0.3695652173913043, "eval_loss": 1.2090449333190918, "eval_runtime": 0.6376, "eval_samples_per_second": 72.151, "eval_steps_per_second": 3.137, "step": 39 }, { "epoch": 6.15, "learning_rate": 4.166666666666667e-05, "loss": 1.2023, "step": 40 }, { "epoch": 6.92, "eval_accuracy": 0.41304347826086957, "eval_loss": 1.140857458114624, "eval_runtime": 0.6476, "eval_samples_per_second": 71.035, "eval_steps_per_second": 3.088, "step": 45 }, { "epoch": 7.69, "learning_rate": 4.976851851851852e-05, "loss": 1.0289, "step": 50 }, { "epoch": 8.0, "eval_accuracy": 0.45652173913043476, "eval_loss": 1.0442323684692383, "eval_runtime": 0.6862, "eval_samples_per_second": 67.038, "eval_steps_per_second": 2.915, "step": 52 }, { "epoch": 8.92, "eval_accuracy": 0.5217391304347826, "eval_loss": 0.9695850610733032, "eval_runtime": 0.6564, "eval_samples_per_second": 70.079, "eval_steps_per_second": 3.047, "step": 58 }, { "epoch": 9.23, "learning_rate": 4.8611111111111115e-05, "loss": 0.9132, "step": 60 }, { "epoch": 10.0, "eval_accuracy": 0.43478260869565216, "eval_loss": 1.0133169889450073, "eval_runtime": 0.6439, "eval_samples_per_second": 71.444, "eval_steps_per_second": 3.106, "step": 65 }, { "epoch": 10.77, "learning_rate": 4.745370370370371e-05, "loss": 0.7677, "step": 70 }, { "epoch": 10.92, "eval_accuracy": 0.5, "eval_loss": 1.0143684148788452, "eval_runtime": 0.6229, "eval_samples_per_second": 73.85, "eval_steps_per_second": 3.211, "step": 71 }, { "epoch": 12.0, "eval_accuracy": 0.34782608695652173, "eval_loss": 1.137718915939331, "eval_runtime": 0.6502, "eval_samples_per_second": 70.745, "eval_steps_per_second": 3.076, "step": 78 }, { "epoch": 12.31, "learning_rate": 4.62962962962963e-05, "loss": 0.6988, "step": 80 }, { "epoch": 12.92, "eval_accuracy": 0.5869565217391305, "eval_loss": 0.817061722278595, "eval_runtime": 0.6531, "eval_samples_per_second": 70.438, "eval_steps_per_second": 3.063, "step": 84 }, { "epoch": 13.85, "learning_rate": 4.5138888888888894e-05, "loss": 0.6051, "step": 90 }, { "epoch": 14.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.8982558846473694, "eval_runtime": 0.6551, "eval_samples_per_second": 70.213, "eval_steps_per_second": 3.053, "step": 91 }, { "epoch": 14.92, "eval_accuracy": 0.6086956521739131, "eval_loss": 0.8593006134033203, "eval_runtime": 0.6479, "eval_samples_per_second": 71.003, "eval_steps_per_second": 3.087, "step": 97 }, { "epoch": 15.38, "learning_rate": 4.3981481481481486e-05, "loss": 0.5175, "step": 100 }, { "epoch": 16.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.818878173828125, "eval_runtime": 0.6394, "eval_samples_per_second": 71.94, "eval_steps_per_second": 3.128, "step": 104 }, { "epoch": 16.92, "learning_rate": 4.282407407407408e-05, "loss": 0.429, "step": 110 }, { "epoch": 16.92, "eval_accuracy": 0.717391304347826, "eval_loss": 0.6789580583572388, "eval_runtime": 0.6455, "eval_samples_per_second": 71.263, "eval_steps_per_second": 3.098, "step": 110 }, { "epoch": 18.0, "eval_accuracy": 0.6304347826086957, "eval_loss": 0.7073642015457153, "eval_runtime": 0.6678, "eval_samples_per_second": 68.88, "eval_steps_per_second": 2.995, "step": 117 }, { "epoch": 18.46, "learning_rate": 4.166666666666667e-05, "loss": 0.4349, "step": 120 }, { "epoch": 18.92, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.8890200257301331, "eval_runtime": 0.6748, "eval_samples_per_second": 68.17, "eval_steps_per_second": 2.964, "step": 123 }, { "epoch": 20.0, "learning_rate": 4.0509259259259265e-05, "loss": 0.3892, "step": 130 }, { "epoch": 20.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.979804515838623, "eval_runtime": 0.6717, "eval_samples_per_second": 68.481, "eval_steps_per_second": 2.977, "step": 130 }, { "epoch": 20.92, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.8814191222190857, "eval_runtime": 0.6613, "eval_samples_per_second": 69.559, "eval_steps_per_second": 3.024, "step": 136 }, { "epoch": 21.54, "learning_rate": 3.935185185185186e-05, "loss": 0.3613, "step": 140 }, { "epoch": 22.0, "eval_accuracy": 0.6521739130434783, "eval_loss": 0.8839537501335144, "eval_runtime": 0.6839, "eval_samples_per_second": 67.261, "eval_steps_per_second": 2.924, "step": 143 }, { "epoch": 22.92, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.7662139534950256, "eval_runtime": 0.6732, "eval_samples_per_second": 68.326, "eval_steps_per_second": 2.971, "step": 149 }, { "epoch": 23.08, "learning_rate": 3.8194444444444444e-05, "loss": 0.342, "step": 150 }, { "epoch": 24.0, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.7883625030517578, "eval_runtime": 0.6844, "eval_samples_per_second": 67.213, "eval_steps_per_second": 2.922, "step": 156 }, { "epoch": 24.62, "learning_rate": 3.7037037037037037e-05, "loss": 0.2762, "step": 160 }, { "epoch": 24.92, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.026755928993225, "eval_runtime": 0.6591, "eval_samples_per_second": 69.789, "eval_steps_per_second": 3.034, "step": 162 }, { "epoch": 26.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9206076264381409, "eval_runtime": 0.6851, "eval_samples_per_second": 67.14, "eval_steps_per_second": 2.919, "step": 169 }, { "epoch": 26.15, "learning_rate": 3.587962962962963e-05, "loss": 0.2759, "step": 170 }, { "epoch": 26.92, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9079513549804688, "eval_runtime": 0.6802, "eval_samples_per_second": 67.623, "eval_steps_per_second": 2.94, "step": 175 }, { "epoch": 27.69, "learning_rate": 3.472222222222222e-05, "loss": 0.2559, "step": 180 }, { "epoch": 28.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 0.9378835558891296, "eval_runtime": 0.6691, "eval_samples_per_second": 68.745, "eval_steps_per_second": 2.989, "step": 182 }, { "epoch": 28.92, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.0121444463729858, "eval_runtime": 0.6814, "eval_samples_per_second": 67.504, "eval_steps_per_second": 2.935, "step": 188 }, { "epoch": 29.23, "learning_rate": 3.3564814814814815e-05, "loss": 0.2455, "step": 190 }, { "epoch": 30.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.8251720070838928, "eval_runtime": 0.6693, "eval_samples_per_second": 68.731, "eval_steps_per_second": 2.988, "step": 195 }, { "epoch": 30.77, "learning_rate": 3.240740740740741e-05, "loss": 0.2125, "step": 200 }, { "epoch": 30.92, "eval_accuracy": 0.7608695652173914, "eval_loss": 0.8500754237174988, "eval_runtime": 0.693, "eval_samples_per_second": 66.38, "eval_steps_per_second": 2.886, "step": 201 }, { "epoch": 32.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.0364569425582886, "eval_runtime": 0.684, "eval_samples_per_second": 67.248, "eval_steps_per_second": 2.924, "step": 208 }, { "epoch": 32.31, "learning_rate": 3.125e-05, "loss": 0.2204, "step": 210 }, { "epoch": 32.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.0469846725463867, "eval_runtime": 0.6841, "eval_samples_per_second": 67.245, "eval_steps_per_second": 2.924, "step": 214 }, { "epoch": 33.85, "learning_rate": 3.0092592592592593e-05, "loss": 0.1785, "step": 220 }, { "epoch": 34.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.8834150433540344, "eval_runtime": 0.6551, "eval_samples_per_second": 70.218, "eval_steps_per_second": 3.053, "step": 221 }, { "epoch": 34.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.0779674053192139, "eval_runtime": 0.6643, "eval_samples_per_second": 69.243, "eval_steps_per_second": 3.011, "step": 227 }, { "epoch": 35.38, "learning_rate": 2.8935185185185186e-05, "loss": 0.1912, "step": 230 }, { "epoch": 36.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9327965378761292, "eval_runtime": 0.6863, "eval_samples_per_second": 67.022, "eval_steps_per_second": 2.914, "step": 234 }, { "epoch": 36.92, "learning_rate": 2.777777777777778e-05, "loss": 0.1518, "step": 240 }, { "epoch": 36.92, "eval_accuracy": 0.782608695652174, "eval_loss": 0.8900798559188843, "eval_runtime": 0.6842, "eval_samples_per_second": 67.236, "eval_steps_per_second": 2.923, "step": 240 }, { "epoch": 38.0, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.1069260835647583, "eval_runtime": 0.6697, "eval_samples_per_second": 68.692, "eval_steps_per_second": 2.987, "step": 247 }, { "epoch": 38.46, "learning_rate": 2.6620370370370372e-05, "loss": 0.166, "step": 250 }, { "epoch": 38.92, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9823449850082397, "eval_runtime": 0.6724, "eval_samples_per_second": 68.414, "eval_steps_per_second": 2.975, "step": 253 }, { "epoch": 40.0, "learning_rate": 2.5462962962962965e-05, "loss": 0.1775, "step": 260 }, { "epoch": 40.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9712538719177246, "eval_runtime": 0.6616, "eval_samples_per_second": 69.532, "eval_steps_per_second": 3.023, "step": 260 }, { "epoch": 40.92, "eval_accuracy": 0.717391304347826, "eval_loss": 0.972922146320343, "eval_runtime": 0.6677, "eval_samples_per_second": 68.892, "eval_steps_per_second": 2.995, "step": 266 }, { "epoch": 41.54, "learning_rate": 2.4305555555555558e-05, "loss": 0.1344, "step": 270 }, { "epoch": 42.0, "eval_accuracy": 0.717391304347826, "eval_loss": 0.9956928491592407, "eval_runtime": 0.6673, "eval_samples_per_second": 68.936, "eval_steps_per_second": 2.997, "step": 273 }, { "epoch": 42.92, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.0180084705352783, "eval_runtime": 0.6819, "eval_samples_per_second": 67.46, "eval_steps_per_second": 2.933, "step": 279 }, { "epoch": 43.08, "learning_rate": 2.314814814814815e-05, "loss": 0.1232, "step": 280 }, { "epoch": 44.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.9668875336647034, "eval_runtime": 0.6863, "eval_samples_per_second": 67.023, "eval_steps_per_second": 2.914, "step": 286 }, { "epoch": 44.62, "learning_rate": 2.1990740740740743e-05, "loss": 0.1267, "step": 290 }, { "epoch": 44.92, "eval_accuracy": 0.6956521739130435, "eval_loss": 0.9764741063117981, "eval_runtime": 0.6906, "eval_samples_per_second": 66.611, "eval_steps_per_second": 2.896, "step": 292 }, { "epoch": 46.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.0389167070388794, "eval_runtime": 0.8195, "eval_samples_per_second": 56.133, "eval_steps_per_second": 2.441, "step": 299 }, { "epoch": 46.15, "learning_rate": 2.0833333333333336e-05, "loss": 0.1548, "step": 300 }, { "epoch": 46.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.001569390296936, "eval_runtime": 0.6415, "eval_samples_per_second": 71.702, "eval_steps_per_second": 3.117, "step": 305 }, { "epoch": 47.69, "learning_rate": 1.967592592592593e-05, "loss": 0.1267, "step": 310 }, { "epoch": 48.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.156539797782898, "eval_runtime": 0.6692, "eval_samples_per_second": 68.739, "eval_steps_per_second": 2.989, "step": 312 }, { "epoch": 48.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1795543432235718, "eval_runtime": 0.7207, "eval_samples_per_second": 63.831, "eval_steps_per_second": 2.775, "step": 318 }, { "epoch": 49.23, "learning_rate": 1.8518518518518518e-05, "loss": 0.1403, "step": 320 }, { "epoch": 50.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.2806681394577026, "eval_runtime": 0.6946, "eval_samples_per_second": 66.226, "eval_steps_per_second": 2.879, "step": 325 }, { "epoch": 50.77, "learning_rate": 1.736111111111111e-05, "loss": 0.1096, "step": 330 }, { "epoch": 50.92, "eval_accuracy": 0.6739130434782609, "eval_loss": 1.2462576627731323, "eval_runtime": 0.7307, "eval_samples_per_second": 62.954, "eval_steps_per_second": 2.737, "step": 331 }, { "epoch": 52.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.145448088645935, "eval_runtime": 0.7031, "eval_samples_per_second": 65.426, "eval_steps_per_second": 2.845, "step": 338 }, { "epoch": 52.31, "learning_rate": 1.6203703703703704e-05, "loss": 0.1106, "step": 340 }, { "epoch": 52.92, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1494420766830444, "eval_runtime": 0.6634, "eval_samples_per_second": 69.34, "eval_steps_per_second": 3.015, "step": 344 }, { "epoch": 53.85, "learning_rate": 1.5046296296296297e-05, "loss": 0.1116, "step": 350 }, { "epoch": 54.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1299750804901123, "eval_runtime": 0.6843, "eval_samples_per_second": 67.217, "eval_steps_per_second": 2.922, "step": 351 }, { "epoch": 54.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.2097675800323486, "eval_runtime": 0.7032, "eval_samples_per_second": 65.42, "eval_steps_per_second": 2.844, "step": 357 }, { "epoch": 55.38, "learning_rate": 1.388888888888889e-05, "loss": 0.1229, "step": 360 }, { "epoch": 56.0, "eval_accuracy": 0.717391304347826, "eval_loss": 1.0590535402297974, "eval_runtime": 0.6998, "eval_samples_per_second": 65.732, "eval_steps_per_second": 2.858, "step": 364 }, { "epoch": 56.92, "learning_rate": 1.2731481481481482e-05, "loss": 0.1235, "step": 370 }, { "epoch": 56.92, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.1228772401809692, "eval_runtime": 0.6702, "eval_samples_per_second": 68.637, "eval_steps_per_second": 2.984, "step": 370 }, { "epoch": 58.0, "eval_accuracy": 0.717391304347826, "eval_loss": 1.2034395933151245, "eval_runtime": 0.6511, "eval_samples_per_second": 70.647, "eval_steps_per_second": 3.072, "step": 377 }, { "epoch": 58.46, "learning_rate": 1.1574074074074075e-05, "loss": 0.107, "step": 380 }, { "epoch": 58.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.0628197193145752, "eval_runtime": 0.6967, "eval_samples_per_second": 66.021, "eval_steps_per_second": 2.87, "step": 383 }, { "epoch": 60.0, "learning_rate": 1.0416666666666668e-05, "loss": 0.107, "step": 390 }, { "epoch": 60.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.0070008039474487, "eval_runtime": 0.6853, "eval_samples_per_second": 67.122, "eval_steps_per_second": 2.918, "step": 390 }, { "epoch": 60.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.16019868850708, "eval_runtime": 0.6966, "eval_samples_per_second": 66.035, "eval_steps_per_second": 2.871, "step": 396 }, { "epoch": 61.54, "learning_rate": 9.259259259259259e-06, "loss": 0.1167, "step": 400 }, { "epoch": 62.0, "eval_accuracy": 0.717391304347826, "eval_loss": 1.0720371007919312, "eval_runtime": 0.7007, "eval_samples_per_second": 65.652, "eval_steps_per_second": 2.854, "step": 403 }, { "epoch": 62.92, "eval_accuracy": 0.7391304347826086, "eval_loss": 0.9726494550704956, "eval_runtime": 0.6716, "eval_samples_per_second": 68.496, "eval_steps_per_second": 2.978, "step": 409 }, { "epoch": 63.08, "learning_rate": 8.101851851851852e-06, "loss": 0.1113, "step": 410 }, { "epoch": 64.0, "eval_accuracy": 0.717391304347826, "eval_loss": 1.032416582107544, "eval_runtime": 0.684, "eval_samples_per_second": 67.253, "eval_steps_per_second": 2.924, "step": 416 }, { "epoch": 64.62, "learning_rate": 6.944444444444445e-06, "loss": 0.0838, "step": 420 }, { "epoch": 64.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1092015504837036, "eval_runtime": 0.6999, "eval_samples_per_second": 65.722, "eval_steps_per_second": 2.857, "step": 422 }, { "epoch": 66.0, "eval_accuracy": 0.6956521739130435, "eval_loss": 1.177163004875183, "eval_runtime": 0.7851, "eval_samples_per_second": 58.589, "eval_steps_per_second": 2.547, "step": 429 }, { "epoch": 66.15, "learning_rate": 5.787037037037038e-06, "loss": 0.083, "step": 430 }, { "epoch": 66.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1195148229599, "eval_runtime": 0.6775, "eval_samples_per_second": 67.893, "eval_steps_per_second": 2.952, "step": 435 }, { "epoch": 67.69, "learning_rate": 4.6296296296296296e-06, "loss": 0.0899, "step": 440 }, { "epoch": 68.0, "eval_accuracy": 0.717391304347826, "eval_loss": 1.068082332611084, "eval_runtime": 0.668, "eval_samples_per_second": 68.86, "eval_steps_per_second": 2.994, "step": 442 }, { "epoch": 68.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1160058975219727, "eval_runtime": 0.6862, "eval_samples_per_second": 67.036, "eval_steps_per_second": 2.915, "step": 448 }, { "epoch": 69.23, "learning_rate": 3.4722222222222224e-06, "loss": 0.0813, "step": 450 }, { "epoch": 70.0, "eval_accuracy": 0.717391304347826, "eval_loss": 1.143452525138855, "eval_runtime": 0.6851, "eval_samples_per_second": 67.142, "eval_steps_per_second": 2.919, "step": 455 }, { "epoch": 70.77, "learning_rate": 2.3148148148148148e-06, "loss": 0.0782, "step": 460 }, { "epoch": 70.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1463539600372314, "eval_runtime": 0.6819, "eval_samples_per_second": 67.457, "eval_steps_per_second": 2.933, "step": 461 }, { "epoch": 72.0, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1538991928100586, "eval_runtime": 0.7861, "eval_samples_per_second": 58.52, "eval_steps_per_second": 2.544, "step": 468 }, { "epoch": 72.31, "learning_rate": 1.1574074074074074e-06, "loss": 0.1014, "step": 470 }, { "epoch": 72.92, "eval_accuracy": 0.717391304347826, "eval_loss": 1.158526062965393, "eval_runtime": 0.7055, "eval_samples_per_second": 65.203, "eval_steps_per_second": 2.835, "step": 474 }, { "epoch": 73.85, "learning_rate": 0.0, "loss": 0.0944, "step": 480 }, { "epoch": 73.85, "eval_accuracy": 0.717391304347826, "eval_loss": 1.1592216491699219, "eval_runtime": 0.7479, "eval_samples_per_second": 61.508, "eval_steps_per_second": 2.674, "step": 480 }, { "epoch": 73.85, "step": 480, "total_flos": 1.5036476561209958e+18, "train_loss": 0.34541464410722256, "train_runtime": 589.4401, "train_samples_per_second": 111.156, "train_steps_per_second": 0.814 } ], "logging_steps": 10, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 80, "save_steps": 500, "total_flos": 1.5036476561209958e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }