{ "best_metric": 0.78125, "best_model_checkpoint": "Swin-dmae-DA5-N-Colab\\checkpoint-1361", "epoch": 116.36363636363636, "eval_steps": 500, "global_step": 2880, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 1.3888888888888892e-06, "loss": 6.7613, "step": 10 }, { "epoch": 0.81, "learning_rate": 2.7777777777777783e-06, "loss": 6.8626, "step": 20 }, { "epoch": 0.97, "eval_accuracy": 0.125, "eval_loss": 7.802581787109375, "eval_runtime": 0.4701, "eval_samples_per_second": 68.069, "eval_steps_per_second": 4.254, "step": 24 }, { "epoch": 1.21, "learning_rate": 4.166666666666667e-06, "loss": 6.8425, "step": 30 }, { "epoch": 1.62, "learning_rate": 5.555555555555557e-06, "loss": 6.6392, "step": 40 }, { "epoch": 1.98, "eval_accuracy": 0.125, "eval_loss": 7.5823211669921875, "eval_runtime": 0.4001, "eval_samples_per_second": 79.982, "eval_steps_per_second": 4.999, "step": 49 }, { "epoch": 2.02, "learning_rate": 6.944444444444445e-06, "loss": 6.5894, "step": 50 }, { "epoch": 2.42, "learning_rate": 8.333333333333334e-06, "loss": 6.3711, "step": 60 }, { "epoch": 2.83, "learning_rate": 9.722222222222223e-06, "loss": 6.3514, "step": 70 }, { "epoch": 2.99, "eval_accuracy": 0.125, "eval_loss": 6.665298938751221, "eval_runtime": 0.4046, "eval_samples_per_second": 79.091, "eval_steps_per_second": 4.943, "step": 74 }, { "epoch": 3.23, "learning_rate": 1.1111111111111113e-05, "loss": 5.8289, "step": 80 }, { "epoch": 3.64, "learning_rate": 1.25e-05, "loss": 5.3109, "step": 90 }, { "epoch": 4.0, "eval_accuracy": 0.125, "eval_loss": 5.279791355133057, "eval_runtime": 0.4241, "eval_samples_per_second": 75.455, "eval_steps_per_second": 4.716, "step": 99 }, { "epoch": 4.04, "learning_rate": 1.388888888888889e-05, "loss": 4.9844, "step": 100 }, { "epoch": 4.44, "learning_rate": 1.5277777777777777e-05, "loss": 4.5015, "step": 110 }, { "epoch": 4.85, "learning_rate": 1.6666666666666667e-05, "loss": 3.7476, "step": 120 }, { "epoch": 4.97, "eval_accuracy": 0.125, "eval_loss": 3.618558168411255, "eval_runtime": 0.4101, "eval_samples_per_second": 78.031, "eval_steps_per_second": 4.877, "step": 123 }, { "epoch": 5.25, "learning_rate": 1.8055555555555558e-05, "loss": 3.1884, "step": 130 }, { "epoch": 5.66, "learning_rate": 1.9444444444444445e-05, "loss": 2.7138, "step": 140 }, { "epoch": 5.98, "eval_accuracy": 0.125, "eval_loss": 1.937683343887329, "eval_runtime": 0.4036, "eval_samples_per_second": 79.286, "eval_steps_per_second": 4.955, "step": 148 }, { "epoch": 6.06, "learning_rate": 2.0833333333333336e-05, "loss": 2.1007, "step": 150 }, { "epoch": 6.46, "learning_rate": 2.2222222222222227e-05, "loss": 1.7134, "step": 160 }, { "epoch": 6.87, "learning_rate": 2.3611111111111114e-05, "loss": 1.4116, "step": 170 }, { "epoch": 6.99, "eval_accuracy": 0.125, "eval_loss": 1.4680219888687134, "eval_runtime": 0.3951, "eval_samples_per_second": 80.993, "eval_steps_per_second": 5.062, "step": 173 }, { "epoch": 7.27, "learning_rate": 2.5e-05, "loss": 1.4216, "step": 180 }, { "epoch": 7.68, "learning_rate": 2.6388888888888892e-05, "loss": 1.3932, "step": 190 }, { "epoch": 8.0, "eval_accuracy": 0.5, "eval_loss": 1.3818670511245728, "eval_runtime": 0.4901, "eval_samples_per_second": 65.291, "eval_steps_per_second": 4.081, "step": 198 }, { "epoch": 8.08, "learning_rate": 2.777777777777778e-05, "loss": 1.3888, "step": 200 }, { "epoch": 8.48, "learning_rate": 2.9166666666666666e-05, "loss": 1.3077, "step": 210 }, { "epoch": 8.89, "learning_rate": 3.0555555555555554e-05, "loss": 1.2566, "step": 220 }, { "epoch": 8.97, "eval_accuracy": 0.15625, "eval_loss": 1.591198205947876, "eval_runtime": 0.4171, "eval_samples_per_second": 76.721, "eval_steps_per_second": 4.795, "step": 222 }, { "epoch": 9.29, "learning_rate": 3.194444444444445e-05, "loss": 1.1679, "step": 230 }, { "epoch": 9.7, "learning_rate": 3.3333333333333335e-05, "loss": 1.1332, "step": 240 }, { "epoch": 9.98, "eval_accuracy": 0.4375, "eval_loss": 1.3336095809936523, "eval_runtime": 0.4176, "eval_samples_per_second": 76.628, "eval_steps_per_second": 4.789, "step": 247 }, { "epoch": 10.1, "learning_rate": 3.472222222222223e-05, "loss": 1.0699, "step": 250 }, { "epoch": 10.51, "learning_rate": 3.6111111111111116e-05, "loss": 0.9928, "step": 260 }, { "epoch": 10.91, "learning_rate": 3.7500000000000003e-05, "loss": 0.9511, "step": 270 }, { "epoch": 10.99, "eval_accuracy": 0.28125, "eval_loss": 1.1492191553115845, "eval_runtime": 0.4006, "eval_samples_per_second": 79.881, "eval_steps_per_second": 4.993, "step": 272 }, { "epoch": 11.31, "learning_rate": 3.888888888888889e-05, "loss": 0.9286, "step": 280 }, { "epoch": 11.72, "learning_rate": 3.996913580246914e-05, "loss": 0.8905, "step": 290 }, { "epoch": 12.0, "eval_accuracy": 0.53125, "eval_loss": 1.0571255683898926, "eval_runtime": 0.4566, "eval_samples_per_second": 70.082, "eval_steps_per_second": 4.38, "step": 297 }, { "epoch": 12.12, "learning_rate": 3.981481481481482e-05, "loss": 0.8031, "step": 300 }, { "epoch": 12.53, "learning_rate": 3.9660493827160496e-05, "loss": 0.8253, "step": 310 }, { "epoch": 12.93, "learning_rate": 3.950617283950617e-05, "loss": 0.8317, "step": 320 }, { "epoch": 12.97, "eval_accuracy": 0.65625, "eval_loss": 0.8473750948905945, "eval_runtime": 0.4201, "eval_samples_per_second": 76.173, "eval_steps_per_second": 4.761, "step": 321 }, { "epoch": 13.33, "learning_rate": 3.935185185185186e-05, "loss": 0.7445, "step": 330 }, { "epoch": 13.74, "learning_rate": 3.9197530864197535e-05, "loss": 0.6611, "step": 340 }, { "epoch": 13.98, "eval_accuracy": 0.71875, "eval_loss": 0.7519997954368591, "eval_runtime": 0.4141, "eval_samples_per_second": 77.277, "eval_steps_per_second": 4.83, "step": 346 }, { "epoch": 14.14, "learning_rate": 3.904320987654321e-05, "loss": 0.6514, "step": 350 }, { "epoch": 14.55, "learning_rate": 3.888888888888889e-05, "loss": 0.6022, "step": 360 }, { "epoch": 14.95, "learning_rate": 3.8734567901234575e-05, "loss": 0.5683, "step": 370 }, { "epoch": 14.99, "eval_accuracy": 0.75, "eval_loss": 0.6655727624893188, "eval_runtime": 0.4031, "eval_samples_per_second": 79.386, "eval_steps_per_second": 4.962, "step": 371 }, { "epoch": 15.35, "learning_rate": 3.8580246913580246e-05, "loss": 0.5743, "step": 380 }, { "epoch": 15.76, "learning_rate": 3.842592592592593e-05, "loss": 0.569, "step": 390 }, { "epoch": 16.0, "eval_accuracy": 0.53125, "eval_loss": 0.8109104037284851, "eval_runtime": 0.4166, "eval_samples_per_second": 76.812, "eval_steps_per_second": 4.801, "step": 396 }, { "epoch": 16.16, "learning_rate": 3.827160493827161e-05, "loss": 0.4743, "step": 400 }, { "epoch": 16.57, "learning_rate": 3.8117283950617286e-05, "loss": 0.502, "step": 410 }, { "epoch": 16.97, "learning_rate": 3.7962962962962964e-05, "loss": 0.4702, "step": 420 }, { "epoch": 16.97, "eval_accuracy": 0.625, "eval_loss": 0.703583836555481, "eval_runtime": 0.4076, "eval_samples_per_second": 78.509, "eval_steps_per_second": 4.907, "step": 420 }, { "epoch": 17.37, "learning_rate": 3.780864197530865e-05, "loss": 0.4441, "step": 430 }, { "epoch": 17.78, "learning_rate": 3.7654320987654326e-05, "loss": 0.4244, "step": 440 }, { "epoch": 17.98, "eval_accuracy": 0.65625, "eval_loss": 0.8169162273406982, "eval_runtime": 0.4216, "eval_samples_per_second": 75.9, "eval_steps_per_second": 4.744, "step": 445 }, { "epoch": 18.18, "learning_rate": 3.7500000000000003e-05, "loss": 0.4277, "step": 450 }, { "epoch": 18.59, "learning_rate": 3.734567901234568e-05, "loss": 0.3584, "step": 460 }, { "epoch": 18.99, "learning_rate": 3.719135802469136e-05, "loss": 0.3483, "step": 470 }, { "epoch": 18.99, "eval_accuracy": 0.71875, "eval_loss": 0.7075902223587036, "eval_runtime": 0.4306, "eval_samples_per_second": 74.312, "eval_steps_per_second": 4.645, "step": 470 }, { "epoch": 19.39, "learning_rate": 3.703703703703704e-05, "loss": 0.3005, "step": 480 }, { "epoch": 19.8, "learning_rate": 3.688271604938272e-05, "loss": 0.3853, "step": 490 }, { "epoch": 20.0, "eval_accuracy": 0.71875, "eval_loss": 0.8644444942474365, "eval_runtime": 0.4716, "eval_samples_per_second": 67.852, "eval_steps_per_second": 4.241, "step": 495 }, { "epoch": 20.2, "learning_rate": 3.67283950617284e-05, "loss": 0.307, "step": 500 }, { "epoch": 20.61, "learning_rate": 3.6574074074074076e-05, "loss": 0.3038, "step": 510 }, { "epoch": 20.97, "eval_accuracy": 0.71875, "eval_loss": 0.86528080701828, "eval_runtime": 0.4181, "eval_samples_per_second": 76.538, "eval_steps_per_second": 4.784, "step": 519 }, { "epoch": 21.01, "learning_rate": 3.641975308641976e-05, "loss": 0.3084, "step": 520 }, { "epoch": 21.41, "learning_rate": 3.626543209876543e-05, "loss": 0.2967, "step": 530 }, { "epoch": 21.82, "learning_rate": 3.6111111111111116e-05, "loss": 0.2885, "step": 540 }, { "epoch": 21.98, "eval_accuracy": 0.71875, "eval_loss": 1.0443750619888306, "eval_runtime": 0.4421, "eval_samples_per_second": 72.382, "eval_steps_per_second": 4.524, "step": 544 }, { "epoch": 22.22, "learning_rate": 3.5956790123456794e-05, "loss": 0.2644, "step": 550 }, { "epoch": 22.63, "learning_rate": 3.580246913580247e-05, "loss": 0.2014, "step": 560 }, { "epoch": 22.99, "eval_accuracy": 0.59375, "eval_loss": 1.06843900680542, "eval_runtime": 0.3991, "eval_samples_per_second": 80.182, "eval_steps_per_second": 5.011, "step": 569 }, { "epoch": 23.03, "learning_rate": 3.564814814814815e-05, "loss": 0.309, "step": 570 }, { "epoch": 23.43, "learning_rate": 3.5493827160493834e-05, "loss": 0.2515, "step": 580 }, { "epoch": 23.84, "learning_rate": 3.533950617283951e-05, "loss": 0.2764, "step": 590 }, { "epoch": 24.0, "eval_accuracy": 0.65625, "eval_loss": 1.1421856880187988, "eval_runtime": 0.4621, "eval_samples_per_second": 69.248, "eval_steps_per_second": 4.328, "step": 594 }, { "epoch": 24.24, "learning_rate": 3.518518518518519e-05, "loss": 0.2519, "step": 600 }, { "epoch": 24.65, "learning_rate": 3.503086419753087e-05, "loss": 0.2493, "step": 610 }, { "epoch": 24.97, "eval_accuracy": 0.6875, "eval_loss": 1.102545976638794, "eval_runtime": 0.4256, "eval_samples_per_second": 75.187, "eval_steps_per_second": 4.699, "step": 618 }, { "epoch": 25.05, "learning_rate": 3.4876543209876545e-05, "loss": 0.2863, "step": 620 }, { "epoch": 25.45, "learning_rate": 3.472222222222223e-05, "loss": 0.2352, "step": 630 }, { "epoch": 25.86, "learning_rate": 3.45679012345679e-05, "loss": 0.2754, "step": 640 }, { "epoch": 25.98, "eval_accuracy": 0.71875, "eval_loss": 1.0370571613311768, "eval_runtime": 0.4276, "eval_samples_per_second": 74.835, "eval_steps_per_second": 4.677, "step": 643 }, { "epoch": 26.26, "learning_rate": 3.4413580246913584e-05, "loss": 0.2549, "step": 650 }, { "epoch": 26.67, "learning_rate": 3.425925925925926e-05, "loss": 0.1793, "step": 660 }, { "epoch": 26.99, "eval_accuracy": 0.65625, "eval_loss": 1.1623690128326416, "eval_runtime": 0.4181, "eval_samples_per_second": 76.534, "eval_steps_per_second": 4.783, "step": 668 }, { "epoch": 27.07, "learning_rate": 3.410493827160494e-05, "loss": 0.2305, "step": 670 }, { "epoch": 27.47, "learning_rate": 3.395061728395062e-05, "loss": 0.1836, "step": 680 }, { "epoch": 27.88, "learning_rate": 3.37962962962963e-05, "loss": 0.1971, "step": 690 }, { "epoch": 28.0, "eval_accuracy": 0.6875, "eval_loss": 1.3177158832550049, "eval_runtime": 0.3941, "eval_samples_per_second": 81.2, "eval_steps_per_second": 5.075, "step": 693 }, { "epoch": 28.28, "learning_rate": 3.364197530864198e-05, "loss": 0.1514, "step": 700 }, { "epoch": 28.69, "learning_rate": 3.348765432098766e-05, "loss": 0.1881, "step": 710 }, { "epoch": 28.97, "eval_accuracy": 0.6875, "eval_loss": 1.2813467979431152, "eval_runtime": 0.3961, "eval_samples_per_second": 80.79, "eval_steps_per_second": 5.049, "step": 717 }, { "epoch": 29.09, "learning_rate": 3.3333333333333335e-05, "loss": 0.1854, "step": 720 }, { "epoch": 29.49, "learning_rate": 3.317901234567901e-05, "loss": 0.2045, "step": 730 }, { "epoch": 29.9, "learning_rate": 3.30246913580247e-05, "loss": 0.167, "step": 740 }, { "epoch": 29.98, "eval_accuracy": 0.625, "eval_loss": 1.5564466714859009, "eval_runtime": 0.4011, "eval_samples_per_second": 79.783, "eval_steps_per_second": 4.986, "step": 742 }, { "epoch": 30.3, "learning_rate": 3.2870370370370375e-05, "loss": 0.1761, "step": 750 }, { "epoch": 30.71, "learning_rate": 3.271604938271605e-05, "loss": 0.1872, "step": 760 }, { "epoch": 30.99, "eval_accuracy": 0.71875, "eval_loss": 1.3761742115020752, "eval_runtime": 0.4136, "eval_samples_per_second": 77.369, "eval_steps_per_second": 4.836, "step": 767 }, { "epoch": 31.11, "learning_rate": 3.256172839506173e-05, "loss": 0.1697, "step": 770 }, { "epoch": 31.52, "learning_rate": 3.2407407407407415e-05, "loss": 0.1806, "step": 780 }, { "epoch": 31.92, "learning_rate": 3.2253086419753086e-05, "loss": 0.1374, "step": 790 }, { "epoch": 32.0, "eval_accuracy": 0.625, "eval_loss": 1.4406580924987793, "eval_runtime": 0.4051, "eval_samples_per_second": 78.994, "eval_steps_per_second": 4.937, "step": 792 }, { "epoch": 32.32, "learning_rate": 3.209876543209877e-05, "loss": 0.1848, "step": 800 }, { "epoch": 32.73, "learning_rate": 3.194444444444445e-05, "loss": 0.1841, "step": 810 }, { "epoch": 32.97, "eval_accuracy": 0.6875, "eval_loss": 1.4038010835647583, "eval_runtime": 0.4176, "eval_samples_per_second": 76.627, "eval_steps_per_second": 4.789, "step": 816 }, { "epoch": 33.13, "learning_rate": 3.1790123456790125e-05, "loss": 0.1365, "step": 820 }, { "epoch": 33.54, "learning_rate": 3.16358024691358e-05, "loss": 0.1309, "step": 830 }, { "epoch": 33.94, "learning_rate": 3.148148148148149e-05, "loss": 0.167, "step": 840 }, { "epoch": 33.98, "eval_accuracy": 0.6875, "eval_loss": 1.3768980503082275, "eval_runtime": 0.4141, "eval_samples_per_second": 77.277, "eval_steps_per_second": 4.83, "step": 841 }, { "epoch": 34.34, "learning_rate": 3.1327160493827165e-05, "loss": 0.1689, "step": 850 }, { "epoch": 34.75, "learning_rate": 3.117283950617284e-05, "loss": 0.1614, "step": 860 }, { "epoch": 34.99, "eval_accuracy": 0.65625, "eval_loss": 1.5350613594055176, "eval_runtime": 0.4051, "eval_samples_per_second": 78.994, "eval_steps_per_second": 4.937, "step": 866 }, { "epoch": 35.15, "learning_rate": 3.101851851851852e-05, "loss": 0.1439, "step": 870 }, { "epoch": 35.56, "learning_rate": 3.08641975308642e-05, "loss": 0.1706, "step": 880 }, { "epoch": 35.96, "learning_rate": 3.070987654320988e-05, "loss": 0.1835, "step": 890 }, { "epoch": 36.0, "eval_accuracy": 0.6875, "eval_loss": 1.4466042518615723, "eval_runtime": 0.4396, "eval_samples_per_second": 72.793, "eval_steps_per_second": 4.55, "step": 891 }, { "epoch": 36.36, "learning_rate": 3.0555555555555554e-05, "loss": 0.1558, "step": 900 }, { "epoch": 36.77, "learning_rate": 3.0401234567901238e-05, "loss": 0.1917, "step": 910 }, { "epoch": 36.97, "eval_accuracy": 0.75, "eval_loss": 1.349318265914917, "eval_runtime": 0.4031, "eval_samples_per_second": 79.387, "eval_steps_per_second": 4.962, "step": 915 }, { "epoch": 37.17, "learning_rate": 3.0246913580246916e-05, "loss": 0.1761, "step": 920 }, { "epoch": 37.58, "learning_rate": 3.0092592592592593e-05, "loss": 0.1861, "step": 930 }, { "epoch": 37.98, "learning_rate": 2.9938271604938275e-05, "loss": 0.1171, "step": 940 }, { "epoch": 37.98, "eval_accuracy": 0.75, "eval_loss": 1.4756131172180176, "eval_runtime": 0.4051, "eval_samples_per_second": 78.994, "eval_steps_per_second": 4.937, "step": 940 }, { "epoch": 38.38, "learning_rate": 2.9783950617283956e-05, "loss": 0.1234, "step": 950 }, { "epoch": 38.79, "learning_rate": 2.962962962962963e-05, "loss": 0.163, "step": 960 }, { "epoch": 38.99, "eval_accuracy": 0.6875, "eval_loss": 1.4373202323913574, "eval_runtime": 0.4221, "eval_samples_per_second": 75.809, "eval_steps_per_second": 4.738, "step": 965 }, { "epoch": 39.19, "learning_rate": 2.947530864197531e-05, "loss": 0.1452, "step": 970 }, { "epoch": 39.6, "learning_rate": 2.9320987654320992e-05, "loss": 0.088, "step": 980 }, { "epoch": 40.0, "learning_rate": 2.9166666666666666e-05, "loss": 0.1688, "step": 990 }, { "epoch": 40.0, "eval_accuracy": 0.75, "eval_loss": 1.4082034826278687, "eval_runtime": 0.4786, "eval_samples_per_second": 66.859, "eval_steps_per_second": 4.179, "step": 990 }, { "epoch": 40.4, "learning_rate": 2.9012345679012347e-05, "loss": 0.1359, "step": 1000 }, { "epoch": 40.81, "learning_rate": 2.885802469135803e-05, "loss": 0.1318, "step": 1010 }, { "epoch": 40.97, "eval_accuracy": 0.6875, "eval_loss": 1.5907220840454102, "eval_runtime": 0.4061, "eval_samples_per_second": 78.797, "eval_steps_per_second": 4.925, "step": 1014 }, { "epoch": 41.21, "learning_rate": 2.8703703703703706e-05, "loss": 0.1547, "step": 1020 }, { "epoch": 41.62, "learning_rate": 2.8549382716049384e-05, "loss": 0.1107, "step": 1030 }, { "epoch": 41.98, "eval_accuracy": 0.6875, "eval_loss": 1.7461936473846436, "eval_runtime": 0.4081, "eval_samples_per_second": 78.414, "eval_steps_per_second": 4.901, "step": 1039 }, { "epoch": 42.02, "learning_rate": 2.8395061728395065e-05, "loss": 0.1144, "step": 1040 }, { "epoch": 42.42, "learning_rate": 2.8240740740740743e-05, "loss": 0.1239, "step": 1050 }, { "epoch": 42.83, "learning_rate": 2.8086419753086424e-05, "loss": 0.1064, "step": 1060 }, { "epoch": 42.99, "eval_accuracy": 0.5625, "eval_loss": 1.8704116344451904, "eval_runtime": 0.4081, "eval_samples_per_second": 78.414, "eval_steps_per_second": 4.901, "step": 1064 }, { "epoch": 43.23, "learning_rate": 2.79320987654321e-05, "loss": 0.0978, "step": 1070 }, { "epoch": 43.64, "learning_rate": 2.777777777777778e-05, "loss": 0.1423, "step": 1080 }, { "epoch": 44.0, "eval_accuracy": 0.5625, "eval_loss": 1.715477705001831, "eval_runtime": 0.4051, "eval_samples_per_second": 78.994, "eval_steps_per_second": 4.937, "step": 1089 }, { "epoch": 44.04, "learning_rate": 2.762345679012346e-05, "loss": 0.1031, "step": 1090 }, { "epoch": 44.44, "learning_rate": 2.746913580246914e-05, "loss": 0.1473, "step": 1100 }, { "epoch": 44.85, "learning_rate": 2.7314814814814816e-05, "loss": 0.082, "step": 1110 }, { "epoch": 44.97, "eval_accuracy": 0.71875, "eval_loss": 1.5552070140838623, "eval_runtime": 0.3991, "eval_samples_per_second": 80.182, "eval_steps_per_second": 5.011, "step": 1113 }, { "epoch": 45.25, "learning_rate": 2.7160493827160497e-05, "loss": 0.1206, "step": 1120 }, { "epoch": 45.66, "learning_rate": 2.7006172839506174e-05, "loss": 0.1012, "step": 1130 }, { "epoch": 45.98, "eval_accuracy": 0.6875, "eval_loss": 1.4189940690994263, "eval_runtime": 0.4136, "eval_samples_per_second": 77.37, "eval_steps_per_second": 4.836, "step": 1138 }, { "epoch": 46.06, "learning_rate": 2.6851851851851852e-05, "loss": 0.1124, "step": 1140 }, { "epoch": 46.46, "learning_rate": 2.6697530864197533e-05, "loss": 0.114, "step": 1150 }, { "epoch": 46.87, "learning_rate": 2.654320987654321e-05, "loss": 0.1001, "step": 1160 }, { "epoch": 46.99, "eval_accuracy": 0.71875, "eval_loss": 1.6800808906555176, "eval_runtime": 0.4021, "eval_samples_per_second": 79.584, "eval_steps_per_second": 4.974, "step": 1163 }, { "epoch": 47.27, "learning_rate": 2.6388888888888892e-05, "loss": 0.1309, "step": 1170 }, { "epoch": 47.68, "learning_rate": 2.623456790123457e-05, "loss": 0.1037, "step": 1180 }, { "epoch": 48.0, "eval_accuracy": 0.71875, "eval_loss": 1.686426043510437, "eval_runtime": 0.4541, "eval_samples_per_second": 70.469, "eval_steps_per_second": 4.404, "step": 1188 }, { "epoch": 48.08, "learning_rate": 2.6080246913580247e-05, "loss": 0.0909, "step": 1190 }, { "epoch": 48.48, "learning_rate": 2.5925925925925928e-05, "loss": 0.1296, "step": 1200 }, { "epoch": 48.89, "learning_rate": 2.577160493827161e-05, "loss": 0.1089, "step": 1210 }, { "epoch": 48.97, "eval_accuracy": 0.6875, "eval_loss": 1.5225051641464233, "eval_runtime": 0.4051, "eval_samples_per_second": 78.995, "eval_steps_per_second": 4.937, "step": 1212 }, { "epoch": 49.29, "learning_rate": 2.5617283950617284e-05, "loss": 0.1184, "step": 1220 }, { "epoch": 49.7, "learning_rate": 2.5462962962962965e-05, "loss": 0.0835, "step": 1230 }, { "epoch": 49.98, "eval_accuracy": 0.6875, "eval_loss": 1.979781150817871, "eval_runtime": 0.4161, "eval_samples_per_second": 76.903, "eval_steps_per_second": 4.806, "step": 1237 }, { "epoch": 50.1, "learning_rate": 2.5308641975308646e-05, "loss": 0.1079, "step": 1240 }, { "epoch": 50.51, "learning_rate": 2.515432098765432e-05, "loss": 0.1204, "step": 1250 }, { "epoch": 50.91, "learning_rate": 2.5e-05, "loss": 0.0818, "step": 1260 }, { "epoch": 50.99, "eval_accuracy": 0.65625, "eval_loss": 1.726827621459961, "eval_runtime": 0.4051, "eval_samples_per_second": 78.995, "eval_steps_per_second": 4.937, "step": 1262 }, { "epoch": 51.31, "learning_rate": 2.4845679012345682e-05, "loss": 0.0759, "step": 1270 }, { "epoch": 51.72, "learning_rate": 2.469135802469136e-05, "loss": 0.1134, "step": 1280 }, { "epoch": 52.0, "eval_accuracy": 0.75, "eval_loss": 1.599599838256836, "eval_runtime": 0.4061, "eval_samples_per_second": 78.8, "eval_steps_per_second": 4.925, "step": 1287 }, { "epoch": 52.12, "learning_rate": 2.4537037037037038e-05, "loss": 0.0988, "step": 1290 }, { "epoch": 52.53, "learning_rate": 2.438271604938272e-05, "loss": 0.0851, "step": 1300 }, { "epoch": 52.93, "learning_rate": 2.4228395061728396e-05, "loss": 0.1115, "step": 1310 }, { "epoch": 52.97, "eval_accuracy": 0.65625, "eval_loss": 1.7280734777450562, "eval_runtime": 0.4181, "eval_samples_per_second": 76.538, "eval_steps_per_second": 4.784, "step": 1311 }, { "epoch": 53.33, "learning_rate": 2.4074074074074077e-05, "loss": 0.0995, "step": 1320 }, { "epoch": 53.74, "learning_rate": 2.391975308641976e-05, "loss": 0.0929, "step": 1330 }, { "epoch": 53.98, "eval_accuracy": 0.75, "eval_loss": 1.6346489191055298, "eval_runtime": 0.4061, "eval_samples_per_second": 78.8, "eval_steps_per_second": 4.925, "step": 1336 }, { "epoch": 54.14, "learning_rate": 2.3765432098765433e-05, "loss": 0.1187, "step": 1340 }, { "epoch": 54.55, "learning_rate": 2.3611111111111114e-05, "loss": 0.1477, "step": 1350 }, { "epoch": 54.95, "learning_rate": 2.3456790123456795e-05, "loss": 0.0909, "step": 1360 }, { "epoch": 54.99, "eval_accuracy": 0.78125, "eval_loss": 1.4369856119155884, "eval_runtime": 0.4041, "eval_samples_per_second": 79.19, "eval_steps_per_second": 4.949, "step": 1361 }, { "epoch": 55.35, "learning_rate": 2.330246913580247e-05, "loss": 0.1106, "step": 1370 }, { "epoch": 55.76, "learning_rate": 2.314814814814815e-05, "loss": 0.1076, "step": 1380 }, { "epoch": 56.0, "eval_accuracy": 0.78125, "eval_loss": 1.550971508026123, "eval_runtime": 0.4091, "eval_samples_per_second": 78.222, "eval_steps_per_second": 4.889, "step": 1386 }, { "epoch": 56.16, "learning_rate": 2.299382716049383e-05, "loss": 0.1301, "step": 1390 }, { "epoch": 56.57, "learning_rate": 2.2839506172839506e-05, "loss": 0.0807, "step": 1400 }, { "epoch": 56.97, "learning_rate": 2.2685185185185187e-05, "loss": 0.0948, "step": 1410 }, { "epoch": 56.97, "eval_accuracy": 0.75, "eval_loss": 1.6382544040679932, "eval_runtime": 0.4751, "eval_samples_per_second": 67.353, "eval_steps_per_second": 4.21, "step": 1410 }, { "epoch": 57.37, "learning_rate": 2.2530864197530865e-05, "loss": 0.0961, "step": 1420 }, { "epoch": 57.78, "learning_rate": 2.2376543209876546e-05, "loss": 0.0914, "step": 1430 }, { "epoch": 57.98, "eval_accuracy": 0.6875, "eval_loss": 1.6937541961669922, "eval_runtime": 0.3996, "eval_samples_per_second": 80.08, "eval_steps_per_second": 5.005, "step": 1435 }, { "epoch": 58.18, "learning_rate": 2.2222222222222227e-05, "loss": 0.0716, "step": 1440 }, { "epoch": 58.59, "learning_rate": 2.20679012345679e-05, "loss": 0.0911, "step": 1450 }, { "epoch": 58.99, "learning_rate": 2.1913580246913582e-05, "loss": 0.0598, "step": 1460 }, { "epoch": 58.99, "eval_accuracy": 0.75, "eval_loss": 1.6290702819824219, "eval_runtime": 0.3996, "eval_samples_per_second": 80.081, "eval_steps_per_second": 5.005, "step": 1460 }, { "epoch": 59.39, "learning_rate": 2.1759259259259263e-05, "loss": 0.087, "step": 1470 }, { "epoch": 59.8, "learning_rate": 2.1604938271604937e-05, "loss": 0.0769, "step": 1480 }, { "epoch": 60.0, "eval_accuracy": 0.75, "eval_loss": 1.6593661308288574, "eval_runtime": 0.4171, "eval_samples_per_second": 76.721, "eval_steps_per_second": 4.795, "step": 1485 }, { "epoch": 60.2, "learning_rate": 2.145061728395062e-05, "loss": 0.0964, "step": 1490 }, { "epoch": 60.61, "learning_rate": 2.12962962962963e-05, "loss": 0.0894, "step": 1500 }, { "epoch": 60.97, "eval_accuracy": 0.78125, "eval_loss": 1.630151629447937, "eval_runtime": 0.4706, "eval_samples_per_second": 67.996, "eval_steps_per_second": 4.25, "step": 1509 }, { "epoch": 61.01, "learning_rate": 2.1141975308641974e-05, "loss": 0.0664, "step": 1510 }, { "epoch": 61.41, "learning_rate": 2.0987654320987655e-05, "loss": 0.0701, "step": 1520 }, { "epoch": 61.82, "learning_rate": 2.0833333333333336e-05, "loss": 0.0999, "step": 1530 }, { "epoch": 61.98, "eval_accuracy": 0.71875, "eval_loss": 1.656170129776001, "eval_runtime": 0.7332, "eval_samples_per_second": 43.646, "eval_steps_per_second": 2.728, "step": 1534 }, { "epoch": 62.22, "learning_rate": 2.0679012345679014e-05, "loss": 0.0559, "step": 1540 }, { "epoch": 62.63, "learning_rate": 2.0524691358024695e-05, "loss": 0.0759, "step": 1550 }, { "epoch": 62.99, "eval_accuracy": 0.75, "eval_loss": 1.598868489265442, "eval_runtime": 0.4032, "eval_samples_per_second": 79.364, "eval_steps_per_second": 4.96, "step": 1559 }, { "epoch": 63.03, "learning_rate": 2.0370370370370372e-05, "loss": 0.1008, "step": 1560 }, { "epoch": 63.43, "learning_rate": 2.021604938271605e-05, "loss": 0.06, "step": 1570 }, { "epoch": 63.84, "learning_rate": 2.006172839506173e-05, "loss": 0.102, "step": 1580 }, { "epoch": 64.0, "eval_accuracy": 0.78125, "eval_loss": 1.660170078277588, "eval_runtime": 0.4806, "eval_samples_per_second": 66.581, "eval_steps_per_second": 4.161, "step": 1584 }, { "epoch": 64.24, "learning_rate": 1.990740740740741e-05, "loss": 0.0836, "step": 1590 }, { "epoch": 64.65, "learning_rate": 1.9753086419753087e-05, "loss": 0.0864, "step": 1600 }, { "epoch": 64.97, "eval_accuracy": 0.78125, "eval_loss": 1.7385599613189697, "eval_runtime": 0.4181, "eval_samples_per_second": 76.538, "eval_steps_per_second": 4.784, "step": 1608 }, { "epoch": 65.05, "learning_rate": 1.9598765432098768e-05, "loss": 0.076, "step": 1610 }, { "epoch": 65.45, "learning_rate": 1.9444444444444445e-05, "loss": 0.0763, "step": 1620 }, { "epoch": 65.86, "learning_rate": 1.9290123456790123e-05, "loss": 0.0722, "step": 1630 }, { "epoch": 65.98, "eval_accuracy": 0.71875, "eval_loss": 2.0494906902313232, "eval_runtime": 0.4086, "eval_samples_per_second": 78.317, "eval_steps_per_second": 4.895, "step": 1633 }, { "epoch": 66.26, "learning_rate": 1.9135802469135804e-05, "loss": 0.0929, "step": 1640 }, { "epoch": 66.67, "learning_rate": 1.8981481481481482e-05, "loss": 0.0956, "step": 1650 }, { "epoch": 66.99, "eval_accuracy": 0.6875, "eval_loss": 1.9748592376708984, "eval_runtime": 0.4206, "eval_samples_per_second": 76.081, "eval_steps_per_second": 4.755, "step": 1658 }, { "epoch": 67.07, "learning_rate": 1.8827160493827163e-05, "loss": 0.071, "step": 1660 }, { "epoch": 67.47, "learning_rate": 1.867283950617284e-05, "loss": 0.0844, "step": 1670 }, { "epoch": 67.88, "learning_rate": 1.851851851851852e-05, "loss": 0.0698, "step": 1680 }, { "epoch": 68.0, "eval_accuracy": 0.65625, "eval_loss": 2.0089621543884277, "eval_runtime": 0.4176, "eval_samples_per_second": 76.628, "eval_steps_per_second": 4.789, "step": 1683 }, { "epoch": 68.28, "learning_rate": 1.83641975308642e-05, "loss": 0.0837, "step": 1690 }, { "epoch": 68.69, "learning_rate": 1.820987654320988e-05, "loss": 0.0635, "step": 1700 }, { "epoch": 68.97, "eval_accuracy": 0.625, "eval_loss": 2.160045862197876, "eval_runtime": 0.4306, "eval_samples_per_second": 74.314, "eval_steps_per_second": 4.645, "step": 1707 }, { "epoch": 69.09, "learning_rate": 1.8055555555555558e-05, "loss": 0.0719, "step": 1710 }, { "epoch": 69.49, "learning_rate": 1.7901234567901236e-05, "loss": 0.0656, "step": 1720 }, { "epoch": 69.9, "learning_rate": 1.7746913580246917e-05, "loss": 0.0726, "step": 1730 }, { "epoch": 69.98, "eval_accuracy": 0.75, "eval_loss": 1.8476567268371582, "eval_runtime": 0.4076, "eval_samples_per_second": 78.508, "eval_steps_per_second": 4.907, "step": 1732 }, { "epoch": 70.3, "learning_rate": 1.7592592592592595e-05, "loss": 0.0651, "step": 1740 }, { "epoch": 70.71, "learning_rate": 1.7438271604938272e-05, "loss": 0.0905, "step": 1750 }, { "epoch": 70.99, "eval_accuracy": 0.71875, "eval_loss": 1.9970349073410034, "eval_runtime": 0.4801, "eval_samples_per_second": 66.652, "eval_steps_per_second": 4.166, "step": 1757 }, { "epoch": 71.11, "learning_rate": 1.728395061728395e-05, "loss": 0.0705, "step": 1760 }, { "epoch": 71.52, "learning_rate": 1.712962962962963e-05, "loss": 0.053, "step": 1770 }, { "epoch": 71.92, "learning_rate": 1.697530864197531e-05, "loss": 0.0955, "step": 1780 }, { "epoch": 72.0, "eval_accuracy": 0.75, "eval_loss": 1.9001073837280273, "eval_runtime": 0.4716, "eval_samples_per_second": 67.852, "eval_steps_per_second": 4.241, "step": 1782 }, { "epoch": 72.32, "learning_rate": 1.682098765432099e-05, "loss": 0.0943, "step": 1790 }, { "epoch": 72.73, "learning_rate": 1.6666666666666667e-05, "loss": 0.0614, "step": 1800 }, { "epoch": 72.97, "eval_accuracy": 0.65625, "eval_loss": 1.9346568584442139, "eval_runtime": 0.4161, "eval_samples_per_second": 76.906, "eval_steps_per_second": 4.807, "step": 1806 }, { "epoch": 73.13, "learning_rate": 1.651234567901235e-05, "loss": 0.0652, "step": 1810 }, { "epoch": 73.54, "learning_rate": 1.6358024691358026e-05, "loss": 0.0634, "step": 1820 }, { "epoch": 73.94, "learning_rate": 1.6203703703703707e-05, "loss": 0.0721, "step": 1830 }, { "epoch": 73.98, "eval_accuracy": 0.6875, "eval_loss": 1.900674819946289, "eval_runtime": 0.4776, "eval_samples_per_second": 66.996, "eval_steps_per_second": 4.187, "step": 1831 }, { "epoch": 74.34, "learning_rate": 1.6049382716049385e-05, "loss": 0.0633, "step": 1840 }, { "epoch": 74.75, "learning_rate": 1.5895061728395063e-05, "loss": 0.0868, "step": 1850 }, { "epoch": 74.99, "eval_accuracy": 0.65625, "eval_loss": 2.0204198360443115, "eval_runtime": 0.4036, "eval_samples_per_second": 79.286, "eval_steps_per_second": 4.955, "step": 1856 }, { "epoch": 75.15, "learning_rate": 1.5740740740740744e-05, "loss": 0.0798, "step": 1860 }, { "epoch": 75.56, "learning_rate": 1.558641975308642e-05, "loss": 0.0783, "step": 1870 }, { "epoch": 75.96, "learning_rate": 1.54320987654321e-05, "loss": 0.0817, "step": 1880 }, { "epoch": 76.0, "eval_accuracy": 0.71875, "eval_loss": 1.9806559085845947, "eval_runtime": 0.4096, "eval_samples_per_second": 78.123, "eval_steps_per_second": 4.883, "step": 1881 }, { "epoch": 76.36, "learning_rate": 1.5277777777777777e-05, "loss": 0.0823, "step": 1890 }, { "epoch": 76.77, "learning_rate": 1.5123456790123458e-05, "loss": 0.0533, "step": 1900 }, { "epoch": 76.97, "eval_accuracy": 0.75, "eval_loss": 1.978171467781067, "eval_runtime": 0.4191, "eval_samples_per_second": 76.353, "eval_steps_per_second": 4.772, "step": 1905 }, { "epoch": 77.17, "learning_rate": 1.4969135802469137e-05, "loss": 0.0866, "step": 1910 }, { "epoch": 77.58, "learning_rate": 1.4814814814814815e-05, "loss": 0.0737, "step": 1920 }, { "epoch": 77.98, "learning_rate": 1.4660493827160496e-05, "loss": 0.0682, "step": 1930 }, { "epoch": 77.98, "eval_accuracy": 0.75, "eval_loss": 1.831963062286377, "eval_runtime": 0.4216, "eval_samples_per_second": 75.902, "eval_steps_per_second": 4.744, "step": 1930 }, { "epoch": 78.38, "learning_rate": 1.4506172839506174e-05, "loss": 0.0819, "step": 1940 }, { "epoch": 78.79, "learning_rate": 1.4351851851851853e-05, "loss": 0.078, "step": 1950 }, { "epoch": 78.99, "eval_accuracy": 0.71875, "eval_loss": 1.835146427154541, "eval_runtime": 0.4216, "eval_samples_per_second": 75.902, "eval_steps_per_second": 4.744, "step": 1955 }, { "epoch": 79.19, "learning_rate": 1.4197530864197532e-05, "loss": 0.0574, "step": 1960 }, { "epoch": 79.6, "learning_rate": 1.4043209876543212e-05, "loss": 0.0468, "step": 1970 }, { "epoch": 80.0, "learning_rate": 1.388888888888889e-05, "loss": 0.0991, "step": 1980 }, { "epoch": 80.0, "eval_accuracy": 0.71875, "eval_loss": 1.9694030284881592, "eval_runtime": 0.4798, "eval_samples_per_second": 66.7, "eval_steps_per_second": 4.169, "step": 1980 }, { "epoch": 80.4, "learning_rate": 1.373456790123457e-05, "loss": 0.0466, "step": 1990 }, { "epoch": 80.81, "learning_rate": 1.3580246913580248e-05, "loss": 0.0601, "step": 2000 }, { "epoch": 80.97, "eval_accuracy": 0.71875, "eval_loss": 1.8794611692428589, "eval_runtime": 0.4141, "eval_samples_per_second": 77.276, "eval_steps_per_second": 4.83, "step": 2004 }, { "epoch": 81.21, "learning_rate": 1.3425925925925926e-05, "loss": 0.0567, "step": 2010 }, { "epoch": 81.62, "learning_rate": 1.3271604938271605e-05, "loss": 0.072, "step": 2020 }, { "epoch": 81.98, "eval_accuracy": 0.65625, "eval_loss": 2.02938175201416, "eval_runtime": 0.4232, "eval_samples_per_second": 75.623, "eval_steps_per_second": 4.726, "step": 2029 }, { "epoch": 82.02, "learning_rate": 1.3117283950617285e-05, "loss": 0.073, "step": 2030 }, { "epoch": 82.42, "learning_rate": 1.2962962962962964e-05, "loss": 0.0788, "step": 2040 }, { "epoch": 82.83, "learning_rate": 1.2808641975308642e-05, "loss": 0.0746, "step": 2050 }, { "epoch": 82.99, "eval_accuracy": 0.71875, "eval_loss": 1.8438613414764404, "eval_runtime": 0.3956, "eval_samples_per_second": 80.89, "eval_steps_per_second": 5.056, "step": 2054 }, { "epoch": 83.23, "learning_rate": 1.2654320987654323e-05, "loss": 0.0626, "step": 2060 }, { "epoch": 83.64, "learning_rate": 1.25e-05, "loss": 0.0547, "step": 2070 }, { "epoch": 84.0, "eval_accuracy": 0.71875, "eval_loss": 1.932082176208496, "eval_runtime": 0.4141, "eval_samples_per_second": 77.273, "eval_steps_per_second": 4.83, "step": 2079 }, { "epoch": 84.04, "learning_rate": 1.234567901234568e-05, "loss": 0.0632, "step": 2080 }, { "epoch": 84.44, "learning_rate": 1.219135802469136e-05, "loss": 0.0478, "step": 2090 }, { "epoch": 84.85, "learning_rate": 1.2037037037037039e-05, "loss": 0.0497, "step": 2100 }, { "epoch": 84.97, "eval_accuracy": 0.78125, "eval_loss": 1.8862378597259521, "eval_runtime": 0.4806, "eval_samples_per_second": 66.582, "eval_steps_per_second": 4.161, "step": 2103 }, { "epoch": 85.25, "learning_rate": 1.1882716049382716e-05, "loss": 0.0544, "step": 2110 }, { "epoch": 85.66, "learning_rate": 1.1728395061728398e-05, "loss": 0.0566, "step": 2120 }, { "epoch": 85.98, "eval_accuracy": 0.65625, "eval_loss": 2.0067098140716553, "eval_runtime": 0.5932, "eval_samples_per_second": 53.947, "eval_steps_per_second": 3.372, "step": 2128 }, { "epoch": 86.06, "learning_rate": 1.1574074074074075e-05, "loss": 0.0557, "step": 2130 }, { "epoch": 86.46, "learning_rate": 1.1419753086419753e-05, "loss": 0.0872, "step": 2140 }, { "epoch": 86.87, "learning_rate": 1.1265432098765432e-05, "loss": 0.0353, "step": 2150 }, { "epoch": 86.99, "eval_accuracy": 0.71875, "eval_loss": 2.095703363418579, "eval_runtime": 0.4511, "eval_samples_per_second": 70.942, "eval_steps_per_second": 4.434, "step": 2153 }, { "epoch": 87.27, "learning_rate": 1.1111111111111113e-05, "loss": 0.0482, "step": 2160 }, { "epoch": 87.68, "learning_rate": 1.0956790123456791e-05, "loss": 0.0634, "step": 2170 }, { "epoch": 88.0, "eval_accuracy": 0.65625, "eval_loss": 2.15714168548584, "eval_runtime": 0.4471, "eval_samples_per_second": 71.57, "eval_steps_per_second": 4.473, "step": 2178 }, { "epoch": 88.08, "learning_rate": 1.0802469135802469e-05, "loss": 0.0632, "step": 2180 }, { "epoch": 88.48, "learning_rate": 1.064814814814815e-05, "loss": 0.0504, "step": 2190 }, { "epoch": 88.89, "learning_rate": 1.0493827160493827e-05, "loss": 0.0477, "step": 2200 }, { "epoch": 88.97, "eval_accuracy": 0.6875, "eval_loss": 2.0384438037872314, "eval_runtime": 0.4271, "eval_samples_per_second": 74.924, "eval_steps_per_second": 4.683, "step": 2202 }, { "epoch": 89.29, "learning_rate": 1.0339506172839507e-05, "loss": 0.0446, "step": 2210 }, { "epoch": 89.7, "learning_rate": 1.0185185185185186e-05, "loss": 0.0513, "step": 2220 }, { "epoch": 89.98, "eval_accuracy": 0.75, "eval_loss": 1.9145632982254028, "eval_runtime": 0.4711, "eval_samples_per_second": 67.923, "eval_steps_per_second": 4.245, "step": 2227 }, { "epoch": 90.1, "learning_rate": 1.0030864197530866e-05, "loss": 0.0626, "step": 2230 }, { "epoch": 90.51, "learning_rate": 9.876543209876543e-06, "loss": 0.0354, "step": 2240 }, { "epoch": 90.91, "learning_rate": 9.722222222222223e-06, "loss": 0.0717, "step": 2250 }, { "epoch": 90.99, "eval_accuracy": 0.71875, "eval_loss": 1.8837898969650269, "eval_runtime": 0.4031, "eval_samples_per_second": 79.378, "eval_steps_per_second": 4.961, "step": 2252 }, { "epoch": 91.31, "learning_rate": 9.567901234567902e-06, "loss": 0.0486, "step": 2260 }, { "epoch": 91.72, "learning_rate": 9.413580246913581e-06, "loss": 0.0644, "step": 2270 }, { "epoch": 92.0, "eval_accuracy": 0.6875, "eval_loss": 1.9186292886734009, "eval_runtime": 0.4621, "eval_samples_per_second": 69.247, "eval_steps_per_second": 4.328, "step": 2277 }, { "epoch": 92.12, "learning_rate": 9.25925925925926e-06, "loss": 0.048, "step": 2280 }, { "epoch": 92.53, "learning_rate": 9.10493827160494e-06, "loss": 0.0711, "step": 2290 }, { "epoch": 92.93, "learning_rate": 8.950617283950618e-06, "loss": 0.0848, "step": 2300 }, { "epoch": 92.97, "eval_accuracy": 0.71875, "eval_loss": 1.882826805114746, "eval_runtime": 0.4741, "eval_samples_per_second": 67.495, "eval_steps_per_second": 4.218, "step": 2301 }, { "epoch": 93.33, "learning_rate": 8.796296296296297e-06, "loss": 0.038, "step": 2310 }, { "epoch": 93.74, "learning_rate": 8.641975308641975e-06, "loss": 0.0393, "step": 2320 }, { "epoch": 93.98, "eval_accuracy": 0.71875, "eval_loss": 1.9442168474197388, "eval_runtime": 0.4091, "eval_samples_per_second": 78.217, "eval_steps_per_second": 4.889, "step": 2326 }, { "epoch": 94.14, "learning_rate": 8.487654320987654e-06, "loss": 0.038, "step": 2330 }, { "epoch": 94.55, "learning_rate": 8.333333333333334e-06, "loss": 0.0703, "step": 2340 }, { "epoch": 94.95, "learning_rate": 8.179012345679013e-06, "loss": 0.046, "step": 2350 }, { "epoch": 94.99, "eval_accuracy": 0.71875, "eval_loss": 1.8865550756454468, "eval_runtime": 0.3931, "eval_samples_per_second": 81.403, "eval_steps_per_second": 5.088, "step": 2351 }, { "epoch": 95.35, "learning_rate": 8.024691358024692e-06, "loss": 0.0762, "step": 2360 }, { "epoch": 95.76, "learning_rate": 7.870370370370372e-06, "loss": 0.0487, "step": 2370 }, { "epoch": 96.0, "eval_accuracy": 0.6875, "eval_loss": 1.9787421226501465, "eval_runtime": 0.4221, "eval_samples_per_second": 75.807, "eval_steps_per_second": 4.738, "step": 2376 }, { "epoch": 96.16, "learning_rate": 7.71604938271605e-06, "loss": 0.0724, "step": 2380 }, { "epoch": 96.57, "learning_rate": 7.561728395061729e-06, "loss": 0.0563, "step": 2390 }, { "epoch": 96.97, "learning_rate": 7.4074074074074075e-06, "loss": 0.074, "step": 2400 }, { "epoch": 96.97, "eval_accuracy": 0.6875, "eval_loss": 2.0081026554107666, "eval_runtime": 0.6192, "eval_samples_per_second": 51.682, "eval_steps_per_second": 3.23, "step": 2400 }, { "epoch": 97.37, "learning_rate": 7.253086419753087e-06, "loss": 0.0624, "step": 2410 }, { "epoch": 97.78, "learning_rate": 7.098765432098766e-06, "loss": 0.0435, "step": 2420 }, { "epoch": 97.98, "eval_accuracy": 0.75, "eval_loss": 1.8838809728622437, "eval_runtime": 0.4211, "eval_samples_per_second": 75.988, "eval_steps_per_second": 4.749, "step": 2425 }, { "epoch": 98.18, "learning_rate": 6.944444444444445e-06, "loss": 0.0688, "step": 2430 }, { "epoch": 98.59, "learning_rate": 6.790123456790124e-06, "loss": 0.0441, "step": 2440 }, { "epoch": 98.99, "learning_rate": 6.635802469135803e-06, "loss": 0.0509, "step": 2450 }, { "epoch": 98.99, "eval_accuracy": 0.71875, "eval_loss": 1.9207700490951538, "eval_runtime": 0.4015, "eval_samples_per_second": 79.693, "eval_steps_per_second": 4.981, "step": 2450 }, { "epoch": 99.39, "learning_rate": 6.481481481481482e-06, "loss": 0.0557, "step": 2460 }, { "epoch": 99.8, "learning_rate": 6.3271604938271615e-06, "loss": 0.0571, "step": 2470 }, { "epoch": 100.0, "eval_accuracy": 0.71875, "eval_loss": 1.9770445823669434, "eval_runtime": 0.4191, "eval_samples_per_second": 76.363, "eval_steps_per_second": 4.773, "step": 2475 }, { "epoch": 100.2, "learning_rate": 6.17283950617284e-06, "loss": 0.0505, "step": 2480 }, { "epoch": 100.61, "learning_rate": 6.018518518518519e-06, "loss": 0.0327, "step": 2490 }, { "epoch": 100.97, "eval_accuracy": 0.71875, "eval_loss": 1.969954013824463, "eval_runtime": 0.5071, "eval_samples_per_second": 63.101, "eval_steps_per_second": 3.944, "step": 2499 }, { "epoch": 101.01, "learning_rate": 5.864197530864199e-06, "loss": 0.0472, "step": 2500 }, { "epoch": 101.41, "learning_rate": 5.7098765432098764e-06, "loss": 0.0512, "step": 2510 }, { "epoch": 101.82, "learning_rate": 5.555555555555557e-06, "loss": 0.0387, "step": 2520 }, { "epoch": 101.98, "eval_accuracy": 0.75, "eval_loss": 1.9250848293304443, "eval_runtime": 0.4241, "eval_samples_per_second": 75.45, "eval_steps_per_second": 4.716, "step": 2524 }, { "epoch": 102.22, "learning_rate": 5.401234567901234e-06, "loss": 0.0614, "step": 2530 }, { "epoch": 102.63, "learning_rate": 5.246913580246914e-06, "loss": 0.029, "step": 2540 }, { "epoch": 102.99, "eval_accuracy": 0.71875, "eval_loss": 1.9489786624908447, "eval_runtime": 0.4031, "eval_samples_per_second": 79.385, "eval_steps_per_second": 4.962, "step": 2549 }, { "epoch": 103.03, "learning_rate": 5.092592592592593e-06, "loss": 0.052, "step": 2550 }, { "epoch": 103.43, "learning_rate": 4.938271604938272e-06, "loss": 0.0696, "step": 2560 }, { "epoch": 103.84, "learning_rate": 4.783950617283951e-06, "loss": 0.0478, "step": 2570 }, { "epoch": 104.0, "eval_accuracy": 0.71875, "eval_loss": 1.9358038902282715, "eval_runtime": 0.4036, "eval_samples_per_second": 79.283, "eval_steps_per_second": 4.955, "step": 2574 }, { "epoch": 104.24, "learning_rate": 4.62962962962963e-06, "loss": 0.0358, "step": 2580 }, { "epoch": 104.65, "learning_rate": 4.475308641975309e-06, "loss": 0.0587, "step": 2590 }, { "epoch": 104.97, "eval_accuracy": 0.75, "eval_loss": 1.9197365045547485, "eval_runtime": 0.4296, "eval_samples_per_second": 74.488, "eval_steps_per_second": 4.655, "step": 2598 }, { "epoch": 105.05, "learning_rate": 4.3209876543209875e-06, "loss": 0.0417, "step": 2600 }, { "epoch": 105.45, "learning_rate": 4.166666666666667e-06, "loss": 0.0644, "step": 2610 }, { "epoch": 105.86, "learning_rate": 4.012345679012346e-06, "loss": 0.0523, "step": 2620 }, { "epoch": 105.98, "eval_accuracy": 0.71875, "eval_loss": 1.9309440851211548, "eval_runtime": 0.4621, "eval_samples_per_second": 69.248, "eval_steps_per_second": 4.328, "step": 2623 }, { "epoch": 106.26, "learning_rate": 3.858024691358025e-06, "loss": 0.0365, "step": 2630 }, { "epoch": 106.67, "learning_rate": 3.7037037037037037e-06, "loss": 0.0581, "step": 2640 }, { "epoch": 106.99, "eval_accuracy": 0.71875, "eval_loss": 1.9828894138336182, "eval_runtime": 0.4227, "eval_samples_per_second": 75.712, "eval_steps_per_second": 4.732, "step": 2648 }, { "epoch": 107.07, "learning_rate": 3.549382716049383e-06, "loss": 0.0459, "step": 2650 }, { "epoch": 107.47, "learning_rate": 3.395061728395062e-06, "loss": 0.031, "step": 2660 }, { "epoch": 107.88, "learning_rate": 3.240740740740741e-06, "loss": 0.0352, "step": 2670 }, { "epoch": 108.0, "eval_accuracy": 0.6875, "eval_loss": 2.00472354888916, "eval_runtime": 0.4442, "eval_samples_per_second": 72.042, "eval_steps_per_second": 4.503, "step": 2673 }, { "epoch": 108.28, "learning_rate": 3.08641975308642e-06, "loss": 0.0553, "step": 2680 }, { "epoch": 108.69, "learning_rate": 2.9320987654320994e-06, "loss": 0.0373, "step": 2690 }, { "epoch": 108.97, "eval_accuracy": 0.71875, "eval_loss": 1.9897255897521973, "eval_runtime": 0.4256, "eval_samples_per_second": 75.187, "eval_steps_per_second": 4.699, "step": 2697 }, { "epoch": 109.09, "learning_rate": 2.7777777777777783e-06, "loss": 0.049, "step": 2700 }, { "epoch": 109.49, "learning_rate": 2.623456790123457e-06, "loss": 0.0294, "step": 2710 }, { "epoch": 109.9, "learning_rate": 2.469135802469136e-06, "loss": 0.0258, "step": 2720 }, { "epoch": 109.98, "eval_accuracy": 0.71875, "eval_loss": 1.938352346420288, "eval_runtime": 0.4216, "eval_samples_per_second": 75.898, "eval_steps_per_second": 4.744, "step": 2722 }, { "epoch": 110.3, "learning_rate": 2.314814814814815e-06, "loss": 0.0484, "step": 2730 }, { "epoch": 110.71, "learning_rate": 2.1604938271604937e-06, "loss": 0.039, "step": 2740 }, { "epoch": 110.99, "eval_accuracy": 0.6875, "eval_loss": 1.9356164932250977, "eval_runtime": 0.4076, "eval_samples_per_second": 78.509, "eval_steps_per_second": 4.907, "step": 2747 }, { "epoch": 111.11, "learning_rate": 2.006172839506173e-06, "loss": 0.0326, "step": 2750 }, { "epoch": 111.52, "learning_rate": 1.8518518518518519e-06, "loss": 0.066, "step": 2760 }, { "epoch": 111.92, "learning_rate": 1.697530864197531e-06, "loss": 0.0333, "step": 2770 }, { "epoch": 112.0, "eval_accuracy": 0.71875, "eval_loss": 1.980454683303833, "eval_runtime": 0.4112, "eval_samples_per_second": 77.828, "eval_steps_per_second": 4.864, "step": 2772 }, { "epoch": 112.32, "learning_rate": 1.54320987654321e-06, "loss": 0.0524, "step": 2780 }, { "epoch": 112.73, "learning_rate": 1.3888888888888892e-06, "loss": 0.0641, "step": 2790 }, { "epoch": 112.97, "eval_accuracy": 0.6875, "eval_loss": 1.9813566207885742, "eval_runtime": 0.5541, "eval_samples_per_second": 57.748, "eval_steps_per_second": 3.609, "step": 2796 }, { "epoch": 113.13, "learning_rate": 1.234567901234568e-06, "loss": 0.0505, "step": 2800 }, { "epoch": 113.54, "learning_rate": 1.0802469135802469e-06, "loss": 0.0222, "step": 2810 }, { "epoch": 113.94, "learning_rate": 9.259259259259259e-07, "loss": 0.0649, "step": 2820 }, { "epoch": 113.98, "eval_accuracy": 0.6875, "eval_loss": 1.972588062286377, "eval_runtime": 0.4431, "eval_samples_per_second": 72.211, "eval_steps_per_second": 4.513, "step": 2821 }, { "epoch": 114.34, "learning_rate": 7.71604938271605e-07, "loss": 0.0354, "step": 2830 }, { "epoch": 114.75, "learning_rate": 6.17283950617284e-07, "loss": 0.0241, "step": 2840 }, { "epoch": 114.99, "eval_accuracy": 0.6875, "eval_loss": 1.9736533164978027, "eval_runtime": 0.4011, "eval_samples_per_second": 79.777, "eval_steps_per_second": 4.986, "step": 2846 }, { "epoch": 115.15, "learning_rate": 4.6296296296296297e-07, "loss": 0.0418, "step": 2850 }, { "epoch": 115.56, "learning_rate": 3.08641975308642e-07, "loss": 0.0382, "step": 2860 }, { "epoch": 115.96, "learning_rate": 1.54320987654321e-07, "loss": 0.0356, "step": 2870 }, { "epoch": 116.0, "eval_accuracy": 0.6875, "eval_loss": 1.985576868057251, "eval_runtime": 0.4721, "eval_samples_per_second": 67.78, "eval_steps_per_second": 4.236, "step": 2871 }, { "epoch": 116.36, "learning_rate": 0.0, "loss": 0.0601, "step": 2880 }, { "epoch": 116.36, "eval_accuracy": 0.6875, "eval_loss": 1.9852969646453857, "eval_runtime": 0.4181, "eval_samples_per_second": 76.535, "eval_steps_per_second": 4.783, "step": 2880 }, { "epoch": 116.36, "step": 2880, "total_flos": 5.970642494234296e+18, "train_loss": 0.4532645735475752, "train_runtime": 3508.1723, "train_samples_per_second": 53.943, "train_steps_per_second": 0.821 } ], "logging_steps": 10, "max_steps": 2880, "num_input_tokens_seen": 0, "num_train_epochs": 120, "save_steps": 500, "total_flos": 5.970642494234296e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }