{ "best_metric": 0.15389865636825562, "best_model_checkpoint": "mobilevitv2-1.0-imagenet1k-256-finetuned_v2024-10-21-frost/checkpoint-1000", "epoch": 30.0, "eval_steps": 100, "global_step": 1710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17543859649122806, "grad_norm": 0.3124828040599823, "learning_rate": 1.1695906432748537e-05, "loss": 0.6955, "step": 10 }, { "epoch": 0.3508771929824561, "grad_norm": 0.24917739629745483, "learning_rate": 2.3391812865497074e-05, "loss": 0.6942, "step": 20 }, { "epoch": 0.5263157894736842, "grad_norm": 0.2268371284008026, "learning_rate": 3.508771929824561e-05, "loss": 0.6939, "step": 30 }, { "epoch": 0.7017543859649122, "grad_norm": 0.2435961812734604, "learning_rate": 4.678362573099415e-05, "loss": 0.6918, "step": 40 }, { "epoch": 0.8771929824561403, "grad_norm": 0.24638999998569489, "learning_rate": 5.847953216374269e-05, "loss": 0.6889, "step": 50 }, { "epoch": 1.0526315789473684, "grad_norm": 0.2426590472459793, "learning_rate": 7.017543859649122e-05, "loss": 0.6854, "step": 60 }, { "epoch": 1.2280701754385965, "grad_norm": 0.26534757018089294, "learning_rate": 8.187134502923976e-05, "loss": 0.6803, "step": 70 }, { "epoch": 1.4035087719298245, "grad_norm": 0.2573549449443817, "learning_rate": 9.35672514619883e-05, "loss": 0.6763, "step": 80 }, { "epoch": 1.5789473684210527, "grad_norm": 0.2639031410217285, "learning_rate": 0.00010526315789473685, "loss": 0.6701, "step": 90 }, { "epoch": 1.7543859649122808, "grad_norm": 0.26114630699157715, "learning_rate": 0.00011695906432748539, "loss": 0.6635, "step": 100 }, { "epoch": 1.7543859649122808, "eval_accuracy": 0.7604444444444445, "eval_f1": 0.5705179282868525, "eval_loss": 0.6512863039970398, "eval_precision": 0.43552311435523117, "eval_recall": 0.8267898383371824, "eval_runtime": 2.9095, "eval_samples_per_second": 77.332, "eval_steps_per_second": 9.967, "step": 100 }, { "epoch": 1.9298245614035088, "grad_norm": 0.3371104896068573, "learning_rate": 0.0001286549707602339, "loss": 0.6502, "step": 110 }, { "epoch": 2.1052631578947367, "grad_norm": 0.31244638562202454, "learning_rate": 0.00014035087719298245, "loss": 0.6343, "step": 120 }, { "epoch": 2.280701754385965, "grad_norm": 0.47065746784210205, "learning_rate": 0.00015204678362573098, "loss": 0.6161, "step": 130 }, { "epoch": 2.456140350877193, "grad_norm": 0.41640815138816833, "learning_rate": 0.00016374269005847952, "loss": 0.588, "step": 140 }, { "epoch": 2.6315789473684212, "grad_norm": 0.34670090675354004, "learning_rate": 0.00017543859649122806, "loss": 0.5565, "step": 150 }, { "epoch": 2.807017543859649, "grad_norm": 0.384328693151474, "learning_rate": 0.0001871345029239766, "loss": 0.5242, "step": 160 }, { "epoch": 2.982456140350877, "grad_norm": 0.4133964478969574, "learning_rate": 0.00019883040935672513, "loss": 0.5158, "step": 170 }, { "epoch": 3.1578947368421053, "grad_norm": 0.4693595767021179, "learning_rate": 0.00019883040935672513, "loss": 0.4658, "step": 180 }, { "epoch": 3.3333333333333335, "grad_norm": 0.41811782121658325, "learning_rate": 0.00019753086419753085, "loss": 0.4297, "step": 190 }, { "epoch": 3.5087719298245617, "grad_norm": 0.8540976643562317, "learning_rate": 0.00019623131903833657, "loss": 0.4461, "step": 200 }, { "epoch": 3.5087719298245617, "eval_accuracy": 0.8768888888888889, "eval_f1": 0.729227761485826, "eval_loss": 0.3972250819206238, "eval_precision": 0.6322033898305085, "eval_recall": 0.8614318706697459, "eval_runtime": 1.766, "eval_samples_per_second": 127.406, "eval_steps_per_second": 16.421, "step": 200 }, { "epoch": 3.6842105263157894, "grad_norm": 0.8259305357933044, "learning_rate": 0.0001949317738791423, "loss": 0.3914, "step": 210 }, { "epoch": 3.8596491228070176, "grad_norm": 0.8546284437179565, "learning_rate": 0.00019363222871994802, "loss": 0.384, "step": 220 }, { "epoch": 4.035087719298246, "grad_norm": 0.3827027678489685, "learning_rate": 0.00019233268356075374, "loss": 0.3497, "step": 230 }, { "epoch": 4.2105263157894735, "grad_norm": 0.6248043775558472, "learning_rate": 0.00019103313840155946, "loss": 0.3648, "step": 240 }, { "epoch": 4.385964912280702, "grad_norm": 0.5684685111045837, "learning_rate": 0.00018973359324236518, "loss": 0.3112, "step": 250 }, { "epoch": 4.56140350877193, "grad_norm": 0.5080260634422302, "learning_rate": 0.0001884340480831709, "loss": 0.3059, "step": 260 }, { "epoch": 4.7368421052631575, "grad_norm": 0.5282370448112488, "learning_rate": 0.0001871345029239766, "loss": 0.2922, "step": 270 }, { "epoch": 4.912280701754386, "grad_norm": 0.7253307104110718, "learning_rate": 0.00018583495776478232, "loss": 0.2909, "step": 280 }, { "epoch": 5.087719298245614, "grad_norm": 0.7058104276657104, "learning_rate": 0.00018453541260558804, "loss": 0.2922, "step": 290 }, { "epoch": 5.2631578947368425, "grad_norm": 1.1993378400802612, "learning_rate": 0.00018323586744639376, "loss": 0.2599, "step": 300 }, { "epoch": 5.2631578947368425, "eval_accuracy": 0.9226666666666666, "eval_f1": 0.804932735426009, "eval_loss": 0.2404223531484604, "eval_precision": 0.7821350762527233, "eval_recall": 0.8290993071593533, "eval_runtime": 2.7313, "eval_samples_per_second": 82.378, "eval_steps_per_second": 10.618, "step": 300 }, { "epoch": 5.43859649122807, "grad_norm": 0.8134835362434387, "learning_rate": 0.00018193632228719948, "loss": 0.2645, "step": 310 }, { "epoch": 5.614035087719298, "grad_norm": 0.7742730975151062, "learning_rate": 0.0001806367771280052, "loss": 0.2345, "step": 320 }, { "epoch": 5.7894736842105265, "grad_norm": 0.5191880464553833, "learning_rate": 0.00017933723196881092, "loss": 0.2504, "step": 330 }, { "epoch": 5.964912280701754, "grad_norm": 0.7682189345359802, "learning_rate": 0.00017803768680961664, "loss": 0.2654, "step": 340 }, { "epoch": 6.140350877192983, "grad_norm": 0.7704707384109497, "learning_rate": 0.00017673814165042236, "loss": 0.2431, "step": 350 }, { "epoch": 6.315789473684211, "grad_norm": 0.9333469867706299, "learning_rate": 0.00017543859649122806, "loss": 0.2382, "step": 360 }, { "epoch": 6.491228070175438, "grad_norm": 0.8412513136863708, "learning_rate": 0.00017413905133203378, "loss": 0.2207, "step": 370 }, { "epoch": 6.666666666666667, "grad_norm": 0.7568041086196899, "learning_rate": 0.0001728395061728395, "loss": 0.2271, "step": 380 }, { "epoch": 6.842105263157895, "grad_norm": 0.689445436000824, "learning_rate": 0.00017153996101364522, "loss": 0.2076, "step": 390 }, { "epoch": 7.017543859649122, "grad_norm": 0.7390238046646118, "learning_rate": 0.00017024041585445094, "loss": 0.2074, "step": 400 }, { "epoch": 7.017543859649122, "eval_accuracy": 0.9346666666666666, "eval_f1": 0.8256227758007118, "eval_loss": 0.1941838562488556, "eval_precision": 0.848780487804878, "eval_recall": 0.8036951501154734, "eval_runtime": 1.7733, "eval_samples_per_second": 126.88, "eval_steps_per_second": 16.353, "step": 400 }, { "epoch": 7.192982456140351, "grad_norm": 0.4645775258541107, "learning_rate": 0.00016894087069525666, "loss": 0.2233, "step": 410 }, { "epoch": 7.368421052631579, "grad_norm": 0.6826916337013245, "learning_rate": 0.00016764132553606238, "loss": 0.1846, "step": 420 }, { "epoch": 7.543859649122807, "grad_norm": 0.6299170851707458, "learning_rate": 0.0001663417803768681, "loss": 0.1807, "step": 430 }, { "epoch": 7.719298245614035, "grad_norm": 0.40688008069992065, "learning_rate": 0.00016504223521767383, "loss": 0.1925, "step": 440 }, { "epoch": 7.894736842105263, "grad_norm": 0.8310642242431641, "learning_rate": 0.00016374269005847952, "loss": 0.1906, "step": 450 }, { "epoch": 8.070175438596491, "grad_norm": 0.7561126351356506, "learning_rate": 0.00016244314489928524, "loss": 0.2537, "step": 460 }, { "epoch": 8.24561403508772, "grad_norm": 1.5505608320236206, "learning_rate": 0.00016114359974009096, "loss": 0.2134, "step": 470 }, { "epoch": 8.421052631578947, "grad_norm": 0.5844523310661316, "learning_rate": 0.00015984405458089668, "loss": 0.1927, "step": 480 }, { "epoch": 8.596491228070175, "grad_norm": 0.6846328377723694, "learning_rate": 0.0001585445094217024, "loss": 0.1843, "step": 490 }, { "epoch": 8.771929824561404, "grad_norm": 0.5246126651763916, "learning_rate": 0.00015724496426250813, "loss": 0.167, "step": 500 }, { "epoch": 8.771929824561404, "eval_accuracy": 0.9364444444444444, "eval_f1": 0.8354430379746836, "eval_loss": 0.17720411717891693, "eval_precision": 0.8325688073394495, "eval_recall": 0.8383371824480369, "eval_runtime": 2.7456, "eval_samples_per_second": 81.95, "eval_steps_per_second": 10.562, "step": 500 }, { "epoch": 8.947368421052632, "grad_norm": 0.9557002782821655, "learning_rate": 0.00015594541910331385, "loss": 0.1752, "step": 510 }, { "epoch": 9.12280701754386, "grad_norm": 1.115300178527832, "learning_rate": 0.00015464587394411957, "loss": 0.2, "step": 520 }, { "epoch": 9.298245614035087, "grad_norm": 0.6540657877922058, "learning_rate": 0.00015334632878492526, "loss": 0.158, "step": 530 }, { "epoch": 9.473684210526315, "grad_norm": 0.8491069078445435, "learning_rate": 0.00015204678362573098, "loss": 0.1813, "step": 540 }, { "epoch": 9.649122807017545, "grad_norm": 1.3543705940246582, "learning_rate": 0.0001507472384665367, "loss": 0.1951, "step": 550 }, { "epoch": 9.824561403508772, "grad_norm": 0.8627998232841492, "learning_rate": 0.00014944769330734243, "loss": 0.1945, "step": 560 }, { "epoch": 10.0, "grad_norm": 1.2822953462600708, "learning_rate": 0.00014814814814814815, "loss": 0.1591, "step": 570 }, { "epoch": 10.175438596491228, "grad_norm": 0.6904670596122742, "learning_rate": 0.00014684860298895387, "loss": 0.1545, "step": 580 }, { "epoch": 10.350877192982455, "grad_norm": 1.3155221939086914, "learning_rate": 0.0001455490578297596, "loss": 0.1385, "step": 590 }, { "epoch": 10.526315789473685, "grad_norm": 0.8683547973632812, "learning_rate": 0.0001442495126705653, "loss": 0.1661, "step": 600 }, { "epoch": 10.526315789473685, "eval_accuracy": 0.9342222222222222, "eval_f1": 0.8258823529411765, "eval_loss": 0.16532927751541138, "eval_precision": 0.841726618705036, "eval_recall": 0.8106235565819861, "eval_runtime": 1.7784, "eval_samples_per_second": 126.515, "eval_steps_per_second": 16.306, "step": 600 }, { "epoch": 10.701754385964913, "grad_norm": 0.7406933307647705, "learning_rate": 0.00014294996751137103, "loss": 0.1569, "step": 610 }, { "epoch": 10.87719298245614, "grad_norm": 1.5100739002227783, "learning_rate": 0.00014165042235217672, "loss": 0.1873, "step": 620 }, { "epoch": 11.052631578947368, "grad_norm": 0.8658424019813538, "learning_rate": 0.00014035087719298245, "loss": 0.1771, "step": 630 }, { "epoch": 11.228070175438596, "grad_norm": 0.761426568031311, "learning_rate": 0.00013905133203378817, "loss": 0.1522, "step": 640 }, { "epoch": 11.403508771929825, "grad_norm": 0.6994770765304565, "learning_rate": 0.0001377517868745939, "loss": 0.1462, "step": 650 }, { "epoch": 11.578947368421053, "grad_norm": 0.6044259071350098, "learning_rate": 0.0001364522417153996, "loss": 0.1688, "step": 660 }, { "epoch": 11.75438596491228, "grad_norm": 0.6377450227737427, "learning_rate": 0.00013515269655620533, "loss": 0.1726, "step": 670 }, { "epoch": 11.929824561403509, "grad_norm": 0.45792627334594727, "learning_rate": 0.00013385315139701105, "loss": 0.1578, "step": 680 }, { "epoch": 12.105263157894736, "grad_norm": 0.5658883452415466, "learning_rate": 0.00013255360623781677, "loss": 0.1528, "step": 690 }, { "epoch": 12.280701754385966, "grad_norm": 0.568031370639801, "learning_rate": 0.0001312540610786225, "loss": 0.1603, "step": 700 }, { "epoch": 12.280701754385966, "eval_accuracy": 0.9408888888888889, "eval_f1": 0.8473019517795637, "eval_loss": 0.16492225229740143, "eval_precision": 0.8424657534246576, "eval_recall": 0.8521939953810623, "eval_runtime": 2.4488, "eval_samples_per_second": 91.883, "eval_steps_per_second": 11.843, "step": 700 }, { "epoch": 12.456140350877194, "grad_norm": 0.8529219031333923, "learning_rate": 0.0001299545159194282, "loss": 0.1438, "step": 710 }, { "epoch": 12.631578947368421, "grad_norm": 0.7960824370384216, "learning_rate": 0.0001286549707602339, "loss": 0.1245, "step": 720 }, { "epoch": 12.807017543859649, "grad_norm": 0.8270284533500671, "learning_rate": 0.00012748538011695908, "loss": 0.1775, "step": 730 }, { "epoch": 12.982456140350877, "grad_norm": 0.407463014125824, "learning_rate": 0.0001261858349577648, "loss": 0.1583, "step": 740 }, { "epoch": 13.157894736842104, "grad_norm": 1.2405822277069092, "learning_rate": 0.0001248862897985705, "loss": 0.1412, "step": 750 }, { "epoch": 13.333333333333334, "grad_norm": 0.7762990593910217, "learning_rate": 0.00012358674463937622, "loss": 0.137, "step": 760 }, { "epoch": 13.508771929824562, "grad_norm": 0.7772154808044434, "learning_rate": 0.00012228719948018194, "loss": 0.1618, "step": 770 }, { "epoch": 13.68421052631579, "grad_norm": 0.3346017599105835, "learning_rate": 0.00012098765432098766, "loss": 0.1276, "step": 780 }, { "epoch": 13.859649122807017, "grad_norm": 0.7661828994750977, "learning_rate": 0.00011968810916179338, "loss": 0.1606, "step": 790 }, { "epoch": 14.035087719298245, "grad_norm": 1.2454911470413208, "learning_rate": 0.0001183885640025991, "loss": 0.1523, "step": 800 }, { "epoch": 14.035087719298245, "eval_accuracy": 0.9466666666666667, "eval_f1": 0.8591549295774648, "eval_loss": 0.15682315826416016, "eval_precision": 0.8735083532219571, "eval_recall": 0.8452655889145496, "eval_runtime": 1.8011, "eval_samples_per_second": 124.926, "eval_steps_per_second": 16.102, "step": 800 }, { "epoch": 14.210526315789474, "grad_norm": 3.0044612884521484, "learning_rate": 0.00011708901884340481, "loss": 0.1331, "step": 810 }, { "epoch": 14.385964912280702, "grad_norm": 0.7117482423782349, "learning_rate": 0.00011578947368421053, "loss": 0.1619, "step": 820 }, { "epoch": 14.56140350877193, "grad_norm": 0.6939218044281006, "learning_rate": 0.00011448992852501626, "loss": 0.1531, "step": 830 }, { "epoch": 14.736842105263158, "grad_norm": 0.5622960329055786, "learning_rate": 0.00011319038336582198, "loss": 0.131, "step": 840 }, { "epoch": 14.912280701754385, "grad_norm": 0.9399430155754089, "learning_rate": 0.0001118908382066277, "loss": 0.1276, "step": 850 }, { "epoch": 15.087719298245615, "grad_norm": 1.6480320692062378, "learning_rate": 0.0001105912930474334, "loss": 0.1656, "step": 860 }, { "epoch": 15.263157894736842, "grad_norm": 0.7238647937774658, "learning_rate": 0.00010929174788823913, "loss": 0.1261, "step": 870 }, { "epoch": 15.43859649122807, "grad_norm": 1.0423846244812012, "learning_rate": 0.00010799220272904485, "loss": 0.1328, "step": 880 }, { "epoch": 15.614035087719298, "grad_norm": 1.1374431848526, "learning_rate": 0.00010669265756985057, "loss": 0.1427, "step": 890 }, { "epoch": 15.789473684210526, "grad_norm": 0.7375030517578125, "learning_rate": 0.00010539311241065628, "loss": 0.1506, "step": 900 }, { "epoch": 15.789473684210526, "eval_accuracy": 0.9431111111111111, "eval_f1": 0.8494117647058823, "eval_loss": 0.15481138229370117, "eval_precision": 0.8657074340527577, "eval_recall": 0.8337182448036952, "eval_runtime": 1.8243, "eval_samples_per_second": 123.334, "eval_steps_per_second": 15.896, "step": 900 }, { "epoch": 15.964912280701755, "grad_norm": 0.7035567164421082, "learning_rate": 0.000104093567251462, "loss": 0.1324, "step": 910 }, { "epoch": 16.140350877192983, "grad_norm": 0.6969211101531982, "learning_rate": 0.00010279402209226772, "loss": 0.1257, "step": 920 }, { "epoch": 16.31578947368421, "grad_norm": 0.3633826673030853, "learning_rate": 0.00010149447693307344, "loss": 0.1306, "step": 930 }, { "epoch": 16.49122807017544, "grad_norm": 0.8118802309036255, "learning_rate": 0.00010019493177387915, "loss": 0.1091, "step": 940 }, { "epoch": 16.666666666666668, "grad_norm": 0.6684471964836121, "learning_rate": 9.889538661468485e-05, "loss": 0.1323, "step": 950 }, { "epoch": 16.842105263157894, "grad_norm": 0.6080668568611145, "learning_rate": 9.759584145549058e-05, "loss": 0.1168, "step": 960 }, { "epoch": 17.017543859649123, "grad_norm": 0.7799493670463562, "learning_rate": 9.62962962962963e-05, "loss": 0.141, "step": 970 }, { "epoch": 17.19298245614035, "grad_norm": 0.5670738816261292, "learning_rate": 9.499675113710202e-05, "loss": 0.1244, "step": 980 }, { "epoch": 17.36842105263158, "grad_norm": 0.9652756452560425, "learning_rate": 9.369720597790773e-05, "loss": 0.1354, "step": 990 }, { "epoch": 17.54385964912281, "grad_norm": 0.8537412881851196, "learning_rate": 9.239766081871345e-05, "loss": 0.1485, "step": 1000 }, { "epoch": 17.54385964912281, "eval_accuracy": 0.9444444444444444, "eval_f1": 0.8544819557625145, "eval_loss": 0.15389865636825562, "eval_precision": 0.8615023474178404, "eval_recall": 0.8475750577367206, "eval_runtime": 1.7887, "eval_samples_per_second": 125.789, "eval_steps_per_second": 16.213, "step": 1000 }, { "epoch": 17.719298245614034, "grad_norm": 0.9258742928504944, "learning_rate": 9.109811565951917e-05, "loss": 0.1284, "step": 1010 }, { "epoch": 17.894736842105264, "grad_norm": 0.6817509531974792, "learning_rate": 8.979857050032489e-05, "loss": 0.1226, "step": 1020 }, { "epoch": 18.07017543859649, "grad_norm": 0.8437041640281677, "learning_rate": 8.849902534113061e-05, "loss": 0.1527, "step": 1030 }, { "epoch": 18.24561403508772, "grad_norm": 1.2362749576568604, "learning_rate": 8.719948018193632e-05, "loss": 0.1224, "step": 1040 }, { "epoch": 18.42105263157895, "grad_norm": 0.4136218726634979, "learning_rate": 8.589993502274204e-05, "loss": 0.1293, "step": 1050 }, { "epoch": 18.596491228070175, "grad_norm": 0.8913040161132812, "learning_rate": 8.460038986354776e-05, "loss": 0.1305, "step": 1060 }, { "epoch": 18.771929824561404, "grad_norm": 1.0768448114395142, "learning_rate": 8.330084470435348e-05, "loss": 0.1134, "step": 1070 }, { "epoch": 18.94736842105263, "grad_norm": 0.9289010763168335, "learning_rate": 8.200129954515919e-05, "loss": 0.1551, "step": 1080 }, { "epoch": 19.12280701754386, "grad_norm": 0.4481465220451355, "learning_rate": 8.070175438596491e-05, "loss": 0.1263, "step": 1090 }, { "epoch": 19.29824561403509, "grad_norm": 0.7408900260925293, "learning_rate": 7.940220922677063e-05, "loss": 0.1263, "step": 1100 }, { "epoch": 19.29824561403509, "eval_accuracy": 0.944, "eval_f1": 0.8534883720930233, "eval_loss": 0.15210777521133423, "eval_precision": 0.8594847775175644, "eval_recall": 0.8475750577367206, "eval_runtime": 1.7885, "eval_samples_per_second": 125.802, "eval_steps_per_second": 16.214, "step": 1100 }, { "epoch": 19.473684210526315, "grad_norm": 0.8939012289047241, "learning_rate": 7.810266406757635e-05, "loss": 0.1206, "step": 1110 }, { "epoch": 19.649122807017545, "grad_norm": 0.6809560656547546, "learning_rate": 7.680311890838207e-05, "loss": 0.1225, "step": 1120 }, { "epoch": 19.82456140350877, "grad_norm": 1.1481623649597168, "learning_rate": 7.550357374918778e-05, "loss": 0.1291, "step": 1130 }, { "epoch": 20.0, "grad_norm": 2.0011980533599854, "learning_rate": 7.42040285899935e-05, "loss": 0.1482, "step": 1140 }, { "epoch": 20.17543859649123, "grad_norm": 0.6619019508361816, "learning_rate": 7.290448343079922e-05, "loss": 0.1123, "step": 1150 }, { "epoch": 20.350877192982455, "grad_norm": 0.796700656414032, "learning_rate": 7.160493827160494e-05, "loss": 0.1166, "step": 1160 }, { "epoch": 20.526315789473685, "grad_norm": 0.9634900689125061, "learning_rate": 7.030539311241065e-05, "loss": 0.1263, "step": 1170 }, { "epoch": 20.70175438596491, "grad_norm": 0.505535900592804, "learning_rate": 6.900584795321637e-05, "loss": 0.1117, "step": 1180 }, { "epoch": 20.87719298245614, "grad_norm": 0.5166471600532532, "learning_rate": 6.770630279402209e-05, "loss": 0.1279, "step": 1190 }, { "epoch": 21.05263157894737, "grad_norm": 1.2773476839065552, "learning_rate": 6.640675763482781e-05, "loss": 0.1444, "step": 1200 }, { "epoch": 21.05263157894737, "eval_accuracy": 0.9417777777777778, "eval_f1": 0.8471411901983664, "eval_loss": 0.155166357755661, "eval_precision": 0.8561320754716981, "eval_recall": 0.8383371824480369, "eval_runtime": 2.37, "eval_samples_per_second": 94.937, "eval_steps_per_second": 12.236, "step": 1200 }, { "epoch": 21.228070175438596, "grad_norm": 0.793021559715271, "learning_rate": 6.510721247563352e-05, "loss": 0.1168, "step": 1210 }, { "epoch": 21.403508771929825, "grad_norm": 1.2551689147949219, "learning_rate": 6.380766731643924e-05, "loss": 0.1089, "step": 1220 }, { "epoch": 21.57894736842105, "grad_norm": 0.6803563237190247, "learning_rate": 6.250812215724496e-05, "loss": 0.1186, "step": 1230 }, { "epoch": 21.75438596491228, "grad_norm": 1.2632770538330078, "learning_rate": 6.120857699805068e-05, "loss": 0.1116, "step": 1240 }, { "epoch": 21.92982456140351, "grad_norm": 0.525141716003418, "learning_rate": 5.99090318388564e-05, "loss": 0.0979, "step": 1250 }, { "epoch": 22.105263157894736, "grad_norm": 0.5942980647087097, "learning_rate": 5.860948667966212e-05, "loss": 0.1483, "step": 1260 }, { "epoch": 22.280701754385966, "grad_norm": 1.0624207258224487, "learning_rate": 5.7309941520467835e-05, "loss": 0.1155, "step": 1270 }, { "epoch": 22.45614035087719, "grad_norm": 0.6244792938232422, "learning_rate": 5.6010396361273556e-05, "loss": 0.1159, "step": 1280 }, { "epoch": 22.63157894736842, "grad_norm": 1.9767743349075317, "learning_rate": 5.471085120207927e-05, "loss": 0.1165, "step": 1290 }, { "epoch": 22.80701754385965, "grad_norm": 2.270113468170166, "learning_rate": 5.341130604288499e-05, "loss": 0.1133, "step": 1300 }, { "epoch": 22.80701754385965, "eval_accuracy": 0.9448888888888889, "eval_f1": 0.8561484918793504, "eval_loss": 0.1531468778848648, "eval_precision": 0.8601398601398601, "eval_recall": 0.8521939953810623, "eval_runtime": 4.5112, "eval_samples_per_second": 49.875, "eval_steps_per_second": 6.428, "step": 1300 }, { "epoch": 22.982456140350877, "grad_norm": 2.3252851963043213, "learning_rate": 5.2111760883690706e-05, "loss": 0.1018, "step": 1310 }, { "epoch": 23.157894736842106, "grad_norm": 1.3282454013824463, "learning_rate": 5.081221572449643e-05, "loss": 0.1194, "step": 1320 }, { "epoch": 23.333333333333332, "grad_norm": 0.652642548084259, "learning_rate": 4.951267056530214e-05, "loss": 0.1016, "step": 1330 }, { "epoch": 23.50877192982456, "grad_norm": 1.584074854850769, "learning_rate": 4.821312540610786e-05, "loss": 0.1109, "step": 1340 }, { "epoch": 23.68421052631579, "grad_norm": 0.5799722075462341, "learning_rate": 4.691358024691358e-05, "loss": 0.0901, "step": 1350 }, { "epoch": 23.859649122807017, "grad_norm": 1.9589979648590088, "learning_rate": 4.56140350877193e-05, "loss": 0.1195, "step": 1360 }, { "epoch": 24.035087719298247, "grad_norm": 0.784710705280304, "learning_rate": 4.431448992852502e-05, "loss": 0.1318, "step": 1370 }, { "epoch": 24.210526315789473, "grad_norm": 1.0715792179107666, "learning_rate": 4.301494476933073e-05, "loss": 0.1236, "step": 1380 }, { "epoch": 24.385964912280702, "grad_norm": 0.8761755228042603, "learning_rate": 4.1715399610136454e-05, "loss": 0.1076, "step": 1390 }, { "epoch": 24.56140350877193, "grad_norm": 0.8874859809875488, "learning_rate": 4.041585445094217e-05, "loss": 0.1019, "step": 1400 }, { "epoch": 24.56140350877193, "eval_accuracy": 0.9431111111111111, "eval_f1": 0.8490566037735849, "eval_loss": 0.15768744051456451, "eval_precision": 0.8674698795180723, "eval_recall": 0.8314087759815243, "eval_runtime": 1.817, "eval_samples_per_second": 123.828, "eval_steps_per_second": 15.96, "step": 1400 }, { "epoch": 24.736842105263158, "grad_norm": 0.569615364074707, "learning_rate": 3.911630929174789e-05, "loss": 0.1114, "step": 1410 }, { "epoch": 24.912280701754387, "grad_norm": 0.4636388123035431, "learning_rate": 3.7816764132553604e-05, "loss": 0.1016, "step": 1420 }, { "epoch": 25.087719298245613, "grad_norm": 0.7966068983078003, "learning_rate": 3.6517218973359325e-05, "loss": 0.1181, "step": 1430 }, { "epoch": 25.263157894736842, "grad_norm": 0.7331326603889465, "learning_rate": 3.521767381416504e-05, "loss": 0.1037, "step": 1440 }, { "epoch": 25.43859649122807, "grad_norm": 1.1376439332962036, "learning_rate": 3.391812865497076e-05, "loss": 0.091, "step": 1450 }, { "epoch": 25.614035087719298, "grad_norm": 0.43491020798683167, "learning_rate": 3.2618583495776475e-05, "loss": 0.102, "step": 1460 }, { "epoch": 25.789473684210527, "grad_norm": 0.9410120844841003, "learning_rate": 3.1319038336582196e-05, "loss": 0.1108, "step": 1470 }, { "epoch": 25.964912280701753, "grad_norm": 0.9321810603141785, "learning_rate": 3.0019493177387914e-05, "loss": 0.1059, "step": 1480 }, { "epoch": 26.140350877192983, "grad_norm": 0.5571371912956238, "learning_rate": 2.871994801819363e-05, "loss": 0.0926, "step": 1490 }, { "epoch": 26.31578947368421, "grad_norm": 1.9081007242202759, "learning_rate": 2.742040285899935e-05, "loss": 0.1141, "step": 1500 }, { "epoch": 26.31578947368421, "eval_accuracy": 0.9413333333333334, "eval_f1": 0.8472222222222222, "eval_loss": 0.15601032972335815, "eval_precision": 0.8491879350348028, "eval_recall": 0.8452655889145496, "eval_runtime": 1.867, "eval_samples_per_second": 120.511, "eval_steps_per_second": 15.533, "step": 1500 }, { "epoch": 26.49122807017544, "grad_norm": 0.8356673121452332, "learning_rate": 2.6120857699805067e-05, "loss": 0.1077, "step": 1510 }, { "epoch": 26.666666666666668, "grad_norm": 1.3644295930862427, "learning_rate": 2.4821312540610784e-05, "loss": 0.1212, "step": 1520 }, { "epoch": 26.842105263157894, "grad_norm": 0.779222309589386, "learning_rate": 2.3521767381416506e-05, "loss": 0.1229, "step": 1530 }, { "epoch": 27.017543859649123, "grad_norm": 0.5873481631278992, "learning_rate": 2.2222222222222223e-05, "loss": 0.0998, "step": 1540 }, { "epoch": 27.19298245614035, "grad_norm": 0.9948704242706299, "learning_rate": 2.092267706302794e-05, "loss": 0.1435, "step": 1550 }, { "epoch": 27.36842105263158, "grad_norm": 0.32820120453834534, "learning_rate": 1.962313190383366e-05, "loss": 0.0992, "step": 1560 }, { "epoch": 27.54385964912281, "grad_norm": 1.0797744989395142, "learning_rate": 1.8323586744639376e-05, "loss": 0.1095, "step": 1570 }, { "epoch": 27.719298245614034, "grad_norm": 1.5036197900772095, "learning_rate": 1.7024041585445094e-05, "loss": 0.119, "step": 1580 }, { "epoch": 27.894736842105264, "grad_norm": 1.0871007442474365, "learning_rate": 1.5724496426250812e-05, "loss": 0.0974, "step": 1590 }, { "epoch": 28.07017543859649, "grad_norm": 0.6861986517906189, "learning_rate": 1.442495126705653e-05, "loss": 0.1087, "step": 1600 }, { "epoch": 28.07017543859649, "eval_accuracy": 0.9422222222222222, "eval_f1": 0.8491879350348028, "eval_loss": 0.15734025835990906, "eval_precision": 0.8531468531468531, "eval_recall": 0.8452655889145496, "eval_runtime": 3.5904, "eval_samples_per_second": 62.668, "eval_steps_per_second": 8.077, "step": 1600 }, { "epoch": 28.24561403508772, "grad_norm": 1.5399742126464844, "learning_rate": 1.3125406107862247e-05, "loss": 0.1243, "step": 1610 }, { "epoch": 28.42105263157895, "grad_norm": 0.7721771001815796, "learning_rate": 1.1825860948667967e-05, "loss": 0.0965, "step": 1620 }, { "epoch": 28.596491228070175, "grad_norm": 1.040131688117981, "learning_rate": 1.0526315789473684e-05, "loss": 0.1133, "step": 1630 }, { "epoch": 28.771929824561404, "grad_norm": 0.9755656123161316, "learning_rate": 9.226770630279402e-06, "loss": 0.0885, "step": 1640 }, { "epoch": 28.94736842105263, "grad_norm": 0.5838367342948914, "learning_rate": 7.92722547108512e-06, "loss": 0.1134, "step": 1650 }, { "epoch": 29.12280701754386, "grad_norm": 1.698116421699524, "learning_rate": 6.6276803118908384e-06, "loss": 0.1278, "step": 1660 }, { "epoch": 29.29824561403509, "grad_norm": 0.581572413444519, "learning_rate": 5.328135152696556e-06, "loss": 0.1209, "step": 1670 }, { "epoch": 29.473684210526315, "grad_norm": 0.4100797772407532, "learning_rate": 4.028589993502274e-06, "loss": 0.1108, "step": 1680 }, { "epoch": 29.649122807017545, "grad_norm": 1.5013538599014282, "learning_rate": 2.729044834307992e-06, "loss": 0.1195, "step": 1690 }, { "epoch": 29.82456140350877, "grad_norm": 1.0121512413024902, "learning_rate": 1.4294996751137102e-06, "loss": 0.1015, "step": 1700 }, { "epoch": 29.82456140350877, "eval_accuracy": 0.9422222222222222, "eval_f1": 0.8488372093023255, "eval_loss": 0.15452326834201813, "eval_precision": 0.8548009367681498, "eval_recall": 0.8429561200923787, "eval_runtime": 1.8193, "eval_samples_per_second": 123.672, "eval_steps_per_second": 15.94, "step": 1700 }, { "epoch": 30.0, "grad_norm": 2.770343780517578, "learning_rate": 1.299545159194282e-07, "loss": 0.1342, "step": 1710 }, { "epoch": 30.0, "step": 1710, "total_flos": 1.77124415883264e+17, "train_loss": 0.20865077226482637, "train_runtime": 373.9101, "train_samples_per_second": 72.21, "train_steps_per_second": 4.573 } ], "logging_steps": 10, "max_steps": 1710, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.77124415883264e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }