{ "best_metric": 0.1725098043680191, "best_model_checkpoint": "frost-mobile-apple/mobilevit-xx-small-v2024-10-22/checkpoint-1500", "epoch": 30.0, "eval_steps": 100, "global_step": 1710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17543859649122806, "grad_norm": 0.2625730037689209, "learning_rate": 1.1695906432748537e-05, "loss": 0.6928, "step": 10 }, { "epoch": 0.3508771929824561, "grad_norm": 0.2961116135120392, "learning_rate": 2.3391812865497074e-05, "loss": 0.6936, "step": 20 }, { "epoch": 0.5263157894736842, "grad_norm": 0.24333663284778595, "learning_rate": 3.508771929824561e-05, "loss": 0.6917, "step": 30 }, { "epoch": 0.7017543859649122, "grad_norm": 0.2218523770570755, "learning_rate": 4.678362573099415e-05, "loss": 0.6887, "step": 40 }, { "epoch": 0.8771929824561403, "grad_norm": 0.23965124785900116, "learning_rate": 5.847953216374269e-05, "loss": 0.685, "step": 50 }, { "epoch": 1.0526315789473684, "grad_norm": 0.23081418871879578, "learning_rate": 7.017543859649122e-05, "loss": 0.6815, "step": 60 }, { "epoch": 1.2280701754385965, "grad_norm": 0.23212119936943054, "learning_rate": 8.187134502923976e-05, "loss": 0.676, "step": 70 }, { "epoch": 1.4035087719298245, "grad_norm": 0.2775309383869171, "learning_rate": 9.35672514619883e-05, "loss": 0.6711, "step": 80 }, { "epoch": 1.5789473684210527, "grad_norm": 0.38230618834495544, "learning_rate": 0.00010526315789473685, "loss": 0.6617, "step": 90 }, { "epoch": 1.7543859649122808, "grad_norm": 0.29050251841545105, "learning_rate": 0.00011695906432748539, "loss": 0.6549, "step": 100 }, { "epoch": 1.7543859649122808, "eval_accuracy": 0.82, "eval_f1": 0.6260387811634349, "eval_loss": 0.6288657784461975, "eval_precision": 0.5191424196018377, "eval_recall": 0.7883720930232558, "eval_runtime": 2.6915, "eval_samples_per_second": 83.597, "eval_steps_per_second": 10.775, "step": 100 }, { "epoch": 1.9298245614035088, "grad_norm": 0.3310299217700958, "learning_rate": 0.0001286549707602339, "loss": 0.6389, "step": 110 }, { "epoch": 2.1052631578947367, "grad_norm": 0.35385212302207947, "learning_rate": 0.00014035087719298245, "loss": 0.6276, "step": 120 }, { "epoch": 2.280701754385965, "grad_norm": 0.31887122988700867, "learning_rate": 0.00015204678362573098, "loss": 0.6068, "step": 130 }, { "epoch": 2.456140350877193, "grad_norm": 0.38656044006347656, "learning_rate": 0.00016374269005847952, "loss": 0.5876, "step": 140 }, { "epoch": 2.6315789473684212, "grad_norm": 0.40553656220436096, "learning_rate": 0.00017543859649122806, "loss": 0.5782, "step": 150 }, { "epoch": 2.807017543859649, "grad_norm": 0.5055739879608154, "learning_rate": 0.0001871345029239766, "loss": 0.546, "step": 160 }, { "epoch": 2.982456140350877, "grad_norm": 0.6473321318626404, "learning_rate": 0.00019883040935672513, "loss": 0.5322, "step": 170 }, { "epoch": 3.1578947368421053, "grad_norm": 0.5542100667953491, "learning_rate": 0.00019883040935672513, "loss": 0.5081, "step": 180 }, { "epoch": 3.3333333333333335, "grad_norm": 0.525965690612793, "learning_rate": 0.00019753086419753085, "loss": 0.4906, "step": 190 }, { "epoch": 3.5087719298245617, "grad_norm": 0.6686927676200867, "learning_rate": 0.00019623131903833657, "loss": 0.4616, "step": 200 }, { "epoch": 3.5087719298245617, "eval_accuracy": 0.8866666666666667, "eval_f1": 0.7295864262990456, "eval_loss": 0.41918542981147766, "eval_precision": 0.6705653021442495, "eval_recall": 0.8, "eval_runtime": 2.7897, "eval_samples_per_second": 80.654, "eval_steps_per_second": 10.395, "step": 200 }, { "epoch": 3.6842105263157894, "grad_norm": 1.5412182807922363, "learning_rate": 0.0001949317738791423, "loss": 0.4506, "step": 210 }, { "epoch": 3.8596491228070176, "grad_norm": 0.4173012375831604, "learning_rate": 0.00019363222871994802, "loss": 0.4371, "step": 220 }, { "epoch": 4.035087719298246, "grad_norm": 0.42248570919036865, "learning_rate": 0.00019233268356075374, "loss": 0.4064, "step": 230 }, { "epoch": 4.2105263157894735, "grad_norm": 0.5491617918014526, "learning_rate": 0.00019103313840155946, "loss": 0.3724, "step": 240 }, { "epoch": 4.385964912280702, "grad_norm": 0.35062703490257263, "learning_rate": 0.00018973359324236518, "loss": 0.3671, "step": 250 }, { "epoch": 4.56140350877193, "grad_norm": 0.40491071343421936, "learning_rate": 0.0001884340480831709, "loss": 0.3683, "step": 260 }, { "epoch": 4.7368421052631575, "grad_norm": 0.9965174794197083, "learning_rate": 0.0001871345029239766, "loss": 0.3402, "step": 270 }, { "epoch": 4.912280701754386, "grad_norm": 0.7184051275253296, "learning_rate": 0.00018583495776478232, "loss": 0.3348, "step": 280 }, { "epoch": 5.087719298245614, "grad_norm": 1.8915038108825684, "learning_rate": 0.00018453541260558804, "loss": 0.32, "step": 290 }, { "epoch": 5.2631578947368425, "grad_norm": 0.5761589407920837, "learning_rate": 0.00018323586744639376, "loss": 0.3101, "step": 300 }, { "epoch": 5.2631578947368425, "eval_accuracy": 0.9035555555555556, "eval_f1": 0.7317676143386898, "eval_loss": 0.30708780884742737, "eval_precision": 0.7810026385224275, "eval_recall": 0.6883720930232559, "eval_runtime": 2.8811, "eval_samples_per_second": 78.095, "eval_steps_per_second": 10.066, "step": 300 }, { "epoch": 5.43859649122807, "grad_norm": 1.1592423915863037, "learning_rate": 0.00018193632228719948, "loss": 0.3258, "step": 310 }, { "epoch": 5.614035087719298, "grad_norm": 0.8307028412818909, "learning_rate": 0.0001806367771280052, "loss": 0.3149, "step": 320 }, { "epoch": 5.7894736842105265, "grad_norm": 0.9469823837280273, "learning_rate": 0.00017933723196881092, "loss": 0.3033, "step": 330 }, { "epoch": 5.964912280701754, "grad_norm": 2.199500322341919, "learning_rate": 0.00017803768680961664, "loss": 0.3164, "step": 340 }, { "epoch": 6.140350877192983, "grad_norm": 0.6772398948669434, "learning_rate": 0.00017673814165042236, "loss": 0.2806, "step": 350 }, { "epoch": 6.315789473684211, "grad_norm": 0.4862241744995117, "learning_rate": 0.00017543859649122806, "loss": 0.2817, "step": 360 }, { "epoch": 6.491228070175438, "grad_norm": 1.2349482774734497, "learning_rate": 0.00017413905133203378, "loss": 0.288, "step": 370 }, { "epoch": 6.666666666666667, "grad_norm": 2.9781813621520996, "learning_rate": 0.00017296946068875895, "loss": 0.3039, "step": 380 }, { "epoch": 6.842105263157895, "grad_norm": 0.7632750272750854, "learning_rate": 0.00017166991552956468, "loss": 0.2836, "step": 390 }, { "epoch": 7.017543859649122, "grad_norm": 1.2420198917388916, "learning_rate": 0.00017037037037037037, "loss": 0.2932, "step": 400 }, { "epoch": 7.017543859649122, "eval_accuracy": 0.908, "eval_f1": 0.7460122699386503, "eval_loss": 0.24856920540332794, "eval_precision": 0.7896103896103897, "eval_recall": 0.7069767441860465, "eval_runtime": 2.8347, "eval_samples_per_second": 79.373, "eval_steps_per_second": 10.23, "step": 400 }, { "epoch": 7.192982456140351, "grad_norm": 0.8554529547691345, "learning_rate": 0.0001690708252111761, "loss": 0.2583, "step": 410 }, { "epoch": 7.368421052631579, "grad_norm": 0.5736662745475769, "learning_rate": 0.0001677712800519818, "loss": 0.2809, "step": 420 }, { "epoch": 7.543859649122807, "grad_norm": 0.7552086114883423, "learning_rate": 0.00016647173489278753, "loss": 0.2774, "step": 430 }, { "epoch": 7.719298245614035, "grad_norm": 0.6094131469726562, "learning_rate": 0.00016517218973359325, "loss": 0.2771, "step": 440 }, { "epoch": 7.894736842105263, "grad_norm": 0.5392113924026489, "learning_rate": 0.00016387264457439898, "loss": 0.2755, "step": 450 }, { "epoch": 8.070175438596491, "grad_norm": 0.4927959740161896, "learning_rate": 0.0001625730994152047, "loss": 0.2572, "step": 460 }, { "epoch": 8.24561403508772, "grad_norm": 0.9484465718269348, "learning_rate": 0.00016127355425601042, "loss": 0.2354, "step": 470 }, { "epoch": 8.421052631578947, "grad_norm": 0.71286940574646, "learning_rate": 0.0001599740090968161, "loss": 0.2611, "step": 480 }, { "epoch": 8.596491228070175, "grad_norm": 1.9641995429992676, "learning_rate": 0.00015867446393762183, "loss": 0.2547, "step": 490 }, { "epoch": 8.771929824561404, "grad_norm": 1.1893583536148071, "learning_rate": 0.00015737491877842755, "loss": 0.2652, "step": 500 }, { "epoch": 8.771929824561404, "eval_accuracy": 0.9137777777777778, "eval_f1": 0.7673860911270983, "eval_loss": 0.22792504727840424, "eval_precision": 0.7920792079207921, "eval_recall": 0.7441860465116279, "eval_runtime": 1.8141, "eval_samples_per_second": 124.03, "eval_steps_per_second": 15.986, "step": 500 }, { "epoch": 8.947368421052632, "grad_norm": 1.0071460008621216, "learning_rate": 0.00015607537361923327, "loss": 0.244, "step": 510 }, { "epoch": 9.12280701754386, "grad_norm": 1.22650146484375, "learning_rate": 0.000154775828460039, "loss": 0.2377, "step": 520 }, { "epoch": 9.298245614035087, "grad_norm": 2.428567886352539, "learning_rate": 0.00015347628330084472, "loss": 0.2494, "step": 530 }, { "epoch": 9.473684210526315, "grad_norm": 1.8254860639572144, "learning_rate": 0.00015217673814165044, "loss": 0.2603, "step": 540 }, { "epoch": 9.649122807017545, "grad_norm": 0.6592786908149719, "learning_rate": 0.00015087719298245616, "loss": 0.2597, "step": 550 }, { "epoch": 9.824561403508772, "grad_norm": 1.3194756507873535, "learning_rate": 0.00014957764782326188, "loss": 0.2313, "step": 560 }, { "epoch": 10.0, "grad_norm": 2.1871612071990967, "learning_rate": 0.00014827810266406757, "loss": 0.282, "step": 570 }, { "epoch": 10.175438596491228, "grad_norm": 0.759860098361969, "learning_rate": 0.0001469785575048733, "loss": 0.2319, "step": 580 }, { "epoch": 10.350877192982455, "grad_norm": 1.447387933731079, "learning_rate": 0.00014567901234567902, "loss": 0.2457, "step": 590 }, { "epoch": 10.526315789473685, "grad_norm": 0.9954220056533813, "learning_rate": 0.00014437946718648474, "loss": 0.2253, "step": 600 }, { "epoch": 10.526315789473685, "eval_accuracy": 0.9217777777777778, "eval_f1": 0.7858880778588808, "eval_loss": 0.21004962921142578, "eval_precision": 0.8239795918367347, "eval_recall": 0.7511627906976744, "eval_runtime": 4.2925, "eval_samples_per_second": 52.417, "eval_steps_per_second": 6.756, "step": 600 }, { "epoch": 10.701754385964913, "grad_norm": 0.715815544128418, "learning_rate": 0.00014307992202729046, "loss": 0.2391, "step": 610 }, { "epoch": 10.87719298245614, "grad_norm": 0.6449007391929626, "learning_rate": 0.00014178037686809618, "loss": 0.2516, "step": 620 }, { "epoch": 11.052631578947368, "grad_norm": 0.9613096117973328, "learning_rate": 0.0001404808317089019, "loss": 0.2157, "step": 630 }, { "epoch": 11.228070175438596, "grad_norm": 2.206623077392578, "learning_rate": 0.00013918128654970762, "loss": 0.2365, "step": 640 }, { "epoch": 11.403508771929825, "grad_norm": 1.8694980144500732, "learning_rate": 0.00013788174139051334, "loss": 0.2263, "step": 650 }, { "epoch": 11.578947368421053, "grad_norm": 0.7060205340385437, "learning_rate": 0.00013658219623131904, "loss": 0.2173, "step": 660 }, { "epoch": 11.75438596491228, "grad_norm": 0.8581671714782715, "learning_rate": 0.00013528265107212476, "loss": 0.2204, "step": 670 }, { "epoch": 11.929824561403509, "grad_norm": 1.196590781211853, "learning_rate": 0.00013398310591293048, "loss": 0.2519, "step": 680 }, { "epoch": 12.105263157894736, "grad_norm": 1.0726817846298218, "learning_rate": 0.0001326835607537362, "loss": 0.2184, "step": 690 }, { "epoch": 12.280701754385966, "grad_norm": 0.6241493821144104, "learning_rate": 0.00013138401559454192, "loss": 0.2257, "step": 700 }, { "epoch": 12.280701754385966, "eval_accuracy": 0.9248888888888889, "eval_f1": 0.8018757327080891, "eval_loss": 0.19510744512081146, "eval_precision": 0.8085106382978723, "eval_recall": 0.7953488372093023, "eval_runtime": 2.9139, "eval_samples_per_second": 77.217, "eval_steps_per_second": 9.952, "step": 700 }, { "epoch": 12.456140350877194, "grad_norm": 1.382541298866272, "learning_rate": 0.00013008447043534764, "loss": 0.217, "step": 710 }, { "epoch": 12.631578947368421, "grad_norm": 0.7372106909751892, "learning_rate": 0.00012878492527615336, "loss": 0.2209, "step": 720 }, { "epoch": 12.807017543859649, "grad_norm": 1.3437495231628418, "learning_rate": 0.00012748538011695908, "loss": 0.2215, "step": 730 }, { "epoch": 12.982456140350877, "grad_norm": 0.8328105807304382, "learning_rate": 0.0001261858349577648, "loss": 0.247, "step": 740 }, { "epoch": 13.157894736842104, "grad_norm": 1.166037917137146, "learning_rate": 0.0001248862897985705, "loss": 0.2362, "step": 750 }, { "epoch": 13.333333333333334, "grad_norm": 1.8687838315963745, "learning_rate": 0.00012358674463937622, "loss": 0.2247, "step": 760 }, { "epoch": 13.508771929824562, "grad_norm": 1.2782139778137207, "learning_rate": 0.00012228719948018194, "loss": 0.2134, "step": 770 }, { "epoch": 13.68421052631579, "grad_norm": 1.114933967590332, "learning_rate": 0.00012098765432098766, "loss": 0.1965, "step": 780 }, { "epoch": 13.859649122807017, "grad_norm": 1.7937145233154297, "learning_rate": 0.00011968810916179338, "loss": 0.2124, "step": 790 }, { "epoch": 14.035087719298245, "grad_norm": 1.6698014736175537, "learning_rate": 0.0001183885640025991, "loss": 0.2468, "step": 800 }, { "epoch": 14.035087719298245, "eval_accuracy": 0.9306666666666666, "eval_f1": 0.8198614318706697, "eval_loss": 0.19064003229141235, "eval_precision": 0.8142201834862385, "eval_recall": 0.8255813953488372, "eval_runtime": 2.8315, "eval_samples_per_second": 79.464, "eval_steps_per_second": 10.242, "step": 800 }, { "epoch": 14.210526315789474, "grad_norm": 0.6950424313545227, "learning_rate": 0.00011708901884340481, "loss": 0.2004, "step": 810 }, { "epoch": 14.385964912280702, "grad_norm": 1.5043634176254272, "learning_rate": 0.00011578947368421053, "loss": 0.2317, "step": 820 }, { "epoch": 14.56140350877193, "grad_norm": 1.2491843700408936, "learning_rate": 0.00011448992852501626, "loss": 0.2027, "step": 830 }, { "epoch": 14.736842105263158, "grad_norm": 0.6502349376678467, "learning_rate": 0.00011319038336582198, "loss": 0.2112, "step": 840 }, { "epoch": 14.912280701754385, "grad_norm": 0.40061789751052856, "learning_rate": 0.0001118908382066277, "loss": 0.1756, "step": 850 }, { "epoch": 15.087719298245615, "grad_norm": 2.8378994464874268, "learning_rate": 0.0001105912930474334, "loss": 0.2216, "step": 860 }, { "epoch": 15.263157894736842, "grad_norm": 1.7187498807907104, "learning_rate": 0.00010929174788823913, "loss": 0.2072, "step": 870 }, { "epoch": 15.43859649122807, "grad_norm": 1.774376392364502, "learning_rate": 0.00010799220272904485, "loss": 0.239, "step": 880 }, { "epoch": 15.614035087719298, "grad_norm": 1.5812989473342896, "learning_rate": 0.00010669265756985057, "loss": 0.2191, "step": 890 }, { "epoch": 15.789473684210526, "grad_norm": 0.9877386689186096, "learning_rate": 0.00010539311241065628, "loss": 0.1796, "step": 900 }, { "epoch": 15.789473684210526, "eval_accuracy": 0.9275555555555556, "eval_f1": 0.81199538638985, "eval_loss": 0.19485591351985931, "eval_precision": 0.8054919908466819, "eval_recall": 0.8186046511627907, "eval_runtime": 1.8216, "eval_samples_per_second": 123.517, "eval_steps_per_second": 15.92, "step": 900 }, { "epoch": 15.964912280701755, "grad_norm": 1.059669017791748, "learning_rate": 0.000104093567251462, "loss": 0.1838, "step": 910 }, { "epoch": 16.140350877192983, "grad_norm": 1.4218086004257202, "learning_rate": 0.00010279402209226772, "loss": 0.2281, "step": 920 }, { "epoch": 16.31578947368421, "grad_norm": 1.2070213556289673, "learning_rate": 0.00010149447693307344, "loss": 0.1997, "step": 930 }, { "epoch": 16.49122807017544, "grad_norm": 2.351250410079956, "learning_rate": 0.00010019493177387915, "loss": 0.1843, "step": 940 }, { "epoch": 16.666666666666668, "grad_norm": 0.8852570056915283, "learning_rate": 9.889538661468485e-05, "loss": 0.2357, "step": 950 }, { "epoch": 16.842105263157894, "grad_norm": 2.0466091632843018, "learning_rate": 9.759584145549058e-05, "loss": 0.2277, "step": 960 }, { "epoch": 17.017543859649123, "grad_norm": 5.798379898071289, "learning_rate": 9.62962962962963e-05, "loss": 0.2246, "step": 970 }, { "epoch": 17.19298245614035, "grad_norm": 1.6754958629608154, "learning_rate": 9.499675113710202e-05, "loss": 0.1904, "step": 980 }, { "epoch": 17.36842105263158, "grad_norm": 0.6962611675262451, "learning_rate": 9.369720597790773e-05, "loss": 0.202, "step": 990 }, { "epoch": 17.54385964912281, "grad_norm": 0.5351881384849548, "learning_rate": 9.239766081871345e-05, "loss": 0.1888, "step": 1000 }, { "epoch": 17.54385964912281, "eval_accuracy": 0.9306666666666666, "eval_f1": 0.8177570093457944, "eval_loss": 0.18066002428531647, "eval_precision": 0.8215962441314554, "eval_recall": 0.813953488372093, "eval_runtime": 1.8596, "eval_samples_per_second": 120.996, "eval_steps_per_second": 15.595, "step": 1000 }, { "epoch": 17.719298245614034, "grad_norm": 1.2162110805511475, "learning_rate": 9.109811565951917e-05, "loss": 0.1789, "step": 1010 }, { "epoch": 17.894736842105264, "grad_norm": 1.2040334939956665, "learning_rate": 8.979857050032489e-05, "loss": 0.2109, "step": 1020 }, { "epoch": 18.07017543859649, "grad_norm": 0.8599823117256165, "learning_rate": 8.849902534113061e-05, "loss": 0.2113, "step": 1030 }, { "epoch": 18.24561403508772, "grad_norm": 1.0291296243667603, "learning_rate": 8.719948018193632e-05, "loss": 0.1981, "step": 1040 }, { "epoch": 18.42105263157895, "grad_norm": 3.214996576309204, "learning_rate": 8.589993502274204e-05, "loss": 0.1903, "step": 1050 }, { "epoch": 18.596491228070175, "grad_norm": 1.1698780059814453, "learning_rate": 8.460038986354776e-05, "loss": 0.192, "step": 1060 }, { "epoch": 18.771929824561404, "grad_norm": 3.0040793418884277, "learning_rate": 8.330084470435348e-05, "loss": 0.2062, "step": 1070 }, { "epoch": 18.94736842105263, "grad_norm": 1.365694522857666, "learning_rate": 8.200129954515919e-05, "loss": 0.1885, "step": 1080 }, { "epoch": 19.12280701754386, "grad_norm": 0.5183665156364441, "learning_rate": 8.070175438596491e-05, "loss": 0.2089, "step": 1090 }, { "epoch": 19.29824561403509, "grad_norm": 0.6474595069885254, "learning_rate": 7.940220922677063e-05, "loss": 0.202, "step": 1100 }, { "epoch": 19.29824561403509, "eval_accuracy": 0.9342222222222222, "eval_f1": 0.8287037037037037, "eval_loss": 0.1772110015153885, "eval_precision": 0.8248847926267281, "eval_recall": 0.8325581395348837, "eval_runtime": 2.2017, "eval_samples_per_second": 102.193, "eval_steps_per_second": 13.171, "step": 1100 }, { "epoch": 19.473684210526315, "grad_norm": 0.7569323778152466, "learning_rate": 7.810266406757635e-05, "loss": 0.2037, "step": 1110 }, { "epoch": 19.649122807017545, "grad_norm": 1.068310260772705, "learning_rate": 7.680311890838207e-05, "loss": 0.1842, "step": 1120 }, { "epoch": 19.82456140350877, "grad_norm": 1.1388903856277466, "learning_rate": 7.550357374918778e-05, "loss": 0.2057, "step": 1130 }, { "epoch": 20.0, "grad_norm": 5.906609535217285, "learning_rate": 7.42040285899935e-05, "loss": 0.2088, "step": 1140 }, { "epoch": 20.17543859649123, "grad_norm": 0.9702988862991333, "learning_rate": 7.290448343079922e-05, "loss": 0.1939, "step": 1150 }, { "epoch": 20.350877192982455, "grad_norm": 3.627027988433838, "learning_rate": 7.160493827160494e-05, "loss": 0.1953, "step": 1160 }, { "epoch": 20.526315789473685, "grad_norm": 1.11257004737854, "learning_rate": 7.030539311241065e-05, "loss": 0.1908, "step": 1170 }, { "epoch": 20.70175438596491, "grad_norm": 1.626079797744751, "learning_rate": 6.900584795321637e-05, "loss": 0.2011, "step": 1180 }, { "epoch": 20.87719298245614, "grad_norm": 1.8711522817611694, "learning_rate": 6.770630279402209e-05, "loss": 0.2106, "step": 1190 }, { "epoch": 21.05263157894737, "grad_norm": 2.9188010692596436, "learning_rate": 6.640675763482781e-05, "loss": 0.1824, "step": 1200 }, { "epoch": 21.05263157894737, "eval_accuracy": 0.9275555555555556, "eval_f1": 0.8080094228504122, "eval_loss": 0.18258829414844513, "eval_precision": 0.8186157517899761, "eval_recall": 0.7976744186046512, "eval_runtime": 1.8368, "eval_samples_per_second": 122.496, "eval_steps_per_second": 15.788, "step": 1200 }, { "epoch": 21.228070175438596, "grad_norm": 1.2156211137771606, "learning_rate": 6.510721247563352e-05, "loss": 0.1926, "step": 1210 }, { "epoch": 21.403508771929825, "grad_norm": 0.5184522271156311, "learning_rate": 6.380766731643924e-05, "loss": 0.1695, "step": 1220 }, { "epoch": 21.57894736842105, "grad_norm": 1.8020312786102295, "learning_rate": 6.250812215724496e-05, "loss": 0.2222, "step": 1230 }, { "epoch": 21.75438596491228, "grad_norm": 1.808860421180725, "learning_rate": 6.120857699805068e-05, "loss": 0.2026, "step": 1240 }, { "epoch": 21.92982456140351, "grad_norm": 0.5891908407211304, "learning_rate": 5.99090318388564e-05, "loss": 0.1861, "step": 1250 }, { "epoch": 22.105263157894736, "grad_norm": 0.7829120755195618, "learning_rate": 5.860948667966212e-05, "loss": 0.1911, "step": 1260 }, { "epoch": 22.280701754385966, "grad_norm": 0.8304038643836975, "learning_rate": 5.7309941520467835e-05, "loss": 0.1595, "step": 1270 }, { "epoch": 22.45614035087719, "grad_norm": 0.9477715492248535, "learning_rate": 5.6010396361273556e-05, "loss": 0.2207, "step": 1280 }, { "epoch": 22.63157894736842, "grad_norm": 1.6679517030715942, "learning_rate": 5.471085120207927e-05, "loss": 0.1885, "step": 1290 }, { "epoch": 22.80701754385965, "grad_norm": 1.945037603378296, "learning_rate": 5.341130604288499e-05, "loss": 0.1808, "step": 1300 }, { "epoch": 22.80701754385965, "eval_accuracy": 0.9346666666666666, "eval_f1": 0.8296639629200464, "eval_loss": 0.16815528273582458, "eval_precision": 0.8267898383371824, "eval_recall": 0.8325581395348837, "eval_runtime": 2.4881, "eval_samples_per_second": 90.429, "eval_steps_per_second": 11.655, "step": 1300 }, { "epoch": 22.982456140350877, "grad_norm": 0.7030972242355347, "learning_rate": 5.2111760883690706e-05, "loss": 0.1717, "step": 1310 }, { "epoch": 23.157894736842106, "grad_norm": 1.0622111558914185, "learning_rate": 5.081221572449643e-05, "loss": 0.2162, "step": 1320 }, { "epoch": 23.333333333333332, "grad_norm": 1.3687249422073364, "learning_rate": 4.951267056530214e-05, "loss": 0.197, "step": 1330 }, { "epoch": 23.50877192982456, "grad_norm": 1.218827724456787, "learning_rate": 4.821312540610786e-05, "loss": 0.1811, "step": 1340 }, { "epoch": 23.68421052631579, "grad_norm": 3.9379024505615234, "learning_rate": 4.691358024691358e-05, "loss": 0.1896, "step": 1350 }, { "epoch": 23.859649122807017, "grad_norm": 0.9299766421318054, "learning_rate": 4.56140350877193e-05, "loss": 0.1663, "step": 1360 }, { "epoch": 24.035087719298247, "grad_norm": 4.373446941375732, "learning_rate": 4.431448992852502e-05, "loss": 0.1883, "step": 1370 }, { "epoch": 24.210526315789473, "grad_norm": 1.0416285991668701, "learning_rate": 4.301494476933073e-05, "loss": 0.1884, "step": 1380 }, { "epoch": 24.385964912280702, "grad_norm": 1.9816950559616089, "learning_rate": 4.1715399610136454e-05, "loss": 0.1949, "step": 1390 }, { "epoch": 24.56140350877193, "grad_norm": 1.6888455152511597, "learning_rate": 4.041585445094217e-05, "loss": 0.1792, "step": 1400 }, { "epoch": 24.56140350877193, "eval_accuracy": 0.9364444444444444, "eval_f1": 0.8323563892145369, "eval_loss": 0.16882646083831787, "eval_precision": 0.8392434988179669, "eval_recall": 0.8255813953488372, "eval_runtime": 1.8209, "eval_samples_per_second": 123.562, "eval_steps_per_second": 15.926, "step": 1400 }, { "epoch": 24.736842105263158, "grad_norm": 0.6522326469421387, "learning_rate": 3.911630929174789e-05, "loss": 0.149, "step": 1410 }, { "epoch": 24.912280701754387, "grad_norm": 1.053612470626831, "learning_rate": 3.7816764132553604e-05, "loss": 0.1983, "step": 1420 }, { "epoch": 25.087719298245613, "grad_norm": 1.025525689125061, "learning_rate": 3.664717348927875e-05, "loss": 0.1973, "step": 1430 }, { "epoch": 25.263157894736842, "grad_norm": 2.1537649631500244, "learning_rate": 3.534762833008447e-05, "loss": 0.1797, "step": 1440 }, { "epoch": 25.43859649122807, "grad_norm": 2.6327617168426514, "learning_rate": 3.404808317089019e-05, "loss": 0.1693, "step": 1450 }, { "epoch": 25.614035087719298, "grad_norm": 1.1369807720184326, "learning_rate": 3.274853801169591e-05, "loss": 0.1826, "step": 1460 }, { "epoch": 25.789473684210527, "grad_norm": 2.0842247009277344, "learning_rate": 3.1448992852501624e-05, "loss": 0.1778, "step": 1470 }, { "epoch": 25.964912280701753, "grad_norm": 0.8993640542030334, "learning_rate": 3.014944769330734e-05, "loss": 0.1688, "step": 1480 }, { "epoch": 26.140350877192983, "grad_norm": 0.9640088677406311, "learning_rate": 2.8979857050032487e-05, "loss": 0.2139, "step": 1490 }, { "epoch": 26.31578947368421, "grad_norm": 1.134974718093872, "learning_rate": 2.7680311890838205e-05, "loss": 0.1852, "step": 1500 }, { "epoch": 26.31578947368421, "eval_accuracy": 0.9337777777777778, "eval_f1": 0.826945412311266, "eval_loss": 0.1725098043680191, "eval_precision": 0.8259860788863109, "eval_recall": 0.827906976744186, "eval_runtime": 1.8397, "eval_samples_per_second": 122.305, "eval_steps_per_second": 15.764, "step": 1500 }, { "epoch": 26.49122807017544, "grad_norm": 0.3995600640773773, "learning_rate": 2.6380766731643926e-05, "loss": 0.1703, "step": 1510 }, { "epoch": 26.666666666666668, "grad_norm": 1.8065487146377563, "learning_rate": 2.5081221572449644e-05, "loss": 0.2017, "step": 1520 }, { "epoch": 26.842105263157894, "grad_norm": 2.3725926876068115, "learning_rate": 2.378167641325536e-05, "loss": 0.1926, "step": 1530 }, { "epoch": 27.017543859649123, "grad_norm": 1.9128490686416626, "learning_rate": 2.248213125406108e-05, "loss": 0.1771, "step": 1540 }, { "epoch": 27.19298245614035, "grad_norm": 1.2254141569137573, "learning_rate": 2.1182586094866797e-05, "loss": 0.1791, "step": 1550 }, { "epoch": 27.36842105263158, "grad_norm": 1.3266674280166626, "learning_rate": 1.9883040935672515e-05, "loss": 0.1671, "step": 1560 }, { "epoch": 27.54385964912281, "grad_norm": 1.2818776369094849, "learning_rate": 1.8583495776478232e-05, "loss": 0.17, "step": 1570 }, { "epoch": 27.719298245614034, "grad_norm": 1.0659555196762085, "learning_rate": 1.728395061728395e-05, "loss": 0.1712, "step": 1580 }, { "epoch": 27.894736842105264, "grad_norm": 1.0451716184616089, "learning_rate": 1.5984405458089668e-05, "loss": 0.1854, "step": 1590 }, { "epoch": 28.07017543859649, "grad_norm": 2.3844401836395264, "learning_rate": 1.4684860298895387e-05, "loss": 0.177, "step": 1600 }, { "epoch": 28.07017543859649, "eval_accuracy": 0.9351111111111111, "eval_f1": 0.8282352941176471, "eval_loss": 0.16903221607208252, "eval_precision": 0.8380952380952381, "eval_recall": 0.8186046511627907, "eval_runtime": 2.2907, "eval_samples_per_second": 98.225, "eval_steps_per_second": 12.66, "step": 1600 }, { "epoch": 28.24561403508772, "grad_norm": 1.8458149433135986, "learning_rate": 1.3385315139701105e-05, "loss": 0.2091, "step": 1610 }, { "epoch": 28.42105263157895, "grad_norm": 0.7621822953224182, "learning_rate": 1.2085769980506823e-05, "loss": 0.1626, "step": 1620 }, { "epoch": 28.596491228070175, "grad_norm": 0.9533030986785889, "learning_rate": 1.078622482131254e-05, "loss": 0.1872, "step": 1630 }, { "epoch": 28.771929824561404, "grad_norm": 1.495856761932373, "learning_rate": 9.486679662118258e-06, "loss": 0.1816, "step": 1640 }, { "epoch": 28.94736842105263, "grad_norm": 1.397376537322998, "learning_rate": 8.187134502923977e-06, "loss": 0.1821, "step": 1650 }, { "epoch": 29.12280701754386, "grad_norm": 2.07928729057312, "learning_rate": 6.887589343729694e-06, "loss": 0.1801, "step": 1660 }, { "epoch": 29.29824561403509, "grad_norm": 1.2872428894042969, "learning_rate": 5.588044184535413e-06, "loss": 0.1835, "step": 1670 }, { "epoch": 29.473684210526315, "grad_norm": 0.40397679805755615, "learning_rate": 4.2884990253411305e-06, "loss": 0.1597, "step": 1680 }, { "epoch": 29.649122807017545, "grad_norm": 1.12138032913208, "learning_rate": 2.9889538661468487e-06, "loss": 0.1771, "step": 1690 }, { "epoch": 29.82456140350877, "grad_norm": 1.8918460607528687, "learning_rate": 1.6894087069525666e-06, "loss": 0.1857, "step": 1700 }, { "epoch": 29.82456140350877, "eval_accuracy": 0.9297777777777778, "eval_f1": 0.8175519630484989, "eval_loss": 0.17081834375858307, "eval_precision": 0.8119266055045872, "eval_recall": 0.8232558139534883, "eval_runtime": 1.7795, "eval_samples_per_second": 126.443, "eval_steps_per_second": 16.297, "step": 1700 }, { "epoch": 30.0, "grad_norm": 1.7220489978790283, "learning_rate": 3.898635477582846e-07, "loss": 0.166, "step": 1710 }, { "epoch": 30.0, "step": 1710, "total_flos": 3.8465920659456e+16, "train_loss": 0.2702594916025797, "train_runtime": 403.8981, "train_samples_per_second": 66.849, "train_steps_per_second": 4.234 } ], "logging_steps": 10, "max_steps": 1710, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.8465920659456e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }