{ "best_metric": 0.17164453864097595, "best_model_checkpoint": "frost-vision-v2-google_vit-base-patch16-224-v2024-11-09/checkpoint-1000", "epoch": 30.0, "eval_steps": 100, "global_step": 2130, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14084507042253522, "grad_norm": 1.7827035188674927, "learning_rate": 9.389671361502347e-06, "loss": 0.6939, "step": 10 }, { "epoch": 0.28169014084507044, "grad_norm": 1.6746207475662231, "learning_rate": 1.8779342723004694e-05, "loss": 0.6183, "step": 20 }, { "epoch": 0.4225352112676056, "grad_norm": 1.225101351737976, "learning_rate": 2.8169014084507046e-05, "loss": 0.4717, "step": 30 }, { "epoch": 0.5633802816901409, "grad_norm": 1.1545004844665527, "learning_rate": 3.755868544600939e-05, "loss": 0.4007, "step": 40 }, { "epoch": 0.704225352112676, "grad_norm": 1.03401517868042, "learning_rate": 4.694835680751174e-05, "loss": 0.3307, "step": 50 }, { "epoch": 0.8450704225352113, "grad_norm": 1.1851330995559692, "learning_rate": 5.633802816901409e-05, "loss": 0.3059, "step": 60 }, { "epoch": 0.9859154929577465, "grad_norm": 1.1718066930770874, "learning_rate": 6.572769953051644e-05, "loss": 0.307, "step": 70 }, { "epoch": 1.1267605633802817, "grad_norm": 1.0279697179794312, "learning_rate": 7.511737089201878e-05, "loss": 0.247, "step": 80 }, { "epoch": 1.267605633802817, "grad_norm": 0.9676464796066284, "learning_rate": 8.450704225352113e-05, "loss": 0.2487, "step": 90 }, { "epoch": 1.408450704225352, "grad_norm": 1.0950922966003418, "learning_rate": 9.389671361502347e-05, "loss": 0.2398, "step": 100 }, { "epoch": 1.408450704225352, "eval_accuracy": 0.9214788732394367, "eval_f1": 0.783284742468416, "eval_loss": 0.20955488085746765, "eval_precision": 0.8502109704641351, "eval_recall": 0.7261261261261261, "eval_runtime": 2.9059, "eval_samples_per_second": 97.732, "eval_steps_per_second": 12.389, "step": 100 }, { "epoch": 1.5492957746478875, "grad_norm": 0.7596128582954407, "learning_rate": 0.00010328638497652582, "loss": 0.2263, "step": 110 }, { "epoch": 1.6901408450704225, "grad_norm": 0.9664576649665833, "learning_rate": 0.00011267605633802819, "loss": 0.2153, "step": 120 }, { "epoch": 1.8309859154929577, "grad_norm": 1.1117621660232544, "learning_rate": 0.00012206572769953053, "loss": 0.2372, "step": 130 }, { "epoch": 1.971830985915493, "grad_norm": 0.805321216583252, "learning_rate": 0.00013145539906103288, "loss": 0.2102, "step": 140 }, { "epoch": 2.112676056338028, "grad_norm": 1.1108847856521606, "learning_rate": 0.00014084507042253522, "loss": 0.2271, "step": 150 }, { "epoch": 2.2535211267605635, "grad_norm": 0.7654628753662109, "learning_rate": 0.00015023474178403755, "loss": 0.1863, "step": 160 }, { "epoch": 2.3943661971830985, "grad_norm": 0.8311775326728821, "learning_rate": 0.00015962441314553992, "loss": 0.1649, "step": 170 }, { "epoch": 2.535211267605634, "grad_norm": 0.9679107069969177, "learning_rate": 0.00016901408450704225, "loss": 0.19, "step": 180 }, { "epoch": 2.676056338028169, "grad_norm": 1.0160714387893677, "learning_rate": 0.00017840375586854461, "loss": 0.2102, "step": 190 }, { "epoch": 2.816901408450704, "grad_norm": 0.6983464956283569, "learning_rate": 0.00018779342723004695, "loss": 0.1746, "step": 200 }, { "epoch": 2.816901408450704, "eval_accuracy": 0.9369718309859155, "eval_f1": 0.8362305580969808, "eval_loss": 0.16762028634548187, "eval_precision": 0.8494423791821561, "eval_recall": 0.8234234234234235, "eval_runtime": 2.0426, "eval_samples_per_second": 139.041, "eval_steps_per_second": 17.625, "step": 200 }, { "epoch": 2.9577464788732395, "grad_norm": 0.7636697888374329, "learning_rate": 0.0001971830985915493, "loss": 0.2038, "step": 210 }, { "epoch": 3.0985915492957745, "grad_norm": 0.7589529156684875, "learning_rate": 0.00019926969222743872, "loss": 0.2047, "step": 220 }, { "epoch": 3.23943661971831, "grad_norm": 0.785400927066803, "learning_rate": 0.00019822639540949402, "loss": 0.1831, "step": 230 }, { "epoch": 3.380281690140845, "grad_norm": 0.9033864736557007, "learning_rate": 0.0001971830985915493, "loss": 0.1743, "step": 240 }, { "epoch": 3.52112676056338, "grad_norm": 1.0347667932510376, "learning_rate": 0.0001961398017736046, "loss": 0.1691, "step": 250 }, { "epoch": 3.6619718309859155, "grad_norm": 0.9231723546981812, "learning_rate": 0.0001950965049556599, "loss": 0.1649, "step": 260 }, { "epoch": 3.802816901408451, "grad_norm": 1.205672264099121, "learning_rate": 0.0001940532081377152, "loss": 0.2018, "step": 270 }, { "epoch": 3.943661971830986, "grad_norm": 0.6101861000061035, "learning_rate": 0.0001930099113197705, "loss": 0.1805, "step": 280 }, { "epoch": 4.084507042253521, "grad_norm": 0.8451747298240662, "learning_rate": 0.0001919666145018258, "loss": 0.1479, "step": 290 }, { "epoch": 4.225352112676056, "grad_norm": 1.2933802604675293, "learning_rate": 0.0001909233176838811, "loss": 0.1316, "step": 300 }, { "epoch": 4.225352112676056, "eval_accuracy": 0.928169014084507, "eval_f1": 0.8125, "eval_loss": 0.17500653862953186, "eval_precision": 0.8292682926829268, "eval_recall": 0.7963963963963964, "eval_runtime": 2.0499, "eval_samples_per_second": 138.543, "eval_steps_per_second": 17.562, "step": 300 }, { "epoch": 4.366197183098592, "grad_norm": 1.1605525016784668, "learning_rate": 0.00018988002086593636, "loss": 0.1595, "step": 310 }, { "epoch": 4.507042253521127, "grad_norm": 0.7514552474021912, "learning_rate": 0.00018883672404799165, "loss": 0.1455, "step": 320 }, { "epoch": 4.647887323943662, "grad_norm": 0.9141556620597839, "learning_rate": 0.00018779342723004695, "loss": 0.1737, "step": 330 }, { "epoch": 4.788732394366197, "grad_norm": 0.7212592363357544, "learning_rate": 0.00018675013041210225, "loss": 0.1464, "step": 340 }, { "epoch": 4.929577464788732, "grad_norm": 1.1275196075439453, "learning_rate": 0.00018570683359415754, "loss": 0.2212, "step": 350 }, { "epoch": 5.070422535211268, "grad_norm": 0.8327043652534485, "learning_rate": 0.00018466353677621284, "loss": 0.144, "step": 360 }, { "epoch": 5.211267605633803, "grad_norm": 0.8596853017807007, "learning_rate": 0.00018362023995826813, "loss": 0.1347, "step": 370 }, { "epoch": 5.352112676056338, "grad_norm": 0.8283455967903137, "learning_rate": 0.00018257694314032343, "loss": 0.1558, "step": 380 }, { "epoch": 5.492957746478873, "grad_norm": 0.7472031116485596, "learning_rate": 0.00018153364632237873, "loss": 0.1336, "step": 390 }, { "epoch": 5.633802816901408, "grad_norm": 0.8499220013618469, "learning_rate": 0.00018049034950443402, "loss": 0.1305, "step": 400 }, { "epoch": 5.633802816901408, "eval_accuracy": 0.9341549295774648, "eval_f1": 0.8270120259019427, "eval_loss": 0.16712401807308197, "eval_precision": 0.8498098859315589, "eval_recall": 0.8054054054054054, "eval_runtime": 4.4386, "eval_samples_per_second": 63.984, "eval_steps_per_second": 8.111, "step": 400 }, { "epoch": 5.774647887323944, "grad_norm": 0.8095440864562988, "learning_rate": 0.00017944705268648932, "loss": 0.1625, "step": 410 }, { "epoch": 5.915492957746479, "grad_norm": 0.7673202753067017, "learning_rate": 0.00017840375586854461, "loss": 0.1327, "step": 420 }, { "epoch": 6.056338028169014, "grad_norm": 1.032628059387207, "learning_rate": 0.0001773604590505999, "loss": 0.1508, "step": 430 }, { "epoch": 6.197183098591549, "grad_norm": 0.7713437676429749, "learning_rate": 0.0001763171622326552, "loss": 0.1286, "step": 440 }, { "epoch": 6.338028169014084, "grad_norm": 0.5040615200996399, "learning_rate": 0.0001752738654147105, "loss": 0.1253, "step": 450 }, { "epoch": 6.47887323943662, "grad_norm": 0.8310524225234985, "learning_rate": 0.0001742305685967658, "loss": 0.1268, "step": 460 }, { "epoch": 6.619718309859155, "grad_norm": 0.9123954176902771, "learning_rate": 0.00017318727177882107, "loss": 0.1325, "step": 470 }, { "epoch": 6.76056338028169, "grad_norm": 0.5047963857650757, "learning_rate": 0.00017214397496087636, "loss": 0.1225, "step": 480 }, { "epoch": 6.901408450704225, "grad_norm": 0.5679495930671692, "learning_rate": 0.00017110067814293166, "loss": 0.1156, "step": 490 }, { "epoch": 7.042253521126761, "grad_norm": 0.6002945899963379, "learning_rate": 0.00017005738132498696, "loss": 0.1119, "step": 500 }, { "epoch": 7.042253521126761, "eval_accuracy": 0.9316901408450704, "eval_f1": 0.8239564428312159, "eval_loss": 0.17465578019618988, "eval_precision": 0.829981718464351, "eval_recall": 0.818018018018018, "eval_runtime": 2.9059, "eval_samples_per_second": 97.733, "eval_steps_per_second": 12.389, "step": 500 }, { "epoch": 7.183098591549296, "grad_norm": 0.7332452535629272, "learning_rate": 0.00016901408450704225, "loss": 0.1237, "step": 510 }, { "epoch": 7.323943661971831, "grad_norm": 0.5455431342124939, "learning_rate": 0.00016797078768909755, "loss": 0.1333, "step": 520 }, { "epoch": 7.464788732394366, "grad_norm": 0.7222278714179993, "learning_rate": 0.00016692749087115284, "loss": 0.1126, "step": 530 }, { "epoch": 7.605633802816901, "grad_norm": 0.6804931163787842, "learning_rate": 0.00016588419405320814, "loss": 0.1242, "step": 540 }, { "epoch": 7.746478873239437, "grad_norm": 0.6327610015869141, "learning_rate": 0.00016484089723526344, "loss": 0.1079, "step": 550 }, { "epoch": 7.887323943661972, "grad_norm": 0.7840393781661987, "learning_rate": 0.00016379760041731873, "loss": 0.0932, "step": 560 }, { "epoch": 8.028169014084508, "grad_norm": 0.9229692220687866, "learning_rate": 0.00016275430359937403, "loss": 0.1163, "step": 570 }, { "epoch": 8.169014084507042, "grad_norm": 0.7363346815109253, "learning_rate": 0.00016171100678142932, "loss": 0.116, "step": 580 }, { "epoch": 8.309859154929578, "grad_norm": 0.6889787912368774, "learning_rate": 0.00016066770996348462, "loss": 0.1043, "step": 590 }, { "epoch": 8.450704225352112, "grad_norm": 0.43690377473831177, "learning_rate": 0.00015962441314553992, "loss": 0.0913, "step": 600 }, { "epoch": 8.450704225352112, "eval_accuracy": 0.9415492957746479, "eval_f1": 0.8504504504504504, "eval_loss": 0.15154092013835907, "eval_precision": 0.8504504504504504, "eval_recall": 0.8504504504504504, "eval_runtime": 2.0762, "eval_samples_per_second": 136.789, "eval_steps_per_second": 17.339, "step": 600 }, { "epoch": 8.591549295774648, "grad_norm": 0.6768948435783386, "learning_rate": 0.0001585811163275952, "loss": 0.1107, "step": 610 }, { "epoch": 8.732394366197184, "grad_norm": 0.308359295129776, "learning_rate": 0.0001575378195096505, "loss": 0.0979, "step": 620 }, { "epoch": 8.873239436619718, "grad_norm": 0.825429379940033, "learning_rate": 0.0001564945226917058, "loss": 0.1135, "step": 630 }, { "epoch": 9.014084507042254, "grad_norm": 0.5222116708755493, "learning_rate": 0.00015545122587376107, "loss": 0.1038, "step": 640 }, { "epoch": 9.154929577464788, "grad_norm": 0.36319026350975037, "learning_rate": 0.00015440792905581637, "loss": 0.0911, "step": 650 }, { "epoch": 9.295774647887324, "grad_norm": 0.7786665558815002, "learning_rate": 0.00015336463223787167, "loss": 0.0948, "step": 660 }, { "epoch": 9.43661971830986, "grad_norm": 0.462139368057251, "learning_rate": 0.00015232133541992696, "loss": 0.0854, "step": 670 }, { "epoch": 9.577464788732394, "grad_norm": 0.5465391278266907, "learning_rate": 0.00015127803860198226, "loss": 0.069, "step": 680 }, { "epoch": 9.71830985915493, "grad_norm": 0.4502487778663635, "learning_rate": 0.00015023474178403755, "loss": 0.0854, "step": 690 }, { "epoch": 9.859154929577464, "grad_norm": 0.7790795564651489, "learning_rate": 0.00014919144496609285, "loss": 0.0964, "step": 700 }, { "epoch": 9.859154929577464, "eval_accuracy": 0.9376760563380282, "eval_f1": 0.8418230563002681, "eval_loss": 0.16803742945194244, "eval_precision": 0.8351063829787234, "eval_recall": 0.8486486486486486, "eval_runtime": 2.0298, "eval_samples_per_second": 139.919, "eval_steps_per_second": 17.736, "step": 700 }, { "epoch": 10.0, "grad_norm": 0.28997573256492615, "learning_rate": 0.00014814814814814815, "loss": 0.0819, "step": 710 }, { "epoch": 10.140845070422536, "grad_norm": 0.49839815497398376, "learning_rate": 0.00014710485133020344, "loss": 0.0639, "step": 720 }, { "epoch": 10.28169014084507, "grad_norm": 0.6128919720649719, "learning_rate": 0.00014606155451225874, "loss": 0.0783, "step": 730 }, { "epoch": 10.422535211267606, "grad_norm": 0.7849855422973633, "learning_rate": 0.00014501825769431403, "loss": 0.0762, "step": 740 }, { "epoch": 10.56338028169014, "grad_norm": 0.3956754803657532, "learning_rate": 0.00014397496087636933, "loss": 0.082, "step": 750 }, { "epoch": 10.704225352112676, "grad_norm": 0.9166898131370544, "learning_rate": 0.00014293166405842463, "loss": 0.0856, "step": 760 }, { "epoch": 10.845070422535212, "grad_norm": 0.7876815795898438, "learning_rate": 0.00014188836724047992, "loss": 0.0822, "step": 770 }, { "epoch": 10.985915492957746, "grad_norm": 0.41309139132499695, "learning_rate": 0.00014084507042253522, "loss": 0.0822, "step": 780 }, { "epoch": 11.126760563380282, "grad_norm": 0.5138004422187805, "learning_rate": 0.00013980177360459051, "loss": 0.0666, "step": 790 }, { "epoch": 11.267605633802816, "grad_norm": 0.6155378222465515, "learning_rate": 0.00013875847678664578, "loss": 0.0659, "step": 800 }, { "epoch": 11.267605633802816, "eval_accuracy": 0.9274647887323944, "eval_f1": 0.8144144144144144, "eval_loss": 0.1891278326511383, "eval_precision": 0.8144144144144144, "eval_recall": 0.8144144144144144, "eval_runtime": 2.8812, "eval_samples_per_second": 98.569, "eval_steps_per_second": 12.495, "step": 800 }, { "epoch": 11.408450704225352, "grad_norm": 0.7656810283660889, "learning_rate": 0.00013771517996870108, "loss": 0.0947, "step": 810 }, { "epoch": 11.549295774647888, "grad_norm": 0.4831065237522125, "learning_rate": 0.00013667188315075638, "loss": 0.0836, "step": 820 }, { "epoch": 11.690140845070422, "grad_norm": 0.7897108793258667, "learning_rate": 0.0001356285863328117, "loss": 0.0679, "step": 830 }, { "epoch": 11.830985915492958, "grad_norm": 0.6471636891365051, "learning_rate": 0.000134585289514867, "loss": 0.1061, "step": 840 }, { "epoch": 11.971830985915492, "grad_norm": 0.6389226317405701, "learning_rate": 0.0001335419926969223, "loss": 0.0735, "step": 850 }, { "epoch": 12.112676056338028, "grad_norm": 0.4462195336818695, "learning_rate": 0.0001324986958789776, "loss": 0.0827, "step": 860 }, { "epoch": 12.253521126760564, "grad_norm": 0.42086052894592285, "learning_rate": 0.00013145539906103288, "loss": 0.0662, "step": 870 }, { "epoch": 12.394366197183098, "grad_norm": 0.7582985162734985, "learning_rate": 0.00013041210224308818, "loss": 0.0877, "step": 880 }, { "epoch": 12.535211267605634, "grad_norm": 0.6410930156707764, "learning_rate": 0.00012936880542514348, "loss": 0.0659, "step": 890 }, { "epoch": 12.676056338028168, "grad_norm": 1.0315440893173218, "learning_rate": 0.00012832550860719877, "loss": 0.0706, "step": 900 }, { "epoch": 12.676056338028168, "eval_accuracy": 0.9320422535211268, "eval_f1": 0.8234217749313815, "eval_loss": 0.17875343561172485, "eval_precision": 0.8364312267657993, "eval_recall": 0.8108108108108109, "eval_runtime": 5.3131, "eval_samples_per_second": 53.453, "eval_steps_per_second": 6.776, "step": 900 }, { "epoch": 12.816901408450704, "grad_norm": 0.7598835825920105, "learning_rate": 0.00012728221178925407, "loss": 0.0675, "step": 910 }, { "epoch": 12.95774647887324, "grad_norm": 0.425029993057251, "learning_rate": 0.00012623891497130936, "loss": 0.0691, "step": 920 }, { "epoch": 13.098591549295774, "grad_norm": 0.39021769165992737, "learning_rate": 0.00012519561815336466, "loss": 0.0645, "step": 930 }, { "epoch": 13.23943661971831, "grad_norm": 0.6953460574150085, "learning_rate": 0.00012415232133541993, "loss": 0.0695, "step": 940 }, { "epoch": 13.380281690140846, "grad_norm": 0.8735091090202332, "learning_rate": 0.00012310902451747523, "loss": 0.0523, "step": 950 }, { "epoch": 13.52112676056338, "grad_norm": 0.9906308650970459, "learning_rate": 0.00012206572769953053, "loss": 0.0591, "step": 960 }, { "epoch": 13.661971830985916, "grad_norm": 0.7933890223503113, "learning_rate": 0.00012102243088158583, "loss": 0.0614, "step": 970 }, { "epoch": 13.80281690140845, "grad_norm": 0.4813683331012726, "learning_rate": 0.00011997913406364111, "loss": 0.056, "step": 980 }, { "epoch": 13.943661971830986, "grad_norm": 0.4669698476791382, "learning_rate": 0.00011893583724569641, "loss": 0.0583, "step": 990 }, { "epoch": 14.084507042253522, "grad_norm": 0.6160480976104736, "learning_rate": 0.0001178925404277517, "loss": 0.069, "step": 1000 }, { "epoch": 14.084507042253522, "eval_accuracy": 0.9411971830985916, "eval_f1": 0.8485947416137806, "eval_loss": 0.17164453864097595, "eval_precision": 0.8540145985401459, "eval_recall": 0.8432432432432433, "eval_runtime": 3.5722, "eval_samples_per_second": 79.503, "eval_steps_per_second": 10.078, "step": 1000 }, { "epoch": 14.225352112676056, "grad_norm": 0.3227427899837494, "learning_rate": 0.000116849243609807, "loss": 0.0677, "step": 1010 }, { "epoch": 14.366197183098592, "grad_norm": 0.6062753796577454, "learning_rate": 0.0001158059467918623, "loss": 0.0551, "step": 1020 }, { "epoch": 14.507042253521126, "grad_norm": 0.37269070744514465, "learning_rate": 0.0001147626499739176, "loss": 0.0722, "step": 1030 }, { "epoch": 14.647887323943662, "grad_norm": 0.5302610397338867, "learning_rate": 0.00011371935315597289, "loss": 0.069, "step": 1040 }, { "epoch": 14.788732394366198, "grad_norm": 0.3961109519004822, "learning_rate": 0.00011267605633802819, "loss": 0.0508, "step": 1050 }, { "epoch": 14.929577464788732, "grad_norm": 0.5787925720214844, "learning_rate": 0.00011163275952008347, "loss": 0.0513, "step": 1060 }, { "epoch": 15.070422535211268, "grad_norm": 0.4569196403026581, "learning_rate": 0.00011058946270213876, "loss": 0.0504, "step": 1070 }, { "epoch": 15.211267605633802, "grad_norm": 0.21465392410755157, "learning_rate": 0.00010954616588419406, "loss": 0.0588, "step": 1080 }, { "epoch": 15.352112676056338, "grad_norm": 0.35163068771362305, "learning_rate": 0.00010850286906624936, "loss": 0.0537, "step": 1090 }, { "epoch": 15.492957746478874, "grad_norm": 0.43309277296066284, "learning_rate": 0.00010745957224830465, "loss": 0.0543, "step": 1100 }, { "epoch": 15.492957746478874, "eval_accuracy": 0.9362676056338028, "eval_f1": 0.8340971585701191, "eval_loss": 0.18467187881469727, "eval_precision": 0.8488805970149254, "eval_recall": 0.8198198198198198, "eval_runtime": 3.083, "eval_samples_per_second": 92.117, "eval_steps_per_second": 11.677, "step": 1100 }, { "epoch": 15.633802816901408, "grad_norm": 0.3860970139503479, "learning_rate": 0.00010641627543035995, "loss": 0.0508, "step": 1110 }, { "epoch": 15.774647887323944, "grad_norm": 0.3090944290161133, "learning_rate": 0.00010537297861241524, "loss": 0.0505, "step": 1120 }, { "epoch": 15.915492957746478, "grad_norm": 0.2525678277015686, "learning_rate": 0.00010432968179447054, "loss": 0.0587, "step": 1130 }, { "epoch": 16.056338028169016, "grad_norm": 0.7144160866737366, "learning_rate": 0.00010328638497652582, "loss": 0.0667, "step": 1140 }, { "epoch": 16.197183098591548, "grad_norm": 0.30514514446258545, "learning_rate": 0.00010224308815858112, "loss": 0.0479, "step": 1150 }, { "epoch": 16.338028169014084, "grad_norm": 0.3105055093765259, "learning_rate": 0.00010119979134063642, "loss": 0.0382, "step": 1160 }, { "epoch": 16.47887323943662, "grad_norm": 0.20530584454536438, "learning_rate": 0.00010015649452269171, "loss": 0.044, "step": 1170 }, { "epoch": 16.619718309859156, "grad_norm": 0.31594333052635193, "learning_rate": 9.911319770474701e-05, "loss": 0.0418, "step": 1180 }, { "epoch": 16.760563380281692, "grad_norm": 0.8795649409294128, "learning_rate": 9.80699008868023e-05, "loss": 0.0567, "step": 1190 }, { "epoch": 16.901408450704224, "grad_norm": 0.655049741268158, "learning_rate": 9.70266040688576e-05, "loss": 0.0515, "step": 1200 }, { "epoch": 16.901408450704224, "eval_accuracy": 0.9408450704225352, "eval_f1": 0.8469945355191257, "eval_loss": 0.17406770586967468, "eval_precision": 0.856353591160221, "eval_recall": 0.8378378378378378, "eval_runtime": 6.0044, "eval_samples_per_second": 47.299, "eval_steps_per_second": 5.996, "step": 1200 }, { "epoch": 17.04225352112676, "grad_norm": 0.5397161841392517, "learning_rate": 9.59833072509129e-05, "loss": 0.0345, "step": 1210 }, { "epoch": 17.183098591549296, "grad_norm": 0.4117436408996582, "learning_rate": 9.494001043296818e-05, "loss": 0.0343, "step": 1220 }, { "epoch": 17.323943661971832, "grad_norm": 0.6277669072151184, "learning_rate": 9.389671361502347e-05, "loss": 0.0447, "step": 1230 }, { "epoch": 17.464788732394368, "grad_norm": 0.46144264936447144, "learning_rate": 9.285341679707877e-05, "loss": 0.0593, "step": 1240 }, { "epoch": 17.6056338028169, "grad_norm": 0.25574731826782227, "learning_rate": 9.181011997913407e-05, "loss": 0.0556, "step": 1250 }, { "epoch": 17.746478873239436, "grad_norm": 0.6915749311447144, "learning_rate": 9.076682316118936e-05, "loss": 0.05, "step": 1260 }, { "epoch": 17.887323943661972, "grad_norm": 1.0586333274841309, "learning_rate": 8.972352634324466e-05, "loss": 0.058, "step": 1270 }, { "epoch": 18.028169014084508, "grad_norm": 0.4597637951374054, "learning_rate": 8.868022952529996e-05, "loss": 0.0559, "step": 1280 }, { "epoch": 18.169014084507044, "grad_norm": 0.33155715465545654, "learning_rate": 8.763693270735525e-05, "loss": 0.0466, "step": 1290 }, { "epoch": 18.309859154929576, "grad_norm": 1.1297364234924316, "learning_rate": 8.659363588941053e-05, "loss": 0.0489, "step": 1300 }, { "epoch": 18.309859154929576, "eval_accuracy": 0.9461267605633803, "eval_f1": 0.8620378719567178, "eval_loss": 0.17934167385101318, "eval_precision": 0.8628158844765343, "eval_recall": 0.8612612612612612, "eval_runtime": 4.5996, "eval_samples_per_second": 61.744, "eval_steps_per_second": 7.827, "step": 1300 }, { "epoch": 18.450704225352112, "grad_norm": 0.4485849142074585, "learning_rate": 8.555033907146583e-05, "loss": 0.0346, "step": 1310 }, { "epoch": 18.591549295774648, "grad_norm": 0.43537047505378723, "learning_rate": 8.450704225352113e-05, "loss": 0.0475, "step": 1320 }, { "epoch": 18.732394366197184, "grad_norm": 1.3907153606414795, "learning_rate": 8.346374543557642e-05, "loss": 0.053, "step": 1330 }, { "epoch": 18.87323943661972, "grad_norm": 0.093208909034729, "learning_rate": 8.242044861763172e-05, "loss": 0.0261, "step": 1340 }, { "epoch": 19.014084507042252, "grad_norm": 0.5435425043106079, "learning_rate": 8.137715179968701e-05, "loss": 0.0443, "step": 1350 }, { "epoch": 19.154929577464788, "grad_norm": 0.1869378536939621, "learning_rate": 8.033385498174231e-05, "loss": 0.0519, "step": 1360 }, { "epoch": 19.295774647887324, "grad_norm": 0.4740372896194458, "learning_rate": 7.92905581637976e-05, "loss": 0.0421, "step": 1370 }, { "epoch": 19.43661971830986, "grad_norm": 0.30357396602630615, "learning_rate": 7.82472613458529e-05, "loss": 0.0352, "step": 1380 }, { "epoch": 19.577464788732396, "grad_norm": 0.33824267983436584, "learning_rate": 7.720396452790818e-05, "loss": 0.0382, "step": 1390 }, { "epoch": 19.718309859154928, "grad_norm": 0.4455321133136749, "learning_rate": 7.616066770996348e-05, "loss": 0.0339, "step": 1400 }, { "epoch": 19.718309859154928, "eval_accuracy": 0.9443661971830986, "eval_f1": 0.8568840579710145, "eval_loss": 0.18061767518520355, "eval_precision": 0.8615664845173042, "eval_recall": 0.8522522522522522, "eval_runtime": 5.3442, "eval_samples_per_second": 53.142, "eval_steps_per_second": 6.736, "step": 1400 }, { "epoch": 19.859154929577464, "grad_norm": 0.35027405619621277, "learning_rate": 7.511737089201878e-05, "loss": 0.0372, "step": 1410 }, { "epoch": 20.0, "grad_norm": 0.6206357479095459, "learning_rate": 7.407407407407407e-05, "loss": 0.0517, "step": 1420 }, { "epoch": 20.140845070422536, "grad_norm": 0.403296560049057, "learning_rate": 7.303077725612937e-05, "loss": 0.0337, "step": 1430 }, { "epoch": 20.281690140845072, "grad_norm": 0.11548905074596405, "learning_rate": 7.198748043818467e-05, "loss": 0.026, "step": 1440 }, { "epoch": 20.422535211267604, "grad_norm": 0.21008001267910004, "learning_rate": 7.094418362023996e-05, "loss": 0.045, "step": 1450 }, { "epoch": 20.56338028169014, "grad_norm": 0.537829577922821, "learning_rate": 6.990088680229526e-05, "loss": 0.0616, "step": 1460 }, { "epoch": 20.704225352112676, "grad_norm": 0.7347171306610107, "learning_rate": 6.885758998435054e-05, "loss": 0.0405, "step": 1470 }, { "epoch": 20.845070422535212, "grad_norm": 0.26924213767051697, "learning_rate": 6.781429316640585e-05, "loss": 0.0305, "step": 1480 }, { "epoch": 20.985915492957748, "grad_norm": 0.20689117908477783, "learning_rate": 6.677099634846115e-05, "loss": 0.0396, "step": 1490 }, { "epoch": 21.12676056338028, "grad_norm": 0.9461146593093872, "learning_rate": 6.572769953051644e-05, "loss": 0.0409, "step": 1500 }, { "epoch": 21.12676056338028, "eval_accuracy": 0.9440140845070423, "eval_f1": 0.8568856885688569, "eval_loss": 0.1784217804670334, "eval_precision": 0.8561151079136691, "eval_recall": 0.8576576576576577, "eval_runtime": 4.9417, "eval_samples_per_second": 57.471, "eval_steps_per_second": 7.285, "step": 1500 }, { "epoch": 21.267605633802816, "grad_norm": 0.32038381695747375, "learning_rate": 6.468440271257174e-05, "loss": 0.0378, "step": 1510 }, { "epoch": 21.408450704225352, "grad_norm": 0.14526519179344177, "learning_rate": 6.364110589462703e-05, "loss": 0.0325, "step": 1520 }, { "epoch": 21.549295774647888, "grad_norm": 0.3381955921649933, "learning_rate": 6.259780907668233e-05, "loss": 0.0397, "step": 1530 }, { "epoch": 21.690140845070424, "grad_norm": 0.622305691242218, "learning_rate": 6.155451225873761e-05, "loss": 0.0515, "step": 1540 }, { "epoch": 21.830985915492956, "grad_norm": 0.5114145874977112, "learning_rate": 6.0511215440792915e-05, "loss": 0.0353, "step": 1550 }, { "epoch": 21.971830985915492, "grad_norm": 0.2956676781177521, "learning_rate": 5.9467918622848205e-05, "loss": 0.0391, "step": 1560 }, { "epoch": 22.112676056338028, "grad_norm": 0.44752395153045654, "learning_rate": 5.84246218049035e-05, "loss": 0.037, "step": 1570 }, { "epoch": 22.253521126760564, "grad_norm": 0.633104681968689, "learning_rate": 5.73813249869588e-05, "loss": 0.0316, "step": 1580 }, { "epoch": 22.3943661971831, "grad_norm": 0.16630919277668, "learning_rate": 5.633802816901409e-05, "loss": 0.028, "step": 1590 }, { "epoch": 22.535211267605632, "grad_norm": 0.24732300639152527, "learning_rate": 5.529473135106938e-05, "loss": 0.0275, "step": 1600 }, { "epoch": 22.535211267605632, "eval_accuracy": 0.9436619718309859, "eval_f1": 0.8548094373865699, "eval_loss": 0.18389663100242615, "eval_precision": 0.8610603290676416, "eval_recall": 0.8486486486486486, "eval_runtime": 2.0239, "eval_samples_per_second": 140.322, "eval_steps_per_second": 17.787, "step": 1600 }, { "epoch": 22.676056338028168, "grad_norm": 0.7257568836212158, "learning_rate": 5.425143453312468e-05, "loss": 0.025, "step": 1610 }, { "epoch": 22.816901408450704, "grad_norm": 0.22960269451141357, "learning_rate": 5.3208137715179974e-05, "loss": 0.0197, "step": 1620 }, { "epoch": 22.95774647887324, "grad_norm": 0.5778278112411499, "learning_rate": 5.216484089723527e-05, "loss": 0.0304, "step": 1630 }, { "epoch": 23.098591549295776, "grad_norm": 0.2570374608039856, "learning_rate": 5.112154407929056e-05, "loss": 0.0314, "step": 1640 }, { "epoch": 23.239436619718308, "grad_norm": 0.9027012586593628, "learning_rate": 5.0078247261345856e-05, "loss": 0.0323, "step": 1650 }, { "epoch": 23.380281690140844, "grad_norm": 0.14204645156860352, "learning_rate": 4.903495044340115e-05, "loss": 0.0248, "step": 1660 }, { "epoch": 23.52112676056338, "grad_norm": 1.2641935348510742, "learning_rate": 4.799165362545645e-05, "loss": 0.0279, "step": 1670 }, { "epoch": 23.661971830985916, "grad_norm": 0.4698958098888397, "learning_rate": 4.694835680751174e-05, "loss": 0.0414, "step": 1680 }, { "epoch": 23.802816901408452, "grad_norm": 0.35781243443489075, "learning_rate": 4.5905059989567033e-05, "loss": 0.0297, "step": 1690 }, { "epoch": 23.943661971830984, "grad_norm": 0.21168813109397888, "learning_rate": 4.486176317162233e-05, "loss": 0.0231, "step": 1700 }, { "epoch": 23.943661971830984, "eval_accuracy": 0.9415492957746479, "eval_f1": 0.847985347985348, "eval_loss": 0.18647325038909912, "eval_precision": 0.8621973929236499, "eval_recall": 0.8342342342342343, "eval_runtime": 2.0446, "eval_samples_per_second": 138.905, "eval_steps_per_second": 17.608, "step": 1700 }, { "epoch": 24.08450704225352, "grad_norm": 0.10770827531814575, "learning_rate": 4.3818466353677626e-05, "loss": 0.0287, "step": 1710 }, { "epoch": 24.225352112676056, "grad_norm": 0.4609077274799347, "learning_rate": 4.2775169535732915e-05, "loss": 0.0233, "step": 1720 }, { "epoch": 24.366197183098592, "grad_norm": 0.25236785411834717, "learning_rate": 4.173187271778821e-05, "loss": 0.0217, "step": 1730 }, { "epoch": 24.507042253521128, "grad_norm": 0.5101929903030396, "learning_rate": 4.068857589984351e-05, "loss": 0.034, "step": 1740 }, { "epoch": 24.647887323943664, "grad_norm": 0.47603026032447815, "learning_rate": 3.96452790818988e-05, "loss": 0.0259, "step": 1750 }, { "epoch": 24.788732394366196, "grad_norm": 0.27863284945487976, "learning_rate": 3.860198226395409e-05, "loss": 0.0238, "step": 1760 }, { "epoch": 24.929577464788732, "grad_norm": 0.3325282633304596, "learning_rate": 3.755868544600939e-05, "loss": 0.0361, "step": 1770 }, { "epoch": 25.070422535211268, "grad_norm": 0.05602555721998215, "learning_rate": 3.6515388628064685e-05, "loss": 0.0257, "step": 1780 }, { "epoch": 25.211267605633804, "grad_norm": 0.447354257106781, "learning_rate": 3.547209181011998e-05, "loss": 0.0223, "step": 1790 }, { "epoch": 25.352112676056336, "grad_norm": 0.6506637334823608, "learning_rate": 3.442879499217527e-05, "loss": 0.0204, "step": 1800 }, { "epoch": 25.352112676056336, "eval_accuracy": 0.9404929577464789, "eval_f1": 0.8481581311769991, "eval_loss": 0.18843932449817657, "eval_precision": 0.8458781362007168, "eval_recall": 0.8504504504504504, "eval_runtime": 2.5356, "eval_samples_per_second": 112.004, "eval_steps_per_second": 14.198, "step": 1800 }, { "epoch": 25.492957746478872, "grad_norm": 0.920504629611969, "learning_rate": 3.338549817423057e-05, "loss": 0.0348, "step": 1810 }, { "epoch": 25.633802816901408, "grad_norm": 0.6100931763648987, "learning_rate": 3.234220135628587e-05, "loss": 0.0364, "step": 1820 }, { "epoch": 25.774647887323944, "grad_norm": 0.3321876525878906, "learning_rate": 3.1298904538341165e-05, "loss": 0.0237, "step": 1830 }, { "epoch": 25.91549295774648, "grad_norm": 0.14392893016338348, "learning_rate": 3.0255607720396458e-05, "loss": 0.0292, "step": 1840 }, { "epoch": 26.056338028169016, "grad_norm": 0.2463379055261612, "learning_rate": 2.921231090245175e-05, "loss": 0.0227, "step": 1850 }, { "epoch": 26.197183098591548, "grad_norm": 0.13013498485088348, "learning_rate": 2.8169014084507046e-05, "loss": 0.0238, "step": 1860 }, { "epoch": 26.338028169014084, "grad_norm": 0.11781849712133408, "learning_rate": 2.712571726656234e-05, "loss": 0.0348, "step": 1870 }, { "epoch": 26.47887323943662, "grad_norm": 0.19677963852882385, "learning_rate": 2.6082420448617635e-05, "loss": 0.0222, "step": 1880 }, { "epoch": 26.619718309859156, "grad_norm": 0.32697793841362, "learning_rate": 2.5039123630672928e-05, "loss": 0.0178, "step": 1890 }, { "epoch": 26.760563380281692, "grad_norm": 0.32760560512542725, "learning_rate": 2.3995826812728224e-05, "loss": 0.0245, "step": 1900 }, { "epoch": 26.760563380281692, "eval_accuracy": 0.9376760563380282, "eval_f1": 0.8409703504043127, "eval_loss": 0.19348740577697754, "eval_precision": 0.8387096774193549, "eval_recall": 0.8432432432432433, "eval_runtime": 3.7052, "eval_samples_per_second": 76.649, "eval_steps_per_second": 9.716, "step": 1900 }, { "epoch": 26.901408450704224, "grad_norm": 0.23730838298797607, "learning_rate": 2.2952529994783517e-05, "loss": 0.0246, "step": 1910 }, { "epoch": 27.04225352112676, "grad_norm": 1.0503803491592407, "learning_rate": 2.1909233176838813e-05, "loss": 0.0235, "step": 1920 }, { "epoch": 27.183098591549296, "grad_norm": 0.8191851377487183, "learning_rate": 2.0865936358894105e-05, "loss": 0.0158, "step": 1930 }, { "epoch": 27.323943661971832, "grad_norm": 0.2159469872713089, "learning_rate": 1.98226395409494e-05, "loss": 0.0123, "step": 1940 }, { "epoch": 27.464788732394368, "grad_norm": 0.11513730883598328, "learning_rate": 1.8779342723004694e-05, "loss": 0.0202, "step": 1950 }, { "epoch": 27.6056338028169, "grad_norm": 0.43825531005859375, "learning_rate": 1.773604590505999e-05, "loss": 0.0189, "step": 1960 }, { "epoch": 27.746478873239436, "grad_norm": 0.6861417293548584, "learning_rate": 1.6692749087115286e-05, "loss": 0.0293, "step": 1970 }, { "epoch": 27.887323943661972, "grad_norm": 0.4493369162082672, "learning_rate": 1.5649452269170582e-05, "loss": 0.0169, "step": 1980 }, { "epoch": 28.028169014084508, "grad_norm": 0.07595561444759369, "learning_rate": 1.4606155451225875e-05, "loss": 0.0284, "step": 1990 }, { "epoch": 28.169014084507044, "grad_norm": 0.29891517758369446, "learning_rate": 1.356285863328117e-05, "loss": 0.0202, "step": 2000 }, { "epoch": 28.169014084507044, "eval_accuracy": 0.9394366197183098, "eval_f1": 0.8456014362657092, "eval_loss": 0.18883755803108215, "eval_precision": 0.8425760286225402, "eval_recall": 0.8486486486486486, "eval_runtime": 2.034, "eval_samples_per_second": 139.628, "eval_steps_per_second": 17.699, "step": 2000 }, { "epoch": 28.309859154929576, "grad_norm": 0.08975574374198914, "learning_rate": 1.2519561815336464e-05, "loss": 0.0133, "step": 2010 }, { "epoch": 28.450704225352112, "grad_norm": 0.14954060316085815, "learning_rate": 1.1476264997391758e-05, "loss": 0.0155, "step": 2020 }, { "epoch": 28.591549295774648, "grad_norm": 0.32284414768218994, "learning_rate": 1.0432968179447053e-05, "loss": 0.0213, "step": 2030 }, { "epoch": 28.732394366197184, "grad_norm": 0.3323783278465271, "learning_rate": 9.389671361502347e-06, "loss": 0.0186, "step": 2040 }, { "epoch": 28.87323943661972, "grad_norm": 0.22159124910831451, "learning_rate": 8.346374543557643e-06, "loss": 0.0298, "step": 2050 }, { "epoch": 29.014084507042252, "grad_norm": 0.13430196046829224, "learning_rate": 7.303077725612938e-06, "loss": 0.0156, "step": 2060 }, { "epoch": 29.154929577464788, "grad_norm": 0.19941723346710205, "learning_rate": 6.259780907668232e-06, "loss": 0.0144, "step": 2070 }, { "epoch": 29.295774647887324, "grad_norm": 0.049271322786808014, "learning_rate": 5.216484089723526e-06, "loss": 0.0243, "step": 2080 }, { "epoch": 29.43661971830986, "grad_norm": 0.5687671899795532, "learning_rate": 4.173187271778822e-06, "loss": 0.0143, "step": 2090 }, { "epoch": 29.577464788732396, "grad_norm": 0.10554461926221848, "learning_rate": 3.129890453834116e-06, "loss": 0.0187, "step": 2100 }, { "epoch": 29.577464788732396, "eval_accuracy": 0.9415492957746479, "eval_f1": 0.8501805054151624, "eval_loss": 0.19139410555362701, "eval_precision": 0.8517179023508138, "eval_recall": 0.8486486486486486, "eval_runtime": 2.1198, "eval_samples_per_second": 133.977, "eval_steps_per_second": 16.983, "step": 2100 }, { "epoch": 29.718309859154928, "grad_norm": 0.6691368818283081, "learning_rate": 2.086593635889411e-06, "loss": 0.0132, "step": 2110 }, { "epoch": 29.859154929577464, "grad_norm": 0.6403772234916687, "learning_rate": 1.0432968179447054e-06, "loss": 0.0282, "step": 2120 }, { "epoch": 30.0, "grad_norm": 0.040005020797252655, "learning_rate": 0.0, "loss": 0.0234, "step": 2130 }, { "epoch": 30.0, "step": 2130, "total_flos": 2.6364665116584346e+18, "train_loss": 0.08832058483656023, "train_runtime": 625.5786, "train_samples_per_second": 54.382, "train_steps_per_second": 3.405 }, { "epoch": 30.0, "eval_accuracy": 0.9411971830985916, "eval_f1": 0.8485947416137806, "eval_loss": 0.17164453864097595, "eval_precision": 0.8540145985401459, "eval_recall": 0.8432432432432433, "eval_runtime": 2.933, "eval_samples_per_second": 96.828, "eval_steps_per_second": 12.274, "step": 2130 }, { "epoch": 30.0, "eval_accuracy": 0.9411971830985916, "eval_f1": 0.8485947416137806, "eval_loss": 0.17164453864097595, "eval_precision": 0.8540145985401459, "eval_recall": 0.8432432432432433, "eval_runtime": 4.3995, "eval_samples_per_second": 64.552, "eval_steps_per_second": 8.183, "step": 2130 }, { "epoch": 30.0, "eval_accuracy": 0.9411971830985916, "eval_f1": 0.8485947416137806, "eval_loss": 0.17164453864097595, "eval_precision": 0.8540145985401459, "eval_recall": 0.8432432432432433, "eval_runtime": 2.3539, "eval_samples_per_second": 120.651, "eval_steps_per_second": 15.294, "step": 2130 }, { "epoch": 30.0, "eval_accuracy": 0.9411971830985916, "eval_f1": 0.8485947416137806, "eval_loss": 0.17164453864097595, "eval_precision": 0.8540145985401459, "eval_recall": 0.8432432432432433, "eval_runtime": 3.881, "eval_samples_per_second": 73.177, "eval_steps_per_second": 9.276, "step": 2130 } ], "logging_steps": 10, "max_steps": 2130, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.6364665116584346e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }