|
{ |
|
"best_metric": 0.18472303450107574, |
|
"best_model_checkpoint": "frost-vision-v2-google_vit-base-patch16-384-v2024-11-10/checkpoint-500", |
|
"epoch": 30.0, |
|
"eval_steps": 100, |
|
"global_step": 2130, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14084507042253522, |
|
"grad_norm": 1.9532430171966553, |
|
"learning_rate": 9.389671361502347e-06, |
|
"loss": 0.7081, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.28169014084507044, |
|
"grad_norm": 1.4905153512954712, |
|
"learning_rate": 1.8779342723004694e-05, |
|
"loss": 0.5966, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4225352112676056, |
|
"grad_norm": 1.0629397630691528, |
|
"learning_rate": 2.8169014084507046e-05, |
|
"loss": 0.4634, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"grad_norm": 1.1944268941879272, |
|
"learning_rate": 3.755868544600939e-05, |
|
"loss": 0.4103, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.704225352112676, |
|
"grad_norm": 1.0600364208221436, |
|
"learning_rate": 4.694835680751174e-05, |
|
"loss": 0.3456, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8450704225352113, |
|
"grad_norm": 0.9409961700439453, |
|
"learning_rate": 5.633802816901409e-05, |
|
"loss": 0.2917, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.9859154929577465, |
|
"grad_norm": 0.9866936206817627, |
|
"learning_rate": 6.572769953051644e-05, |
|
"loss": 0.2536, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.1267605633802817, |
|
"grad_norm": 0.8040021657943726, |
|
"learning_rate": 7.511737089201878e-05, |
|
"loss": 0.236, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.267605633802817, |
|
"grad_norm": 0.7174156904220581, |
|
"learning_rate": 8.450704225352113e-05, |
|
"loss": 0.2305, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"grad_norm": 1.468618631362915, |
|
"learning_rate": 9.389671361502347e-05, |
|
"loss": 0.2243, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.408450704225352, |
|
"eval_accuracy": 0.9242957746478874, |
|
"eval_f1": 0.7981220657276995, |
|
"eval_loss": 0.20880411565303802, |
|
"eval_precision": 0.8534136546184738, |
|
"eval_recall": 0.7495590828924162, |
|
"eval_runtime": 4.7453, |
|
"eval_samples_per_second": 59.848, |
|
"eval_steps_per_second": 7.586, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.5492957746478875, |
|
"grad_norm": 1.7064496278762817, |
|
"learning_rate": 0.00010328638497652582, |
|
"loss": 0.2272, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.6901408450704225, |
|
"grad_norm": 0.8617244362831116, |
|
"learning_rate": 0.00011267605633802819, |
|
"loss": 0.2142, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.8309859154929577, |
|
"grad_norm": 0.8898526430130005, |
|
"learning_rate": 0.00012206572769953053, |
|
"loss": 0.2208, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.971830985915493, |
|
"grad_norm": 1.2177144289016724, |
|
"learning_rate": 0.00013145539906103288, |
|
"loss": 0.2147, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.112676056338028, |
|
"grad_norm": 1.7734003067016602, |
|
"learning_rate": 0.00014084507042253522, |
|
"loss": 0.1993, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.2535211267605635, |
|
"grad_norm": 0.9507364630699158, |
|
"learning_rate": 0.00015023474178403755, |
|
"loss": 0.1806, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.3943661971830985, |
|
"grad_norm": 0.9511057734489441, |
|
"learning_rate": 0.00015962441314553992, |
|
"loss": 0.1913, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.535211267605634, |
|
"grad_norm": 1.419704556465149, |
|
"learning_rate": 0.00016901408450704225, |
|
"loss": 0.1949, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.676056338028169, |
|
"grad_norm": 0.8608860373497009, |
|
"learning_rate": 0.00017840375586854461, |
|
"loss": 0.1907, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"grad_norm": 1.1266043186187744, |
|
"learning_rate": 0.00018779342723004695, |
|
"loss": 0.2438, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.816901408450704, |
|
"eval_accuracy": 0.9299295774647888, |
|
"eval_f1": 0.8102955195424214, |
|
"eval_loss": 0.18185150623321533, |
|
"eval_precision": 0.8817427385892116, |
|
"eval_recall": 0.7495590828924162, |
|
"eval_runtime": 4.6977, |
|
"eval_samples_per_second": 60.455, |
|
"eval_steps_per_second": 7.663, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.9577464788732395, |
|
"grad_norm": 0.9358466267585754, |
|
"learning_rate": 0.0001971830985915493, |
|
"loss": 0.1805, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.0985915492957745, |
|
"grad_norm": 0.5238935351371765, |
|
"learning_rate": 0.00019926969222743872, |
|
"loss": 0.1614, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.23943661971831, |
|
"grad_norm": 0.8152980208396912, |
|
"learning_rate": 0.00019822639540949402, |
|
"loss": 0.1632, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.380281690140845, |
|
"grad_norm": 0.8670392036437988, |
|
"learning_rate": 0.0001971830985915493, |
|
"loss": 0.17, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.52112676056338, |
|
"grad_norm": 0.719203531742096, |
|
"learning_rate": 0.0001961398017736046, |
|
"loss": 0.1843, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.6619718309859155, |
|
"grad_norm": 1.1024322509765625, |
|
"learning_rate": 0.0001950965049556599, |
|
"loss": 0.1843, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.802816901408451, |
|
"grad_norm": 1.002933382987976, |
|
"learning_rate": 0.0001940532081377152, |
|
"loss": 0.1657, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.943661971830986, |
|
"grad_norm": 0.6921798586845398, |
|
"learning_rate": 0.0001930099113197705, |
|
"loss": 0.1881, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.084507042253521, |
|
"grad_norm": 0.5872893333435059, |
|
"learning_rate": 0.0001919666145018258, |
|
"loss": 0.1775, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"grad_norm": 0.7991660833358765, |
|
"learning_rate": 0.0001909233176838811, |
|
"loss": 0.1338, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.225352112676056, |
|
"eval_accuracy": 0.9376760563380282, |
|
"eval_f1": 0.8448729184925504, |
|
"eval_loss": 0.1607813537120819, |
|
"eval_precision": 0.8397212543554007, |
|
"eval_recall": 0.8500881834215167, |
|
"eval_runtime": 4.716, |
|
"eval_samples_per_second": 60.22, |
|
"eval_steps_per_second": 7.634, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.366197183098592, |
|
"grad_norm": 0.3925558030605316, |
|
"learning_rate": 0.00018988002086593636, |
|
"loss": 0.1397, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.507042253521127, |
|
"grad_norm": 0.9107651114463806, |
|
"learning_rate": 0.00018883672404799165, |
|
"loss": 0.1328, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.647887323943662, |
|
"grad_norm": 0.6482114791870117, |
|
"learning_rate": 0.00018779342723004695, |
|
"loss": 0.1343, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.788732394366197, |
|
"grad_norm": 0.8836609125137329, |
|
"learning_rate": 0.00018675013041210225, |
|
"loss": 0.1527, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.929577464788732, |
|
"grad_norm": 0.8424317240715027, |
|
"learning_rate": 0.00018570683359415754, |
|
"loss": 0.1338, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.070422535211268, |
|
"grad_norm": 0.824109673500061, |
|
"learning_rate": 0.00018466353677621284, |
|
"loss": 0.1455, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.211267605633803, |
|
"grad_norm": 0.7583167552947998, |
|
"learning_rate": 0.00018362023995826813, |
|
"loss": 0.1045, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.352112676056338, |
|
"grad_norm": 0.9834734797477722, |
|
"learning_rate": 0.00018257694314032343, |
|
"loss": 0.1297, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.492957746478873, |
|
"grad_norm": 0.6775982975959778, |
|
"learning_rate": 0.00018153364632237873, |
|
"loss": 0.1382, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"grad_norm": 0.4346470236778259, |
|
"learning_rate": 0.00018049034950443402, |
|
"loss": 0.1224, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.633802816901408, |
|
"eval_accuracy": 0.9271126760563381, |
|
"eval_f1": 0.8179419525065963, |
|
"eval_loss": 0.17346476018428802, |
|
"eval_precision": 0.8157894736842105, |
|
"eval_recall": 0.8201058201058201, |
|
"eval_runtime": 4.3151, |
|
"eval_samples_per_second": 65.816, |
|
"eval_steps_per_second": 8.343, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.774647887323944, |
|
"grad_norm": 0.6250389218330383, |
|
"learning_rate": 0.00017944705268648932, |
|
"loss": 0.1535, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.915492957746479, |
|
"grad_norm": 0.7581302523612976, |
|
"learning_rate": 0.00017840375586854461, |
|
"loss": 0.1217, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.056338028169014, |
|
"grad_norm": 1.0700663328170776, |
|
"learning_rate": 0.0001773604590505999, |
|
"loss": 0.1206, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.197183098591549, |
|
"grad_norm": 0.9656069278717041, |
|
"learning_rate": 0.0001763171622326552, |
|
"loss": 0.1063, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.338028169014084, |
|
"grad_norm": 0.8910039067268372, |
|
"learning_rate": 0.0001752738654147105, |
|
"loss": 0.1105, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.47887323943662, |
|
"grad_norm": 1.0498372316360474, |
|
"learning_rate": 0.0001742305685967658, |
|
"loss": 0.1064, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.619718309859155, |
|
"grad_norm": 0.8038674592971802, |
|
"learning_rate": 0.00017318727177882107, |
|
"loss": 0.1048, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.76056338028169, |
|
"grad_norm": 0.5430222749710083, |
|
"learning_rate": 0.00017214397496087636, |
|
"loss": 0.1081, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.901408450704225, |
|
"grad_norm": 0.6888221502304077, |
|
"learning_rate": 0.00017110067814293166, |
|
"loss": 0.1176, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"grad_norm": 0.5913430452346802, |
|
"learning_rate": 0.00017005738132498696, |
|
"loss": 0.1065, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.042253521126761, |
|
"eval_accuracy": 0.9274647887323944, |
|
"eval_f1": 0.8186619718309859, |
|
"eval_loss": 0.18472303450107574, |
|
"eval_precision": 0.8172231985940246, |
|
"eval_recall": 0.8201058201058201, |
|
"eval_runtime": 4.2833, |
|
"eval_samples_per_second": 66.305, |
|
"eval_steps_per_second": 8.405, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.183098591549296, |
|
"grad_norm": 0.7211888432502747, |
|
"learning_rate": 0.00016901408450704225, |
|
"loss": 0.0907, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.323943661971831, |
|
"grad_norm": 0.9712802767753601, |
|
"learning_rate": 0.00016797078768909755, |
|
"loss": 0.1174, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 7.464788732394366, |
|
"grad_norm": 0.5647371411323547, |
|
"learning_rate": 0.00016692749087115284, |
|
"loss": 0.1188, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 7.605633802816901, |
|
"grad_norm": 0.8738060593605042, |
|
"learning_rate": 0.00016588419405320814, |
|
"loss": 0.1261, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 7.746478873239437, |
|
"grad_norm": 0.4535655081272125, |
|
"learning_rate": 0.00016484089723526344, |
|
"loss": 0.0865, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 7.887323943661972, |
|
"grad_norm": 0.4072222411632538, |
|
"learning_rate": 0.00016379760041731873, |
|
"loss": 0.0974, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.028169014084508, |
|
"grad_norm": 0.36978864669799805, |
|
"learning_rate": 0.00016275430359937403, |
|
"loss": 0.0989, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.169014084507042, |
|
"grad_norm": 0.6507691740989685, |
|
"learning_rate": 0.00016171100678142932, |
|
"loss": 0.0989, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 8.309859154929578, |
|
"grad_norm": 0.8923565149307251, |
|
"learning_rate": 0.00016066770996348462, |
|
"loss": 0.0901, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 8.450704225352112, |
|
"grad_norm": 0.8466383814811707, |
|
"learning_rate": 0.00015962441314553992, |
|
"loss": 0.1008, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.450704225352112, |
|
"eval_accuracy": 0.9404929577464789, |
|
"eval_f1": 0.8505747126436781, |
|
"eval_loss": 0.17097999155521393, |
|
"eval_precision": 0.8528368794326241, |
|
"eval_recall": 0.8483245149911817, |
|
"eval_runtime": 3.9495, |
|
"eval_samples_per_second": 71.908, |
|
"eval_steps_per_second": 9.115, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 8.591549295774648, |
|
"grad_norm": 0.568720817565918, |
|
"learning_rate": 0.0001585811163275952, |
|
"loss": 0.084, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 8.732394366197184, |
|
"grad_norm": 0.6025291085243225, |
|
"learning_rate": 0.0001575378195096505, |
|
"loss": 0.1059, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 8.873239436619718, |
|
"grad_norm": 0.6547625660896301, |
|
"learning_rate": 0.0001564945226917058, |
|
"loss": 0.0871, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.014084507042254, |
|
"grad_norm": 0.46392133831977844, |
|
"learning_rate": 0.00015545122587376107, |
|
"loss": 0.0856, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 9.154929577464788, |
|
"grad_norm": 0.29296088218688965, |
|
"learning_rate": 0.00015440792905581637, |
|
"loss": 0.0883, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 9.295774647887324, |
|
"grad_norm": 0.6499446630477905, |
|
"learning_rate": 0.00015336463223787167, |
|
"loss": 0.0632, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.43661971830986, |
|
"grad_norm": 0.27808037400245667, |
|
"learning_rate": 0.00015232133541992696, |
|
"loss": 0.0731, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 9.577464788732394, |
|
"grad_norm": 0.6585447788238525, |
|
"learning_rate": 0.00015127803860198226, |
|
"loss": 0.073, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 9.71830985915493, |
|
"grad_norm": 0.5550310611724854, |
|
"learning_rate": 0.00015023474178403755, |
|
"loss": 0.0762, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 9.859154929577464, |
|
"grad_norm": 0.8603278398513794, |
|
"learning_rate": 0.00014919144496609285, |
|
"loss": 0.1005, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 9.859154929577464, |
|
"eval_accuracy": 0.9383802816901409, |
|
"eval_f1": 0.8404740200546946, |
|
"eval_loss": 0.18234650790691376, |
|
"eval_precision": 0.869811320754717, |
|
"eval_recall": 0.8130511463844797, |
|
"eval_runtime": 4.0053, |
|
"eval_samples_per_second": 70.906, |
|
"eval_steps_per_second": 8.988, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.5497912168502808, |
|
"learning_rate": 0.00014814814814814815, |
|
"loss": 0.0828, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 10.140845070422536, |
|
"grad_norm": 0.5562108755111694, |
|
"learning_rate": 0.00014710485133020344, |
|
"loss": 0.0771, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.28169014084507, |
|
"grad_norm": 0.5177081823348999, |
|
"learning_rate": 0.00014606155451225874, |
|
"loss": 0.0699, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 10.422535211267606, |
|
"grad_norm": 1.1959189176559448, |
|
"learning_rate": 0.00014501825769431403, |
|
"loss": 0.0706, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 10.56338028169014, |
|
"grad_norm": 0.41302013397216797, |
|
"learning_rate": 0.00014397496087636933, |
|
"loss": 0.0562, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 10.704225352112676, |
|
"grad_norm": 0.8963214159011841, |
|
"learning_rate": 0.00014293166405842463, |
|
"loss": 0.0795, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 10.845070422535212, |
|
"grad_norm": 0.5553390979766846, |
|
"learning_rate": 0.00014188836724047992, |
|
"loss": 0.0841, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 10.985915492957746, |
|
"grad_norm": 0.5120792984962463, |
|
"learning_rate": 0.00014084507042253522, |
|
"loss": 0.0571, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.126760563380282, |
|
"grad_norm": 0.8299622535705566, |
|
"learning_rate": 0.00013980177360459051, |
|
"loss": 0.0674, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 11.267605633802816, |
|
"grad_norm": 0.8283889889717102, |
|
"learning_rate": 0.00013875847678664578, |
|
"loss": 0.0756, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.267605633802816, |
|
"eval_accuracy": 0.9415492957746479, |
|
"eval_f1": 0.8520499108734403, |
|
"eval_loss": 0.1770765483379364, |
|
"eval_precision": 0.8612612612612612, |
|
"eval_recall": 0.8430335097001763, |
|
"eval_runtime": 3.9753, |
|
"eval_samples_per_second": 71.441, |
|
"eval_steps_per_second": 9.056, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 11.408450704225352, |
|
"grad_norm": 0.7964696288108826, |
|
"learning_rate": 0.00013771517996870108, |
|
"loss": 0.0674, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 11.549295774647888, |
|
"grad_norm": 0.5819689631462097, |
|
"learning_rate": 0.00013667188315075638, |
|
"loss": 0.0744, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 11.690140845070422, |
|
"grad_norm": 0.4807089865207672, |
|
"learning_rate": 0.0001356285863328117, |
|
"loss": 0.09, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 11.830985915492958, |
|
"grad_norm": 0.4166727364063263, |
|
"learning_rate": 0.000134585289514867, |
|
"loss": 0.057, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 11.971830985915492, |
|
"grad_norm": 0.5973660349845886, |
|
"learning_rate": 0.0001335419926969223, |
|
"loss": 0.0678, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 12.112676056338028, |
|
"grad_norm": 0.8271676898002625, |
|
"learning_rate": 0.0001324986958789776, |
|
"loss": 0.0676, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 12.253521126760564, |
|
"grad_norm": 0.4148211181163788, |
|
"learning_rate": 0.00013145539906103288, |
|
"loss": 0.0724, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 12.394366197183098, |
|
"grad_norm": 0.8119566440582275, |
|
"learning_rate": 0.00013041210224308818, |
|
"loss": 0.0513, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 12.535211267605634, |
|
"grad_norm": 0.3651484549045563, |
|
"learning_rate": 0.00012936880542514348, |
|
"loss": 0.0681, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 12.676056338028168, |
|
"grad_norm": 0.5199844241142273, |
|
"learning_rate": 0.00012832550860719877, |
|
"loss": 0.0653, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.676056338028168, |
|
"eval_accuracy": 0.9323943661971831, |
|
"eval_f1": 0.8309859154929577, |
|
"eval_loss": 0.1971179097890854, |
|
"eval_precision": 0.8295254833040422, |
|
"eval_recall": 0.8324514991181657, |
|
"eval_runtime": 4.0066, |
|
"eval_samples_per_second": 70.884, |
|
"eval_steps_per_second": 8.985, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 12.816901408450704, |
|
"grad_norm": 0.5670329332351685, |
|
"learning_rate": 0.00012728221178925407, |
|
"loss": 0.0619, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 12.95774647887324, |
|
"grad_norm": 0.5461100935935974, |
|
"learning_rate": 0.00012623891497130936, |
|
"loss": 0.0411, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 13.098591549295774, |
|
"grad_norm": 0.5733127593994141, |
|
"learning_rate": 0.00012519561815336466, |
|
"loss": 0.0544, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.23943661971831, |
|
"grad_norm": 1.001535177230835, |
|
"learning_rate": 0.00012415232133541993, |
|
"loss": 0.0723, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 13.380281690140846, |
|
"grad_norm": 0.4734277129173279, |
|
"learning_rate": 0.00012310902451747523, |
|
"loss": 0.0457, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 13.52112676056338, |
|
"grad_norm": 0.41569235920906067, |
|
"learning_rate": 0.00012206572769953053, |
|
"loss": 0.0611, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 13.661971830985916, |
|
"grad_norm": 0.656200110912323, |
|
"learning_rate": 0.00012102243088158583, |
|
"loss": 0.0578, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 13.80281690140845, |
|
"grad_norm": 0.4374938905239105, |
|
"learning_rate": 0.00011997913406364111, |
|
"loss": 0.0493, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 13.943661971830986, |
|
"grad_norm": 0.4476602077484131, |
|
"learning_rate": 0.00011893583724569641, |
|
"loss": 0.0645, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.084507042253522, |
|
"grad_norm": 0.4841475784778595, |
|
"learning_rate": 0.0001178925404277517, |
|
"loss": 0.0367, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.084507042253522, |
|
"eval_accuracy": 0.9295774647887324, |
|
"eval_f1": 0.8220640569395018, |
|
"eval_loss": 0.2123415768146515, |
|
"eval_precision": 0.829443447037702, |
|
"eval_recall": 0.8148148148148148, |
|
"eval_runtime": 3.9803, |
|
"eval_samples_per_second": 71.351, |
|
"eval_steps_per_second": 9.045, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.225352112676056, |
|
"grad_norm": 0.36940839886665344, |
|
"learning_rate": 0.000116849243609807, |
|
"loss": 0.0583, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 14.366197183098592, |
|
"grad_norm": 0.360748291015625, |
|
"learning_rate": 0.0001158059467918623, |
|
"loss": 0.0557, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 14.507042253521126, |
|
"grad_norm": 0.5215537548065186, |
|
"learning_rate": 0.0001147626499739176, |
|
"loss": 0.0687, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 14.647887323943662, |
|
"grad_norm": 0.504537045955658, |
|
"learning_rate": 0.00011371935315597289, |
|
"loss": 0.07, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 14.788732394366198, |
|
"grad_norm": 0.6165650486946106, |
|
"learning_rate": 0.00011267605633802819, |
|
"loss": 0.0527, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 14.929577464788732, |
|
"grad_norm": 0.4196755588054657, |
|
"learning_rate": 0.00011163275952008347, |
|
"loss": 0.039, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 15.070422535211268, |
|
"grad_norm": 0.789492130279541, |
|
"learning_rate": 0.00011058946270213876, |
|
"loss": 0.0518, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 15.211267605633802, |
|
"grad_norm": 0.3040258288383484, |
|
"learning_rate": 0.00010954616588419406, |
|
"loss": 0.0426, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 15.352112676056338, |
|
"grad_norm": 0.5353887677192688, |
|
"learning_rate": 0.00010850286906624936, |
|
"loss": 0.06, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 15.492957746478874, |
|
"grad_norm": 0.4748779833316803, |
|
"learning_rate": 0.00010745957224830465, |
|
"loss": 0.0459, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.492957746478874, |
|
"eval_accuracy": 0.9334507042253521, |
|
"eval_f1": 0.832, |
|
"eval_loss": 0.20056356489658356, |
|
"eval_precision": 0.8387096774193549, |
|
"eval_recall": 0.8253968253968254, |
|
"eval_runtime": 3.8873, |
|
"eval_samples_per_second": 73.058, |
|
"eval_steps_per_second": 9.261, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 15.633802816901408, |
|
"grad_norm": 1.0769853591918945, |
|
"learning_rate": 0.00010641627543035995, |
|
"loss": 0.0564, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 15.774647887323944, |
|
"grad_norm": 0.2092517763376236, |
|
"learning_rate": 0.00010537297861241524, |
|
"loss": 0.037, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 15.915492957746478, |
|
"grad_norm": 0.4689568877220154, |
|
"learning_rate": 0.00010432968179447054, |
|
"loss": 0.0401, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 16.056338028169016, |
|
"grad_norm": 0.2354661524295807, |
|
"learning_rate": 0.00010328638497652582, |
|
"loss": 0.0459, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 16.197183098591548, |
|
"grad_norm": 0.11875198781490326, |
|
"learning_rate": 0.00010224308815858112, |
|
"loss": 0.0369, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 16.338028169014084, |
|
"grad_norm": 0.5551555156707764, |
|
"learning_rate": 0.00010119979134063642, |
|
"loss": 0.034, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 16.47887323943662, |
|
"grad_norm": 0.35518983006477356, |
|
"learning_rate": 0.00010015649452269171, |
|
"loss": 0.0523, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 16.619718309859156, |
|
"grad_norm": 0.6546155214309692, |
|
"learning_rate": 9.911319770474701e-05, |
|
"loss": 0.0472, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 16.760563380281692, |
|
"grad_norm": 1.1401140689849854, |
|
"learning_rate": 9.80699008868023e-05, |
|
"loss": 0.0565, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 16.901408450704224, |
|
"grad_norm": 0.5762555599212646, |
|
"learning_rate": 9.70266040688576e-05, |
|
"loss": 0.0559, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 16.901408450704224, |
|
"eval_accuracy": 0.9313380281690141, |
|
"eval_f1": 0.8232094288304623, |
|
"eval_loss": 0.20966318249702454, |
|
"eval_precision": 0.8470149253731343, |
|
"eval_recall": 0.800705467372134, |
|
"eval_runtime": 3.9873, |
|
"eval_samples_per_second": 71.227, |
|
"eval_steps_per_second": 9.029, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 17.04225352112676, |
|
"grad_norm": 0.5705350637435913, |
|
"learning_rate": 9.59833072509129e-05, |
|
"loss": 0.0455, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 17.183098591549296, |
|
"grad_norm": 0.08219115436077118, |
|
"learning_rate": 9.494001043296818e-05, |
|
"loss": 0.029, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 17.323943661971832, |
|
"grad_norm": 0.41505807638168335, |
|
"learning_rate": 9.389671361502347e-05, |
|
"loss": 0.05, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 17.464788732394368, |
|
"grad_norm": 0.5585283637046814, |
|
"learning_rate": 9.285341679707877e-05, |
|
"loss": 0.0424, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 17.6056338028169, |
|
"grad_norm": 0.4773949980735779, |
|
"learning_rate": 9.181011997913407e-05, |
|
"loss": 0.043, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 17.746478873239436, |
|
"grad_norm": 0.5886436700820923, |
|
"learning_rate": 9.076682316118936e-05, |
|
"loss": 0.043, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 17.887323943661972, |
|
"grad_norm": 0.45917659997940063, |
|
"learning_rate": 8.972352634324466e-05, |
|
"loss": 0.0444, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 18.028169014084508, |
|
"grad_norm": 0.5295466780662537, |
|
"learning_rate": 8.868022952529996e-05, |
|
"loss": 0.0385, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 18.169014084507044, |
|
"grad_norm": 0.24455229938030243, |
|
"learning_rate": 8.763693270735525e-05, |
|
"loss": 0.0263, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 18.309859154929576, |
|
"grad_norm": 0.34424206614494324, |
|
"learning_rate": 8.659363588941053e-05, |
|
"loss": 0.0382, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 18.309859154929576, |
|
"eval_accuracy": 0.9352112676056338, |
|
"eval_f1": 0.8371681415929203, |
|
"eval_loss": 0.2055191993713379, |
|
"eval_precision": 0.8401420959147424, |
|
"eval_recall": 0.8342151675485009, |
|
"eval_runtime": 4.0365, |
|
"eval_samples_per_second": 70.358, |
|
"eval_steps_per_second": 8.919, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 18.450704225352112, |
|
"grad_norm": 0.10903073102235794, |
|
"learning_rate": 8.555033907146583e-05, |
|
"loss": 0.0276, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 18.591549295774648, |
|
"grad_norm": 0.4288780987262726, |
|
"learning_rate": 8.450704225352113e-05, |
|
"loss": 0.0457, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 18.732394366197184, |
|
"grad_norm": 0.9041640758514404, |
|
"learning_rate": 8.346374543557642e-05, |
|
"loss": 0.0404, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 18.87323943661972, |
|
"grad_norm": 0.3118440806865692, |
|
"learning_rate": 8.242044861763172e-05, |
|
"loss": 0.0404, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 19.014084507042252, |
|
"grad_norm": 0.39209580421447754, |
|
"learning_rate": 8.137715179968701e-05, |
|
"loss": 0.029, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 19.154929577464788, |
|
"grad_norm": 0.6325451731681824, |
|
"learning_rate": 8.033385498174231e-05, |
|
"loss": 0.0299, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 19.295774647887324, |
|
"grad_norm": 0.33151936531066895, |
|
"learning_rate": 7.92905581637976e-05, |
|
"loss": 0.0277, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 19.43661971830986, |
|
"grad_norm": 0.40647652745246887, |
|
"learning_rate": 7.82472613458529e-05, |
|
"loss": 0.0285, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 19.577464788732396, |
|
"grad_norm": 0.3072868585586548, |
|
"learning_rate": 7.720396452790818e-05, |
|
"loss": 0.0314, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 19.718309859154928, |
|
"grad_norm": 0.6823632121086121, |
|
"learning_rate": 7.616066770996348e-05, |
|
"loss": 0.0361, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 19.718309859154928, |
|
"eval_accuracy": 0.9334507042253521, |
|
"eval_f1": 0.8304932735426009, |
|
"eval_loss": 0.20700237154960632, |
|
"eval_precision": 0.8448905109489051, |
|
"eval_recall": 0.8165784832451499, |
|
"eval_runtime": 4.3433, |
|
"eval_samples_per_second": 65.388, |
|
"eval_steps_per_second": 8.289, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 19.859154929577464, |
|
"grad_norm": 0.4812707006931305, |
|
"learning_rate": 7.511737089201878e-05, |
|
"loss": 0.0515, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.9425677061080933, |
|
"learning_rate": 7.407407407407407e-05, |
|
"loss": 0.0408, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 20.140845070422536, |
|
"grad_norm": 0.23266366124153137, |
|
"learning_rate": 7.303077725612937e-05, |
|
"loss": 0.033, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 20.281690140845072, |
|
"grad_norm": 0.6943417191505432, |
|
"learning_rate": 7.198748043818467e-05, |
|
"loss": 0.0371, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 20.422535211267604, |
|
"grad_norm": 0.5533664226531982, |
|
"learning_rate": 7.094418362023996e-05, |
|
"loss": 0.0333, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 20.56338028169014, |
|
"grad_norm": 0.3414551913738251, |
|
"learning_rate": 6.990088680229526e-05, |
|
"loss": 0.0344, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 20.704225352112676, |
|
"grad_norm": 0.21747425198554993, |
|
"learning_rate": 6.885758998435054e-05, |
|
"loss": 0.0507, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 20.845070422535212, |
|
"grad_norm": 0.10106892138719559, |
|
"learning_rate": 6.781429316640585e-05, |
|
"loss": 0.0336, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 20.985915492957748, |
|
"grad_norm": 0.11431819200515747, |
|
"learning_rate": 6.677099634846115e-05, |
|
"loss": 0.0349, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 21.12676056338028, |
|
"grad_norm": 0.24515172839164734, |
|
"learning_rate": 6.572769953051644e-05, |
|
"loss": 0.0358, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 21.12676056338028, |
|
"eval_accuracy": 0.9397887323943662, |
|
"eval_f1": 0.8458070333633905, |
|
"eval_loss": 0.1959269642829895, |
|
"eval_precision": 0.8653136531365314, |
|
"eval_recall": 0.8271604938271605, |
|
"eval_runtime": 4.6741, |
|
"eval_samples_per_second": 60.76, |
|
"eval_steps_per_second": 7.702, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 21.267605633802816, |
|
"grad_norm": 0.1288166046142578, |
|
"learning_rate": 6.468440271257174e-05, |
|
"loss": 0.0319, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 21.408450704225352, |
|
"grad_norm": 0.3846772611141205, |
|
"learning_rate": 6.364110589462703e-05, |
|
"loss": 0.0292, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 21.549295774647888, |
|
"grad_norm": 0.8348280191421509, |
|
"learning_rate": 6.259780907668233e-05, |
|
"loss": 0.0408, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 21.690140845070424, |
|
"grad_norm": 0.24261720478534698, |
|
"learning_rate": 6.155451225873761e-05, |
|
"loss": 0.0279, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 21.830985915492956, |
|
"grad_norm": 0.5973533391952515, |
|
"learning_rate": 6.0511215440792915e-05, |
|
"loss": 0.0363, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 21.971830985915492, |
|
"grad_norm": 0.3994261622428894, |
|
"learning_rate": 5.9467918622848205e-05, |
|
"loss": 0.0294, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 22.112676056338028, |
|
"grad_norm": 0.2752346992492676, |
|
"learning_rate": 5.84246218049035e-05, |
|
"loss": 0.0355, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 22.253521126760564, |
|
"grad_norm": 0.0635056421160698, |
|
"learning_rate": 5.73813249869588e-05, |
|
"loss": 0.0192, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 22.3943661971831, |
|
"grad_norm": 0.4133341908454895, |
|
"learning_rate": 5.633802816901409e-05, |
|
"loss": 0.0412, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 22.535211267605632, |
|
"grad_norm": 0.30646753311157227, |
|
"learning_rate": 5.529473135106938e-05, |
|
"loss": 0.0382, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 22.535211267605632, |
|
"eval_accuracy": 0.9320422535211268, |
|
"eval_f1": 0.8269058295964126, |
|
"eval_loss": 0.20972707867622375, |
|
"eval_precision": 0.8412408759124088, |
|
"eval_recall": 0.8130511463844797, |
|
"eval_runtime": 4.7317, |
|
"eval_samples_per_second": 60.021, |
|
"eval_steps_per_second": 7.608, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 22.676056338028168, |
|
"grad_norm": 1.3331828117370605, |
|
"learning_rate": 5.425143453312468e-05, |
|
"loss": 0.0331, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 22.816901408450704, |
|
"grad_norm": 0.5091800093650818, |
|
"learning_rate": 5.3208137715179974e-05, |
|
"loss": 0.0187, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 22.95774647887324, |
|
"grad_norm": 0.1486244797706604, |
|
"learning_rate": 5.216484089723527e-05, |
|
"loss": 0.02, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 23.098591549295776, |
|
"grad_norm": 0.07892175763845444, |
|
"learning_rate": 5.112154407929056e-05, |
|
"loss": 0.0254, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 23.239436619718308, |
|
"grad_norm": 0.7007431387901306, |
|
"learning_rate": 5.0078247261345856e-05, |
|
"loss": 0.0179, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 23.380281690140844, |
|
"grad_norm": 0.4025111496448517, |
|
"learning_rate": 4.903495044340115e-05, |
|
"loss": 0.0256, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 23.52112676056338, |
|
"grad_norm": 0.30540022253990173, |
|
"learning_rate": 4.799165362545645e-05, |
|
"loss": 0.0254, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 23.661971830985916, |
|
"grad_norm": 0.45088985562324524, |
|
"learning_rate": 4.694835680751174e-05, |
|
"loss": 0.0326, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 23.802816901408452, |
|
"grad_norm": 0.15087296068668365, |
|
"learning_rate": 4.5905059989567033e-05, |
|
"loss": 0.0229, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 23.943661971830984, |
|
"grad_norm": 0.419587641954422, |
|
"learning_rate": 4.486176317162233e-05, |
|
"loss": 0.0285, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 23.943661971830984, |
|
"eval_accuracy": 0.9415492957746479, |
|
"eval_f1": 0.851520572450805, |
|
"eval_loss": 0.20160087943077087, |
|
"eval_precision": 0.8638838475499092, |
|
"eval_recall": 0.8395061728395061, |
|
"eval_runtime": 4.681, |
|
"eval_samples_per_second": 60.671, |
|
"eval_steps_per_second": 7.691, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 24.08450704225352, |
|
"grad_norm": 0.2833576202392578, |
|
"learning_rate": 4.3818466353677626e-05, |
|
"loss": 0.0136, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 24.225352112676056, |
|
"grad_norm": 0.13959386944770813, |
|
"learning_rate": 4.2775169535732915e-05, |
|
"loss": 0.0195, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 24.366197183098592, |
|
"grad_norm": 0.27296531200408936, |
|
"learning_rate": 4.173187271778821e-05, |
|
"loss": 0.0283, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 24.507042253521128, |
|
"grad_norm": 0.21670201420783997, |
|
"learning_rate": 4.068857589984351e-05, |
|
"loss": 0.0196, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 24.647887323943664, |
|
"grad_norm": 0.5724736452102661, |
|
"learning_rate": 3.96452790818988e-05, |
|
"loss": 0.031, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 24.788732394366196, |
|
"grad_norm": 0.5493142604827881, |
|
"learning_rate": 3.860198226395409e-05, |
|
"loss": 0.0265, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 24.929577464788732, |
|
"grad_norm": 0.21168898046016693, |
|
"learning_rate": 3.755868544600939e-05, |
|
"loss": 0.0224, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 25.070422535211268, |
|
"grad_norm": 0.5312497019767761, |
|
"learning_rate": 3.6515388628064685e-05, |
|
"loss": 0.024, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 25.211267605633804, |
|
"grad_norm": 0.5924656391143799, |
|
"learning_rate": 3.547209181011998e-05, |
|
"loss": 0.0151, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 25.352112676056336, |
|
"grad_norm": 0.05471556633710861, |
|
"learning_rate": 3.442879499217527e-05, |
|
"loss": 0.0141, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 25.352112676056336, |
|
"eval_accuracy": 0.9366197183098591, |
|
"eval_f1": 0.8384201077199281, |
|
"eval_loss": 0.2161274552345276, |
|
"eval_precision": 0.8537477148080439, |
|
"eval_recall": 0.8236331569664903, |
|
"eval_runtime": 4.5245, |
|
"eval_samples_per_second": 62.77, |
|
"eval_steps_per_second": 7.957, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 25.492957746478872, |
|
"grad_norm": 0.2500435709953308, |
|
"learning_rate": 3.338549817423057e-05, |
|
"loss": 0.0177, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 25.633802816901408, |
|
"grad_norm": 0.2633652985095978, |
|
"learning_rate": 3.234220135628587e-05, |
|
"loss": 0.0372, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 25.774647887323944, |
|
"grad_norm": 0.2851497232913971, |
|
"learning_rate": 3.1298904538341165e-05, |
|
"loss": 0.0253, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 25.91549295774648, |
|
"grad_norm": 0.13328520953655243, |
|
"learning_rate": 3.0255607720396458e-05, |
|
"loss": 0.0311, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 26.056338028169016, |
|
"grad_norm": 0.5192949771881104, |
|
"learning_rate": 2.921231090245175e-05, |
|
"loss": 0.0155, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 26.197183098591548, |
|
"grad_norm": 0.16768410801887512, |
|
"learning_rate": 2.8169014084507046e-05, |
|
"loss": 0.0282, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 26.338028169014084, |
|
"grad_norm": 0.045551054179668427, |
|
"learning_rate": 2.712571726656234e-05, |
|
"loss": 0.0194, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 26.47887323943662, |
|
"grad_norm": 0.33393070101737976, |
|
"learning_rate": 2.6082420448617635e-05, |
|
"loss": 0.0139, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 26.619718309859156, |
|
"grad_norm": 0.2400021106004715, |
|
"learning_rate": 2.5039123630672928e-05, |
|
"loss": 0.0111, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 26.760563380281692, |
|
"grad_norm": 0.5022717118263245, |
|
"learning_rate": 2.3995826812728224e-05, |
|
"loss": 0.0179, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 26.760563380281692, |
|
"eval_accuracy": 0.9376760563380282, |
|
"eval_f1": 0.8426666666666667, |
|
"eval_loss": 0.2072737216949463, |
|
"eval_precision": 0.8494623655913979, |
|
"eval_recall": 0.8359788359788359, |
|
"eval_runtime": 4.0574, |
|
"eval_samples_per_second": 69.995, |
|
"eval_steps_per_second": 8.873, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 26.901408450704224, |
|
"grad_norm": 0.41647252440452576, |
|
"learning_rate": 2.2952529994783517e-05, |
|
"loss": 0.0285, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 27.04225352112676, |
|
"grad_norm": 0.3066710829734802, |
|
"learning_rate": 2.1909233176838813e-05, |
|
"loss": 0.0225, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 27.183098591549296, |
|
"grad_norm": 0.09687885642051697, |
|
"learning_rate": 2.0865936358894105e-05, |
|
"loss": 0.0109, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 27.323943661971832, |
|
"grad_norm": 0.25405797362327576, |
|
"learning_rate": 1.98226395409494e-05, |
|
"loss": 0.0204, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 27.464788732394368, |
|
"grad_norm": 0.06933202594518661, |
|
"learning_rate": 1.8779342723004694e-05, |
|
"loss": 0.0274, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 27.6056338028169, |
|
"grad_norm": 0.13852407038211823, |
|
"learning_rate": 1.773604590505999e-05, |
|
"loss": 0.0194, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 27.746478873239436, |
|
"grad_norm": 0.1922004669904709, |
|
"learning_rate": 1.6692749087115286e-05, |
|
"loss": 0.0228, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 27.887323943661972, |
|
"grad_norm": 0.2851165235042572, |
|
"learning_rate": 1.5649452269170582e-05, |
|
"loss": 0.022, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 28.028169014084508, |
|
"grad_norm": 0.30591297149658203, |
|
"learning_rate": 1.4606155451225875e-05, |
|
"loss": 0.0179, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 28.169014084507044, |
|
"grad_norm": 0.9221152067184448, |
|
"learning_rate": 1.356285863328117e-05, |
|
"loss": 0.0263, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.169014084507044, |
|
"eval_accuracy": 0.9390845070422535, |
|
"eval_f1": 0.8456735057983943, |
|
"eval_loss": 0.20973354578018188, |
|
"eval_precision": 0.855595667870036, |
|
"eval_recall": 0.8359788359788359, |
|
"eval_runtime": 3.8982, |
|
"eval_samples_per_second": 72.853, |
|
"eval_steps_per_second": 9.235, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 28.309859154929576, |
|
"grad_norm": 0.2080974280834198, |
|
"learning_rate": 1.2519561815336464e-05, |
|
"loss": 0.0223, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 28.450704225352112, |
|
"grad_norm": 0.265176385641098, |
|
"learning_rate": 1.1476264997391758e-05, |
|
"loss": 0.0156, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 28.591549295774648, |
|
"grad_norm": 0.6490623950958252, |
|
"learning_rate": 1.0432968179447053e-05, |
|
"loss": 0.0143, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 28.732394366197184, |
|
"grad_norm": 0.08013524860143661, |
|
"learning_rate": 9.389671361502347e-06, |
|
"loss": 0.0175, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 28.87323943661972, |
|
"grad_norm": 0.24310840666294098, |
|
"learning_rate": 8.346374543557643e-06, |
|
"loss": 0.0208, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 29.014084507042252, |
|
"grad_norm": 0.21826568245887756, |
|
"learning_rate": 7.303077725612938e-06, |
|
"loss": 0.0209, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 29.154929577464788, |
|
"grad_norm": 0.10844382643699646, |
|
"learning_rate": 6.259780907668232e-06, |
|
"loss": 0.0122, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 29.295774647887324, |
|
"grad_norm": 0.1837497502565384, |
|
"learning_rate": 5.216484089723526e-06, |
|
"loss": 0.0155, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 29.43661971830986, |
|
"grad_norm": 0.8813931941986084, |
|
"learning_rate": 4.173187271778822e-06, |
|
"loss": 0.0229, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 29.577464788732396, |
|
"grad_norm": 0.10835859924554825, |
|
"learning_rate": 3.129890453834116e-06, |
|
"loss": 0.0191, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 29.577464788732396, |
|
"eval_accuracy": 0.9376760563380282, |
|
"eval_f1": 0.8415398388540735, |
|
"eval_loss": 0.21010643243789673, |
|
"eval_precision": 0.8545454545454545, |
|
"eval_recall": 0.8289241622574955, |
|
"eval_runtime": 4.0827, |
|
"eval_samples_per_second": 69.561, |
|
"eval_steps_per_second": 8.818, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 29.718309859154928, |
|
"grad_norm": 0.5981993079185486, |
|
"learning_rate": 2.086593635889411e-06, |
|
"loss": 0.024, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 29.859154929577464, |
|
"grad_norm": 0.029736319556832314, |
|
"learning_rate": 1.0432968179447054e-06, |
|
"loss": 0.0147, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.1090225800871849, |
|
"learning_rate": 0.0, |
|
"loss": 0.0163, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 2130, |
|
"total_flos": 7.774335248148726e+18, |
|
"train_loss": 0.08187752739294595, |
|
"train_runtime": 1287.2767, |
|
"train_samples_per_second": 26.428, |
|
"train_steps_per_second": 1.655 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2130, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.774335248148726e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|