nemik's picture
End of training
aef37d4 verified
{
"best_metric": 0.15389865636825562,
"best_model_checkpoint": "mobilevitv2-1.0-imagenet1k-256-finetuned_v2024-10-21-frost/checkpoint-1000",
"epoch": 30.0,
"eval_steps": 100,
"global_step": 1710,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.17543859649122806,
"grad_norm": 0.3124828040599823,
"learning_rate": 1.1695906432748537e-05,
"loss": 0.6955,
"step": 10
},
{
"epoch": 0.3508771929824561,
"grad_norm": 0.24917739629745483,
"learning_rate": 2.3391812865497074e-05,
"loss": 0.6942,
"step": 20
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.2268371284008026,
"learning_rate": 3.508771929824561e-05,
"loss": 0.6939,
"step": 30
},
{
"epoch": 0.7017543859649122,
"grad_norm": 0.2435961812734604,
"learning_rate": 4.678362573099415e-05,
"loss": 0.6918,
"step": 40
},
{
"epoch": 0.8771929824561403,
"grad_norm": 0.24638999998569489,
"learning_rate": 5.847953216374269e-05,
"loss": 0.6889,
"step": 50
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.2426590472459793,
"learning_rate": 7.017543859649122e-05,
"loss": 0.6854,
"step": 60
},
{
"epoch": 1.2280701754385965,
"grad_norm": 0.26534757018089294,
"learning_rate": 8.187134502923976e-05,
"loss": 0.6803,
"step": 70
},
{
"epoch": 1.4035087719298245,
"grad_norm": 0.2573549449443817,
"learning_rate": 9.35672514619883e-05,
"loss": 0.6763,
"step": 80
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.2639031410217285,
"learning_rate": 0.00010526315789473685,
"loss": 0.6701,
"step": 90
},
{
"epoch": 1.7543859649122808,
"grad_norm": 0.26114630699157715,
"learning_rate": 0.00011695906432748539,
"loss": 0.6635,
"step": 100
},
{
"epoch": 1.7543859649122808,
"eval_accuracy": 0.7604444444444445,
"eval_f1": 0.5705179282868525,
"eval_loss": 0.6512863039970398,
"eval_precision": 0.43552311435523117,
"eval_recall": 0.8267898383371824,
"eval_runtime": 2.9095,
"eval_samples_per_second": 77.332,
"eval_steps_per_second": 9.967,
"step": 100
},
{
"epoch": 1.9298245614035088,
"grad_norm": 0.3371104896068573,
"learning_rate": 0.0001286549707602339,
"loss": 0.6502,
"step": 110
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.31244638562202454,
"learning_rate": 0.00014035087719298245,
"loss": 0.6343,
"step": 120
},
{
"epoch": 2.280701754385965,
"grad_norm": 0.47065746784210205,
"learning_rate": 0.00015204678362573098,
"loss": 0.6161,
"step": 130
},
{
"epoch": 2.456140350877193,
"grad_norm": 0.41640815138816833,
"learning_rate": 0.00016374269005847952,
"loss": 0.588,
"step": 140
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.34670090675354004,
"learning_rate": 0.00017543859649122806,
"loss": 0.5565,
"step": 150
},
{
"epoch": 2.807017543859649,
"grad_norm": 0.384328693151474,
"learning_rate": 0.0001871345029239766,
"loss": 0.5242,
"step": 160
},
{
"epoch": 2.982456140350877,
"grad_norm": 0.4133964478969574,
"learning_rate": 0.00019883040935672513,
"loss": 0.5158,
"step": 170
},
{
"epoch": 3.1578947368421053,
"grad_norm": 0.4693595767021179,
"learning_rate": 0.00019883040935672513,
"loss": 0.4658,
"step": 180
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.41811782121658325,
"learning_rate": 0.00019753086419753085,
"loss": 0.4297,
"step": 190
},
{
"epoch": 3.5087719298245617,
"grad_norm": 0.8540976643562317,
"learning_rate": 0.00019623131903833657,
"loss": 0.4461,
"step": 200
},
{
"epoch": 3.5087719298245617,
"eval_accuracy": 0.8768888888888889,
"eval_f1": 0.729227761485826,
"eval_loss": 0.3972250819206238,
"eval_precision": 0.6322033898305085,
"eval_recall": 0.8614318706697459,
"eval_runtime": 1.766,
"eval_samples_per_second": 127.406,
"eval_steps_per_second": 16.421,
"step": 200
},
{
"epoch": 3.6842105263157894,
"grad_norm": 0.8259305357933044,
"learning_rate": 0.0001949317738791423,
"loss": 0.3914,
"step": 210
},
{
"epoch": 3.8596491228070176,
"grad_norm": 0.8546284437179565,
"learning_rate": 0.00019363222871994802,
"loss": 0.384,
"step": 220
},
{
"epoch": 4.035087719298246,
"grad_norm": 0.3827027678489685,
"learning_rate": 0.00019233268356075374,
"loss": 0.3497,
"step": 230
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.6248043775558472,
"learning_rate": 0.00019103313840155946,
"loss": 0.3648,
"step": 240
},
{
"epoch": 4.385964912280702,
"grad_norm": 0.5684685111045837,
"learning_rate": 0.00018973359324236518,
"loss": 0.3112,
"step": 250
},
{
"epoch": 4.56140350877193,
"grad_norm": 0.5080260634422302,
"learning_rate": 0.0001884340480831709,
"loss": 0.3059,
"step": 260
},
{
"epoch": 4.7368421052631575,
"grad_norm": 0.5282370448112488,
"learning_rate": 0.0001871345029239766,
"loss": 0.2922,
"step": 270
},
{
"epoch": 4.912280701754386,
"grad_norm": 0.7253307104110718,
"learning_rate": 0.00018583495776478232,
"loss": 0.2909,
"step": 280
},
{
"epoch": 5.087719298245614,
"grad_norm": 0.7058104276657104,
"learning_rate": 0.00018453541260558804,
"loss": 0.2922,
"step": 290
},
{
"epoch": 5.2631578947368425,
"grad_norm": 1.1993378400802612,
"learning_rate": 0.00018323586744639376,
"loss": 0.2599,
"step": 300
},
{
"epoch": 5.2631578947368425,
"eval_accuracy": 0.9226666666666666,
"eval_f1": 0.804932735426009,
"eval_loss": 0.2404223531484604,
"eval_precision": 0.7821350762527233,
"eval_recall": 0.8290993071593533,
"eval_runtime": 2.7313,
"eval_samples_per_second": 82.378,
"eval_steps_per_second": 10.618,
"step": 300
},
{
"epoch": 5.43859649122807,
"grad_norm": 0.8134835362434387,
"learning_rate": 0.00018193632228719948,
"loss": 0.2645,
"step": 310
},
{
"epoch": 5.614035087719298,
"grad_norm": 0.7742730975151062,
"learning_rate": 0.0001806367771280052,
"loss": 0.2345,
"step": 320
},
{
"epoch": 5.7894736842105265,
"grad_norm": 0.5191880464553833,
"learning_rate": 0.00017933723196881092,
"loss": 0.2504,
"step": 330
},
{
"epoch": 5.964912280701754,
"grad_norm": 0.7682189345359802,
"learning_rate": 0.00017803768680961664,
"loss": 0.2654,
"step": 340
},
{
"epoch": 6.140350877192983,
"grad_norm": 0.7704707384109497,
"learning_rate": 0.00017673814165042236,
"loss": 0.2431,
"step": 350
},
{
"epoch": 6.315789473684211,
"grad_norm": 0.9333469867706299,
"learning_rate": 0.00017543859649122806,
"loss": 0.2382,
"step": 360
},
{
"epoch": 6.491228070175438,
"grad_norm": 0.8412513136863708,
"learning_rate": 0.00017413905133203378,
"loss": 0.2207,
"step": 370
},
{
"epoch": 6.666666666666667,
"grad_norm": 0.7568041086196899,
"learning_rate": 0.0001728395061728395,
"loss": 0.2271,
"step": 380
},
{
"epoch": 6.842105263157895,
"grad_norm": 0.689445436000824,
"learning_rate": 0.00017153996101364522,
"loss": 0.2076,
"step": 390
},
{
"epoch": 7.017543859649122,
"grad_norm": 0.7390238046646118,
"learning_rate": 0.00017024041585445094,
"loss": 0.2074,
"step": 400
},
{
"epoch": 7.017543859649122,
"eval_accuracy": 0.9346666666666666,
"eval_f1": 0.8256227758007118,
"eval_loss": 0.1941838562488556,
"eval_precision": 0.848780487804878,
"eval_recall": 0.8036951501154734,
"eval_runtime": 1.7733,
"eval_samples_per_second": 126.88,
"eval_steps_per_second": 16.353,
"step": 400
},
{
"epoch": 7.192982456140351,
"grad_norm": 0.4645775258541107,
"learning_rate": 0.00016894087069525666,
"loss": 0.2233,
"step": 410
},
{
"epoch": 7.368421052631579,
"grad_norm": 0.6826916337013245,
"learning_rate": 0.00016764132553606238,
"loss": 0.1846,
"step": 420
},
{
"epoch": 7.543859649122807,
"grad_norm": 0.6299170851707458,
"learning_rate": 0.0001663417803768681,
"loss": 0.1807,
"step": 430
},
{
"epoch": 7.719298245614035,
"grad_norm": 0.40688008069992065,
"learning_rate": 0.00016504223521767383,
"loss": 0.1925,
"step": 440
},
{
"epoch": 7.894736842105263,
"grad_norm": 0.8310642242431641,
"learning_rate": 0.00016374269005847952,
"loss": 0.1906,
"step": 450
},
{
"epoch": 8.070175438596491,
"grad_norm": 0.7561126351356506,
"learning_rate": 0.00016244314489928524,
"loss": 0.2537,
"step": 460
},
{
"epoch": 8.24561403508772,
"grad_norm": 1.5505608320236206,
"learning_rate": 0.00016114359974009096,
"loss": 0.2134,
"step": 470
},
{
"epoch": 8.421052631578947,
"grad_norm": 0.5844523310661316,
"learning_rate": 0.00015984405458089668,
"loss": 0.1927,
"step": 480
},
{
"epoch": 8.596491228070175,
"grad_norm": 0.6846328377723694,
"learning_rate": 0.0001585445094217024,
"loss": 0.1843,
"step": 490
},
{
"epoch": 8.771929824561404,
"grad_norm": 0.5246126651763916,
"learning_rate": 0.00015724496426250813,
"loss": 0.167,
"step": 500
},
{
"epoch": 8.771929824561404,
"eval_accuracy": 0.9364444444444444,
"eval_f1": 0.8354430379746836,
"eval_loss": 0.17720411717891693,
"eval_precision": 0.8325688073394495,
"eval_recall": 0.8383371824480369,
"eval_runtime": 2.7456,
"eval_samples_per_second": 81.95,
"eval_steps_per_second": 10.562,
"step": 500
},
{
"epoch": 8.947368421052632,
"grad_norm": 0.9557002782821655,
"learning_rate": 0.00015594541910331385,
"loss": 0.1752,
"step": 510
},
{
"epoch": 9.12280701754386,
"grad_norm": 1.115300178527832,
"learning_rate": 0.00015464587394411957,
"loss": 0.2,
"step": 520
},
{
"epoch": 9.298245614035087,
"grad_norm": 0.6540657877922058,
"learning_rate": 0.00015334632878492526,
"loss": 0.158,
"step": 530
},
{
"epoch": 9.473684210526315,
"grad_norm": 0.8491069078445435,
"learning_rate": 0.00015204678362573098,
"loss": 0.1813,
"step": 540
},
{
"epoch": 9.649122807017545,
"grad_norm": 1.3543705940246582,
"learning_rate": 0.0001507472384665367,
"loss": 0.1951,
"step": 550
},
{
"epoch": 9.824561403508772,
"grad_norm": 0.8627998232841492,
"learning_rate": 0.00014944769330734243,
"loss": 0.1945,
"step": 560
},
{
"epoch": 10.0,
"grad_norm": 1.2822953462600708,
"learning_rate": 0.00014814814814814815,
"loss": 0.1591,
"step": 570
},
{
"epoch": 10.175438596491228,
"grad_norm": 0.6904670596122742,
"learning_rate": 0.00014684860298895387,
"loss": 0.1545,
"step": 580
},
{
"epoch": 10.350877192982455,
"grad_norm": 1.3155221939086914,
"learning_rate": 0.0001455490578297596,
"loss": 0.1385,
"step": 590
},
{
"epoch": 10.526315789473685,
"grad_norm": 0.8683547973632812,
"learning_rate": 0.0001442495126705653,
"loss": 0.1661,
"step": 600
},
{
"epoch": 10.526315789473685,
"eval_accuracy": 0.9342222222222222,
"eval_f1": 0.8258823529411765,
"eval_loss": 0.16532927751541138,
"eval_precision": 0.841726618705036,
"eval_recall": 0.8106235565819861,
"eval_runtime": 1.7784,
"eval_samples_per_second": 126.515,
"eval_steps_per_second": 16.306,
"step": 600
},
{
"epoch": 10.701754385964913,
"grad_norm": 0.7406933307647705,
"learning_rate": 0.00014294996751137103,
"loss": 0.1569,
"step": 610
},
{
"epoch": 10.87719298245614,
"grad_norm": 1.5100739002227783,
"learning_rate": 0.00014165042235217672,
"loss": 0.1873,
"step": 620
},
{
"epoch": 11.052631578947368,
"grad_norm": 0.8658424019813538,
"learning_rate": 0.00014035087719298245,
"loss": 0.1771,
"step": 630
},
{
"epoch": 11.228070175438596,
"grad_norm": 0.761426568031311,
"learning_rate": 0.00013905133203378817,
"loss": 0.1522,
"step": 640
},
{
"epoch": 11.403508771929825,
"grad_norm": 0.6994770765304565,
"learning_rate": 0.0001377517868745939,
"loss": 0.1462,
"step": 650
},
{
"epoch": 11.578947368421053,
"grad_norm": 0.6044259071350098,
"learning_rate": 0.0001364522417153996,
"loss": 0.1688,
"step": 660
},
{
"epoch": 11.75438596491228,
"grad_norm": 0.6377450227737427,
"learning_rate": 0.00013515269655620533,
"loss": 0.1726,
"step": 670
},
{
"epoch": 11.929824561403509,
"grad_norm": 0.45792627334594727,
"learning_rate": 0.00013385315139701105,
"loss": 0.1578,
"step": 680
},
{
"epoch": 12.105263157894736,
"grad_norm": 0.5658883452415466,
"learning_rate": 0.00013255360623781677,
"loss": 0.1528,
"step": 690
},
{
"epoch": 12.280701754385966,
"grad_norm": 0.568031370639801,
"learning_rate": 0.0001312540610786225,
"loss": 0.1603,
"step": 700
},
{
"epoch": 12.280701754385966,
"eval_accuracy": 0.9408888888888889,
"eval_f1": 0.8473019517795637,
"eval_loss": 0.16492225229740143,
"eval_precision": 0.8424657534246576,
"eval_recall": 0.8521939953810623,
"eval_runtime": 2.4488,
"eval_samples_per_second": 91.883,
"eval_steps_per_second": 11.843,
"step": 700
},
{
"epoch": 12.456140350877194,
"grad_norm": 0.8529219031333923,
"learning_rate": 0.0001299545159194282,
"loss": 0.1438,
"step": 710
},
{
"epoch": 12.631578947368421,
"grad_norm": 0.7960824370384216,
"learning_rate": 0.0001286549707602339,
"loss": 0.1245,
"step": 720
},
{
"epoch": 12.807017543859649,
"grad_norm": 0.8270284533500671,
"learning_rate": 0.00012748538011695908,
"loss": 0.1775,
"step": 730
},
{
"epoch": 12.982456140350877,
"grad_norm": 0.407463014125824,
"learning_rate": 0.0001261858349577648,
"loss": 0.1583,
"step": 740
},
{
"epoch": 13.157894736842104,
"grad_norm": 1.2405822277069092,
"learning_rate": 0.0001248862897985705,
"loss": 0.1412,
"step": 750
},
{
"epoch": 13.333333333333334,
"grad_norm": 0.7762990593910217,
"learning_rate": 0.00012358674463937622,
"loss": 0.137,
"step": 760
},
{
"epoch": 13.508771929824562,
"grad_norm": 0.7772154808044434,
"learning_rate": 0.00012228719948018194,
"loss": 0.1618,
"step": 770
},
{
"epoch": 13.68421052631579,
"grad_norm": 0.3346017599105835,
"learning_rate": 0.00012098765432098766,
"loss": 0.1276,
"step": 780
},
{
"epoch": 13.859649122807017,
"grad_norm": 0.7661828994750977,
"learning_rate": 0.00011968810916179338,
"loss": 0.1606,
"step": 790
},
{
"epoch": 14.035087719298245,
"grad_norm": 1.2454911470413208,
"learning_rate": 0.0001183885640025991,
"loss": 0.1523,
"step": 800
},
{
"epoch": 14.035087719298245,
"eval_accuracy": 0.9466666666666667,
"eval_f1": 0.8591549295774648,
"eval_loss": 0.15682315826416016,
"eval_precision": 0.8735083532219571,
"eval_recall": 0.8452655889145496,
"eval_runtime": 1.8011,
"eval_samples_per_second": 124.926,
"eval_steps_per_second": 16.102,
"step": 800
},
{
"epoch": 14.210526315789474,
"grad_norm": 3.0044612884521484,
"learning_rate": 0.00011708901884340481,
"loss": 0.1331,
"step": 810
},
{
"epoch": 14.385964912280702,
"grad_norm": 0.7117482423782349,
"learning_rate": 0.00011578947368421053,
"loss": 0.1619,
"step": 820
},
{
"epoch": 14.56140350877193,
"grad_norm": 0.6939218044281006,
"learning_rate": 0.00011448992852501626,
"loss": 0.1531,
"step": 830
},
{
"epoch": 14.736842105263158,
"grad_norm": 0.5622960329055786,
"learning_rate": 0.00011319038336582198,
"loss": 0.131,
"step": 840
},
{
"epoch": 14.912280701754385,
"grad_norm": 0.9399430155754089,
"learning_rate": 0.0001118908382066277,
"loss": 0.1276,
"step": 850
},
{
"epoch": 15.087719298245615,
"grad_norm": 1.6480320692062378,
"learning_rate": 0.0001105912930474334,
"loss": 0.1656,
"step": 860
},
{
"epoch": 15.263157894736842,
"grad_norm": 0.7238647937774658,
"learning_rate": 0.00010929174788823913,
"loss": 0.1261,
"step": 870
},
{
"epoch": 15.43859649122807,
"grad_norm": 1.0423846244812012,
"learning_rate": 0.00010799220272904485,
"loss": 0.1328,
"step": 880
},
{
"epoch": 15.614035087719298,
"grad_norm": 1.1374431848526,
"learning_rate": 0.00010669265756985057,
"loss": 0.1427,
"step": 890
},
{
"epoch": 15.789473684210526,
"grad_norm": 0.7375030517578125,
"learning_rate": 0.00010539311241065628,
"loss": 0.1506,
"step": 900
},
{
"epoch": 15.789473684210526,
"eval_accuracy": 0.9431111111111111,
"eval_f1": 0.8494117647058823,
"eval_loss": 0.15481138229370117,
"eval_precision": 0.8657074340527577,
"eval_recall": 0.8337182448036952,
"eval_runtime": 1.8243,
"eval_samples_per_second": 123.334,
"eval_steps_per_second": 15.896,
"step": 900
},
{
"epoch": 15.964912280701755,
"grad_norm": 0.7035567164421082,
"learning_rate": 0.000104093567251462,
"loss": 0.1324,
"step": 910
},
{
"epoch": 16.140350877192983,
"grad_norm": 0.6969211101531982,
"learning_rate": 0.00010279402209226772,
"loss": 0.1257,
"step": 920
},
{
"epoch": 16.31578947368421,
"grad_norm": 0.3633826673030853,
"learning_rate": 0.00010149447693307344,
"loss": 0.1306,
"step": 930
},
{
"epoch": 16.49122807017544,
"grad_norm": 0.8118802309036255,
"learning_rate": 0.00010019493177387915,
"loss": 0.1091,
"step": 940
},
{
"epoch": 16.666666666666668,
"grad_norm": 0.6684471964836121,
"learning_rate": 9.889538661468485e-05,
"loss": 0.1323,
"step": 950
},
{
"epoch": 16.842105263157894,
"grad_norm": 0.6080668568611145,
"learning_rate": 9.759584145549058e-05,
"loss": 0.1168,
"step": 960
},
{
"epoch": 17.017543859649123,
"grad_norm": 0.7799493670463562,
"learning_rate": 9.62962962962963e-05,
"loss": 0.141,
"step": 970
},
{
"epoch": 17.19298245614035,
"grad_norm": 0.5670738816261292,
"learning_rate": 9.499675113710202e-05,
"loss": 0.1244,
"step": 980
},
{
"epoch": 17.36842105263158,
"grad_norm": 0.9652756452560425,
"learning_rate": 9.369720597790773e-05,
"loss": 0.1354,
"step": 990
},
{
"epoch": 17.54385964912281,
"grad_norm": 0.8537412881851196,
"learning_rate": 9.239766081871345e-05,
"loss": 0.1485,
"step": 1000
},
{
"epoch": 17.54385964912281,
"eval_accuracy": 0.9444444444444444,
"eval_f1": 0.8544819557625145,
"eval_loss": 0.15389865636825562,
"eval_precision": 0.8615023474178404,
"eval_recall": 0.8475750577367206,
"eval_runtime": 1.7887,
"eval_samples_per_second": 125.789,
"eval_steps_per_second": 16.213,
"step": 1000
},
{
"epoch": 17.719298245614034,
"grad_norm": 0.9258742928504944,
"learning_rate": 9.109811565951917e-05,
"loss": 0.1284,
"step": 1010
},
{
"epoch": 17.894736842105264,
"grad_norm": 0.6817509531974792,
"learning_rate": 8.979857050032489e-05,
"loss": 0.1226,
"step": 1020
},
{
"epoch": 18.07017543859649,
"grad_norm": 0.8437041640281677,
"learning_rate": 8.849902534113061e-05,
"loss": 0.1527,
"step": 1030
},
{
"epoch": 18.24561403508772,
"grad_norm": 1.2362749576568604,
"learning_rate": 8.719948018193632e-05,
"loss": 0.1224,
"step": 1040
},
{
"epoch": 18.42105263157895,
"grad_norm": 0.4136218726634979,
"learning_rate": 8.589993502274204e-05,
"loss": 0.1293,
"step": 1050
},
{
"epoch": 18.596491228070175,
"grad_norm": 0.8913040161132812,
"learning_rate": 8.460038986354776e-05,
"loss": 0.1305,
"step": 1060
},
{
"epoch": 18.771929824561404,
"grad_norm": 1.0768448114395142,
"learning_rate": 8.330084470435348e-05,
"loss": 0.1134,
"step": 1070
},
{
"epoch": 18.94736842105263,
"grad_norm": 0.9289010763168335,
"learning_rate": 8.200129954515919e-05,
"loss": 0.1551,
"step": 1080
},
{
"epoch": 19.12280701754386,
"grad_norm": 0.4481465220451355,
"learning_rate": 8.070175438596491e-05,
"loss": 0.1263,
"step": 1090
},
{
"epoch": 19.29824561403509,
"grad_norm": 0.7408900260925293,
"learning_rate": 7.940220922677063e-05,
"loss": 0.1263,
"step": 1100
},
{
"epoch": 19.29824561403509,
"eval_accuracy": 0.944,
"eval_f1": 0.8534883720930233,
"eval_loss": 0.15210777521133423,
"eval_precision": 0.8594847775175644,
"eval_recall": 0.8475750577367206,
"eval_runtime": 1.7885,
"eval_samples_per_second": 125.802,
"eval_steps_per_second": 16.214,
"step": 1100
},
{
"epoch": 19.473684210526315,
"grad_norm": 0.8939012289047241,
"learning_rate": 7.810266406757635e-05,
"loss": 0.1206,
"step": 1110
},
{
"epoch": 19.649122807017545,
"grad_norm": 0.6809560656547546,
"learning_rate": 7.680311890838207e-05,
"loss": 0.1225,
"step": 1120
},
{
"epoch": 19.82456140350877,
"grad_norm": 1.1481623649597168,
"learning_rate": 7.550357374918778e-05,
"loss": 0.1291,
"step": 1130
},
{
"epoch": 20.0,
"grad_norm": 2.0011980533599854,
"learning_rate": 7.42040285899935e-05,
"loss": 0.1482,
"step": 1140
},
{
"epoch": 20.17543859649123,
"grad_norm": 0.6619019508361816,
"learning_rate": 7.290448343079922e-05,
"loss": 0.1123,
"step": 1150
},
{
"epoch": 20.350877192982455,
"grad_norm": 0.796700656414032,
"learning_rate": 7.160493827160494e-05,
"loss": 0.1166,
"step": 1160
},
{
"epoch": 20.526315789473685,
"grad_norm": 0.9634900689125061,
"learning_rate": 7.030539311241065e-05,
"loss": 0.1263,
"step": 1170
},
{
"epoch": 20.70175438596491,
"grad_norm": 0.505535900592804,
"learning_rate": 6.900584795321637e-05,
"loss": 0.1117,
"step": 1180
},
{
"epoch": 20.87719298245614,
"grad_norm": 0.5166471600532532,
"learning_rate": 6.770630279402209e-05,
"loss": 0.1279,
"step": 1190
},
{
"epoch": 21.05263157894737,
"grad_norm": 1.2773476839065552,
"learning_rate": 6.640675763482781e-05,
"loss": 0.1444,
"step": 1200
},
{
"epoch": 21.05263157894737,
"eval_accuracy": 0.9417777777777778,
"eval_f1": 0.8471411901983664,
"eval_loss": 0.155166357755661,
"eval_precision": 0.8561320754716981,
"eval_recall": 0.8383371824480369,
"eval_runtime": 2.37,
"eval_samples_per_second": 94.937,
"eval_steps_per_second": 12.236,
"step": 1200
},
{
"epoch": 21.228070175438596,
"grad_norm": 0.793021559715271,
"learning_rate": 6.510721247563352e-05,
"loss": 0.1168,
"step": 1210
},
{
"epoch": 21.403508771929825,
"grad_norm": 1.2551689147949219,
"learning_rate": 6.380766731643924e-05,
"loss": 0.1089,
"step": 1220
},
{
"epoch": 21.57894736842105,
"grad_norm": 0.6803563237190247,
"learning_rate": 6.250812215724496e-05,
"loss": 0.1186,
"step": 1230
},
{
"epoch": 21.75438596491228,
"grad_norm": 1.2632770538330078,
"learning_rate": 6.120857699805068e-05,
"loss": 0.1116,
"step": 1240
},
{
"epoch": 21.92982456140351,
"grad_norm": 0.525141716003418,
"learning_rate": 5.99090318388564e-05,
"loss": 0.0979,
"step": 1250
},
{
"epoch": 22.105263157894736,
"grad_norm": 0.5942980647087097,
"learning_rate": 5.860948667966212e-05,
"loss": 0.1483,
"step": 1260
},
{
"epoch": 22.280701754385966,
"grad_norm": 1.0624207258224487,
"learning_rate": 5.7309941520467835e-05,
"loss": 0.1155,
"step": 1270
},
{
"epoch": 22.45614035087719,
"grad_norm": 0.6244792938232422,
"learning_rate": 5.6010396361273556e-05,
"loss": 0.1159,
"step": 1280
},
{
"epoch": 22.63157894736842,
"grad_norm": 1.9767743349075317,
"learning_rate": 5.471085120207927e-05,
"loss": 0.1165,
"step": 1290
},
{
"epoch": 22.80701754385965,
"grad_norm": 2.270113468170166,
"learning_rate": 5.341130604288499e-05,
"loss": 0.1133,
"step": 1300
},
{
"epoch": 22.80701754385965,
"eval_accuracy": 0.9448888888888889,
"eval_f1": 0.8561484918793504,
"eval_loss": 0.1531468778848648,
"eval_precision": 0.8601398601398601,
"eval_recall": 0.8521939953810623,
"eval_runtime": 4.5112,
"eval_samples_per_second": 49.875,
"eval_steps_per_second": 6.428,
"step": 1300
},
{
"epoch": 22.982456140350877,
"grad_norm": 2.3252851963043213,
"learning_rate": 5.2111760883690706e-05,
"loss": 0.1018,
"step": 1310
},
{
"epoch": 23.157894736842106,
"grad_norm": 1.3282454013824463,
"learning_rate": 5.081221572449643e-05,
"loss": 0.1194,
"step": 1320
},
{
"epoch": 23.333333333333332,
"grad_norm": 0.652642548084259,
"learning_rate": 4.951267056530214e-05,
"loss": 0.1016,
"step": 1330
},
{
"epoch": 23.50877192982456,
"grad_norm": 1.584074854850769,
"learning_rate": 4.821312540610786e-05,
"loss": 0.1109,
"step": 1340
},
{
"epoch": 23.68421052631579,
"grad_norm": 0.5799722075462341,
"learning_rate": 4.691358024691358e-05,
"loss": 0.0901,
"step": 1350
},
{
"epoch": 23.859649122807017,
"grad_norm": 1.9589979648590088,
"learning_rate": 4.56140350877193e-05,
"loss": 0.1195,
"step": 1360
},
{
"epoch": 24.035087719298247,
"grad_norm": 0.784710705280304,
"learning_rate": 4.431448992852502e-05,
"loss": 0.1318,
"step": 1370
},
{
"epoch": 24.210526315789473,
"grad_norm": 1.0715792179107666,
"learning_rate": 4.301494476933073e-05,
"loss": 0.1236,
"step": 1380
},
{
"epoch": 24.385964912280702,
"grad_norm": 0.8761755228042603,
"learning_rate": 4.1715399610136454e-05,
"loss": 0.1076,
"step": 1390
},
{
"epoch": 24.56140350877193,
"grad_norm": 0.8874859809875488,
"learning_rate": 4.041585445094217e-05,
"loss": 0.1019,
"step": 1400
},
{
"epoch": 24.56140350877193,
"eval_accuracy": 0.9431111111111111,
"eval_f1": 0.8490566037735849,
"eval_loss": 0.15768744051456451,
"eval_precision": 0.8674698795180723,
"eval_recall": 0.8314087759815243,
"eval_runtime": 1.817,
"eval_samples_per_second": 123.828,
"eval_steps_per_second": 15.96,
"step": 1400
},
{
"epoch": 24.736842105263158,
"grad_norm": 0.569615364074707,
"learning_rate": 3.911630929174789e-05,
"loss": 0.1114,
"step": 1410
},
{
"epoch": 24.912280701754387,
"grad_norm": 0.4636388123035431,
"learning_rate": 3.7816764132553604e-05,
"loss": 0.1016,
"step": 1420
},
{
"epoch": 25.087719298245613,
"grad_norm": 0.7966068983078003,
"learning_rate": 3.6517218973359325e-05,
"loss": 0.1181,
"step": 1430
},
{
"epoch": 25.263157894736842,
"grad_norm": 0.7331326603889465,
"learning_rate": 3.521767381416504e-05,
"loss": 0.1037,
"step": 1440
},
{
"epoch": 25.43859649122807,
"grad_norm": 1.1376439332962036,
"learning_rate": 3.391812865497076e-05,
"loss": 0.091,
"step": 1450
},
{
"epoch": 25.614035087719298,
"grad_norm": 0.43491020798683167,
"learning_rate": 3.2618583495776475e-05,
"loss": 0.102,
"step": 1460
},
{
"epoch": 25.789473684210527,
"grad_norm": 0.9410120844841003,
"learning_rate": 3.1319038336582196e-05,
"loss": 0.1108,
"step": 1470
},
{
"epoch": 25.964912280701753,
"grad_norm": 0.9321810603141785,
"learning_rate": 3.0019493177387914e-05,
"loss": 0.1059,
"step": 1480
},
{
"epoch": 26.140350877192983,
"grad_norm": 0.5571371912956238,
"learning_rate": 2.871994801819363e-05,
"loss": 0.0926,
"step": 1490
},
{
"epoch": 26.31578947368421,
"grad_norm": 1.9081007242202759,
"learning_rate": 2.742040285899935e-05,
"loss": 0.1141,
"step": 1500
},
{
"epoch": 26.31578947368421,
"eval_accuracy": 0.9413333333333334,
"eval_f1": 0.8472222222222222,
"eval_loss": 0.15601032972335815,
"eval_precision": 0.8491879350348028,
"eval_recall": 0.8452655889145496,
"eval_runtime": 1.867,
"eval_samples_per_second": 120.511,
"eval_steps_per_second": 15.533,
"step": 1500
},
{
"epoch": 26.49122807017544,
"grad_norm": 0.8356673121452332,
"learning_rate": 2.6120857699805067e-05,
"loss": 0.1077,
"step": 1510
},
{
"epoch": 26.666666666666668,
"grad_norm": 1.3644295930862427,
"learning_rate": 2.4821312540610784e-05,
"loss": 0.1212,
"step": 1520
},
{
"epoch": 26.842105263157894,
"grad_norm": 0.779222309589386,
"learning_rate": 2.3521767381416506e-05,
"loss": 0.1229,
"step": 1530
},
{
"epoch": 27.017543859649123,
"grad_norm": 0.5873481631278992,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.0998,
"step": 1540
},
{
"epoch": 27.19298245614035,
"grad_norm": 0.9948704242706299,
"learning_rate": 2.092267706302794e-05,
"loss": 0.1435,
"step": 1550
},
{
"epoch": 27.36842105263158,
"grad_norm": 0.32820120453834534,
"learning_rate": 1.962313190383366e-05,
"loss": 0.0992,
"step": 1560
},
{
"epoch": 27.54385964912281,
"grad_norm": 1.0797744989395142,
"learning_rate": 1.8323586744639376e-05,
"loss": 0.1095,
"step": 1570
},
{
"epoch": 27.719298245614034,
"grad_norm": 1.5036197900772095,
"learning_rate": 1.7024041585445094e-05,
"loss": 0.119,
"step": 1580
},
{
"epoch": 27.894736842105264,
"grad_norm": 1.0871007442474365,
"learning_rate": 1.5724496426250812e-05,
"loss": 0.0974,
"step": 1590
},
{
"epoch": 28.07017543859649,
"grad_norm": 0.6861986517906189,
"learning_rate": 1.442495126705653e-05,
"loss": 0.1087,
"step": 1600
},
{
"epoch": 28.07017543859649,
"eval_accuracy": 0.9422222222222222,
"eval_f1": 0.8491879350348028,
"eval_loss": 0.15734025835990906,
"eval_precision": 0.8531468531468531,
"eval_recall": 0.8452655889145496,
"eval_runtime": 3.5904,
"eval_samples_per_second": 62.668,
"eval_steps_per_second": 8.077,
"step": 1600
},
{
"epoch": 28.24561403508772,
"grad_norm": 1.5399742126464844,
"learning_rate": 1.3125406107862247e-05,
"loss": 0.1243,
"step": 1610
},
{
"epoch": 28.42105263157895,
"grad_norm": 0.7721771001815796,
"learning_rate": 1.1825860948667967e-05,
"loss": 0.0965,
"step": 1620
},
{
"epoch": 28.596491228070175,
"grad_norm": 1.040131688117981,
"learning_rate": 1.0526315789473684e-05,
"loss": 0.1133,
"step": 1630
},
{
"epoch": 28.771929824561404,
"grad_norm": 0.9755656123161316,
"learning_rate": 9.226770630279402e-06,
"loss": 0.0885,
"step": 1640
},
{
"epoch": 28.94736842105263,
"grad_norm": 0.5838367342948914,
"learning_rate": 7.92722547108512e-06,
"loss": 0.1134,
"step": 1650
},
{
"epoch": 29.12280701754386,
"grad_norm": 1.698116421699524,
"learning_rate": 6.6276803118908384e-06,
"loss": 0.1278,
"step": 1660
},
{
"epoch": 29.29824561403509,
"grad_norm": 0.581572413444519,
"learning_rate": 5.328135152696556e-06,
"loss": 0.1209,
"step": 1670
},
{
"epoch": 29.473684210526315,
"grad_norm": 0.4100797772407532,
"learning_rate": 4.028589993502274e-06,
"loss": 0.1108,
"step": 1680
},
{
"epoch": 29.649122807017545,
"grad_norm": 1.5013538599014282,
"learning_rate": 2.729044834307992e-06,
"loss": 0.1195,
"step": 1690
},
{
"epoch": 29.82456140350877,
"grad_norm": 1.0121512413024902,
"learning_rate": 1.4294996751137102e-06,
"loss": 0.1015,
"step": 1700
},
{
"epoch": 29.82456140350877,
"eval_accuracy": 0.9422222222222222,
"eval_f1": 0.8488372093023255,
"eval_loss": 0.15452326834201813,
"eval_precision": 0.8548009367681498,
"eval_recall": 0.8429561200923787,
"eval_runtime": 1.8193,
"eval_samples_per_second": 123.672,
"eval_steps_per_second": 15.94,
"step": 1700
},
{
"epoch": 30.0,
"grad_norm": 2.770343780517578,
"learning_rate": 1.299545159194282e-07,
"loss": 0.1342,
"step": 1710
},
{
"epoch": 30.0,
"step": 1710,
"total_flos": 1.77124415883264e+17,
"train_loss": 0.20865077226482637,
"train_runtime": 373.9101,
"train_samples_per_second": 72.21,
"train_steps_per_second": 4.573
}
],
"logging_steps": 10,
"max_steps": 1710,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.77124415883264e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}