{ "best_metric": 1.064771056175232, "best_model_checkpoint": "sbert-ru-huawei-sentiment-fine-up/checkpoint-4868", "epoch": 2.0, "eval_steps": 500, "global_step": 4868, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010271158586688579, "grad_norm": 31.50909423828125, "learning_rate": 5.476951163852123e-07, "loss": 5.7521, "step": 25 }, { "epoch": 0.020542317173377157, "grad_norm": 65.67906951904297, "learning_rate": 1.1182108626198083e-06, "loss": 5.3431, "step": 50 }, { "epoch": 0.030813475760065736, "grad_norm": 47.638511657714844, "learning_rate": 1.6659059790050208e-06, "loss": 3.9751, "step": 75 }, { "epoch": 0.041084634346754315, "grad_norm": 23.86779022216797, "learning_rate": 2.2364217252396165e-06, "loss": 1.9721, "step": 100 }, { "epoch": 0.05135579293344289, "grad_norm": 17.054542541503906, "learning_rate": 2.806937471474213e-06, "loss": 1.3281, "step": 125 }, { "epoch": 0.06162695152013147, "grad_norm": 10.087335586547852, "learning_rate": 3.3774532177088093e-06, "loss": 1.236, "step": 150 }, { "epoch": 0.07189811010682005, "grad_norm": 22.066057205200195, "learning_rate": 3.9479689639434044e-06, "loss": 1.0576, "step": 175 }, { "epoch": 0.08216926869350863, "grad_norm": 16.504131317138672, "learning_rate": 4.518484710178001e-06, "loss": 0.9854, "step": 200 }, { "epoch": 0.0924404272801972, "grad_norm": 16.981975555419922, "learning_rate": 5.089000456412597e-06, "loss": 0.9098, "step": 225 }, { "epoch": 0.10271158586688578, "grad_norm": 16.270986557006836, "learning_rate": 5.659516202647193e-06, "loss": 0.9133, "step": 250 }, { "epoch": 0.11298274445357437, "grad_norm": 13.069902420043945, "learning_rate": 6.230031948881789e-06, "loss": 0.8994, "step": 275 }, { "epoch": 0.12325390304026294, "grad_norm": 12.201191902160645, "learning_rate": 6.800547695116385e-06, "loss": 0.8731, "step": 300 }, { "epoch": 0.13352506162695152, "grad_norm": 7.924577236175537, "learning_rate": 7.371063441350981e-06, "loss": 0.9483, "step": 325 }, { "epoch": 0.1437962202136401, "grad_norm": 16.856870651245117, "learning_rate": 7.941579187585578e-06, "loss": 0.7615, "step": 350 }, { "epoch": 0.15406737880032867, "grad_norm": 17.550830841064453, "learning_rate": 8.512094933820173e-06, "loss": 0.8699, "step": 375 }, { "epoch": 0.16433853738701726, "grad_norm": 8.01524543762207, "learning_rate": 9.08261068005477e-06, "loss": 0.9946, "step": 400 }, { "epoch": 0.17460969597370585, "grad_norm": 12.508491516113281, "learning_rate": 9.653126426289367e-06, "loss": 0.8598, "step": 425 }, { "epoch": 0.1848808545603944, "grad_norm": 11.055355072021484, "learning_rate": 1.0223642172523962e-05, "loss": 0.8902, "step": 450 }, { "epoch": 0.195152013147083, "grad_norm": 12.723591804504395, "learning_rate": 1.0794157918758559e-05, "loss": 0.9183, "step": 475 }, { "epoch": 0.20542317173377156, "grad_norm": 29.50878143310547, "learning_rate": 1.1364673664993154e-05, "loss": 0.9221, "step": 500 }, { "epoch": 0.21569433032046015, "grad_norm": 15.626035690307617, "learning_rate": 1.193518941122775e-05, "loss": 0.8211, "step": 525 }, { "epoch": 0.22596548890714874, "grad_norm": 10.57879638671875, "learning_rate": 1.2505705157462347e-05, "loss": 0.9007, "step": 550 }, { "epoch": 0.2362366474938373, "grad_norm": 10.496281623840332, "learning_rate": 1.3076220903696942e-05, "loss": 0.8799, "step": 575 }, { "epoch": 0.2465078060805259, "grad_norm": 12.28962516784668, "learning_rate": 1.3646736649931538e-05, "loss": 0.8934, "step": 600 }, { "epoch": 0.25677896466721445, "grad_norm": 10.393588066101074, "learning_rate": 1.4217252396166134e-05, "loss": 0.8276, "step": 625 }, { "epoch": 0.26705012325390304, "grad_norm": 11.521328926086426, "learning_rate": 1.4787768142400731e-05, "loss": 0.8975, "step": 650 }, { "epoch": 0.2773212818405916, "grad_norm": 10.707319259643555, "learning_rate": 1.5358283888635328e-05, "loss": 0.8862, "step": 675 }, { "epoch": 0.2875924404272802, "grad_norm": 11.027580261230469, "learning_rate": 1.5928799634869923e-05, "loss": 0.839, "step": 700 }, { "epoch": 0.29786359901396875, "grad_norm": 8.072233200073242, "learning_rate": 1.6499315381104518e-05, "loss": 0.8836, "step": 725 }, { "epoch": 0.30813475760065734, "grad_norm": 5.254186153411865, "learning_rate": 1.7069831127339113e-05, "loss": 0.8102, "step": 750 }, { "epoch": 0.3184059161873459, "grad_norm": 9.644079208374023, "learning_rate": 1.7640346873573712e-05, "loss": 0.8363, "step": 775 }, { "epoch": 0.3286770747740345, "grad_norm": 17.169221878051758, "learning_rate": 1.8210862619808307e-05, "loss": 0.7916, "step": 800 }, { "epoch": 0.3389482333607231, "grad_norm": 12.09295654296875, "learning_rate": 1.8781378366042902e-05, "loss": 0.8846, "step": 825 }, { "epoch": 0.3492193919474117, "grad_norm": 20.629968643188477, "learning_rate": 1.93518941122775e-05, "loss": 0.9433, "step": 850 }, { "epoch": 0.35949055053410023, "grad_norm": 12.02403736114502, "learning_rate": 1.9922409858512096e-05, "loss": 0.7863, "step": 875 }, { "epoch": 0.3697617091207888, "grad_norm": 7.93563985824585, "learning_rate": 2.0492925604746694e-05, "loss": 0.8346, "step": 900 }, { "epoch": 0.3800328677074774, "grad_norm": 10.790715217590332, "learning_rate": 2.106344135098129e-05, "loss": 0.8584, "step": 925 }, { "epoch": 0.390304026294166, "grad_norm": 13.152490615844727, "learning_rate": 2.1633957097215885e-05, "loss": 0.8276, "step": 950 }, { "epoch": 0.4005751848808546, "grad_norm": 8.368447303771973, "learning_rate": 2.220447284345048e-05, "loss": 0.9162, "step": 975 }, { "epoch": 0.4108463434675431, "grad_norm": 5.231266021728516, "learning_rate": 2.2774988589685075e-05, "loss": 0.8999, "step": 1000 }, { "epoch": 0.4211175020542317, "grad_norm": 4.503598690032959, "learning_rate": 2.334550433591967e-05, "loss": 0.8748, "step": 1025 }, { "epoch": 0.4313886606409203, "grad_norm": 5.98704719543457, "learning_rate": 2.391602008215427e-05, "loss": 0.9061, "step": 1050 }, { "epoch": 0.4416598192276089, "grad_norm": 9.622344970703125, "learning_rate": 2.4486535828388864e-05, "loss": 0.9336, "step": 1075 }, { "epoch": 0.4519309778142975, "grad_norm": 7.320569038391113, "learning_rate": 2.5057051574623462e-05, "loss": 0.8711, "step": 1100 }, { "epoch": 0.462202136400986, "grad_norm": 6.236270427703857, "learning_rate": 2.562756732085806e-05, "loss": 0.8359, "step": 1125 }, { "epoch": 0.4724732949876746, "grad_norm": 9.492266654968262, "learning_rate": 2.6198083067092656e-05, "loss": 0.8248, "step": 1150 }, { "epoch": 0.4827444535743632, "grad_norm": 9.270378112792969, "learning_rate": 2.676859881332725e-05, "loss": 0.8539, "step": 1175 }, { "epoch": 0.4930156121610518, "grad_norm": 8.744667053222656, "learning_rate": 2.7316293929712462e-05, "loss": 0.9035, "step": 1200 }, { "epoch": 0.5032867707477403, "grad_norm": 7.469476222991943, "learning_rate": 2.7886809675947057e-05, "loss": 0.8951, "step": 1225 }, { "epoch": 0.5135579293344289, "grad_norm": 6.202262878417969, "learning_rate": 2.8457325422181653e-05, "loss": 0.9175, "step": 1250 }, { "epoch": 0.5238290879211175, "grad_norm": 6.080565452575684, "learning_rate": 2.9027841168416248e-05, "loss": 0.8948, "step": 1275 }, { "epoch": 0.5341002465078061, "grad_norm": 10.15191650390625, "learning_rate": 2.9598356914650843e-05, "loss": 0.8631, "step": 1300 }, { "epoch": 0.5443714050944947, "grad_norm": 13.219573020935059, "learning_rate": 3.0168872660885445e-05, "loss": 0.9074, "step": 1325 }, { "epoch": 0.5546425636811833, "grad_norm": 7.929615497589111, "learning_rate": 3.073938840712004e-05, "loss": 0.8134, "step": 1350 }, { "epoch": 0.5649137222678718, "grad_norm": 9.197999954223633, "learning_rate": 3.130990415335464e-05, "loss": 0.8826, "step": 1375 }, { "epoch": 0.5751848808545604, "grad_norm": 9.510801315307617, "learning_rate": 3.1880419899589233e-05, "loss": 0.865, "step": 1400 }, { "epoch": 0.585456039441249, "grad_norm": 14.126985549926758, "learning_rate": 3.245093564582383e-05, "loss": 0.9076, "step": 1425 }, { "epoch": 0.5957271980279375, "grad_norm": 13.595422744750977, "learning_rate": 3.3021451392058424e-05, "loss": 0.8754, "step": 1450 }, { "epoch": 0.6059983566146261, "grad_norm": 8.661046028137207, "learning_rate": 3.359196713829302e-05, "loss": 0.8723, "step": 1475 }, { "epoch": 0.6162695152013147, "grad_norm": 13.609139442443848, "learning_rate": 3.4162482884527614e-05, "loss": 0.8326, "step": 1500 }, { "epoch": 0.6265406737880033, "grad_norm": 14.61794662475586, "learning_rate": 3.473299863076221e-05, "loss": 0.8022, "step": 1525 }, { "epoch": 0.6368118323746919, "grad_norm": 10.622493743896484, "learning_rate": 3.5303514376996804e-05, "loss": 0.8726, "step": 1550 }, { "epoch": 0.6470829909613804, "grad_norm": 5.068788051605225, "learning_rate": 3.585120949338202e-05, "loss": 0.8936, "step": 1575 }, { "epoch": 0.657354149548069, "grad_norm": 6.1167402267456055, "learning_rate": 3.6421725239616614e-05, "loss": 0.8834, "step": 1600 }, { "epoch": 0.6676253081347576, "grad_norm": 6.667758941650391, "learning_rate": 3.699224098585121e-05, "loss": 0.925, "step": 1625 }, { "epoch": 0.6778964667214462, "grad_norm": 11.814676284790039, "learning_rate": 3.7562756732085804e-05, "loss": 0.8439, "step": 1650 }, { "epoch": 0.6881676253081348, "grad_norm": 13.39101505279541, "learning_rate": 3.8133272478320406e-05, "loss": 0.7441, "step": 1675 }, { "epoch": 0.6984387838948234, "grad_norm": 7.23984956741333, "learning_rate": 3.8703788224555e-05, "loss": 0.939, "step": 1700 }, { "epoch": 0.7087099424815119, "grad_norm": 16.45111846923828, "learning_rate": 3.9274303970789596e-05, "loss": 0.9217, "step": 1725 }, { "epoch": 0.7189811010682005, "grad_norm": 13.015716552734375, "learning_rate": 3.984481971702419e-05, "loss": 0.9648, "step": 1750 }, { "epoch": 0.729252259654889, "grad_norm": 4.811708927154541, "learning_rate": 4.041533546325879e-05, "loss": 0.854, "step": 1775 }, { "epoch": 0.7395234182415776, "grad_norm": 12.734000205993652, "learning_rate": 4.098585120949339e-05, "loss": 0.8663, "step": 1800 }, { "epoch": 0.7497945768282662, "grad_norm": 5.706826210021973, "learning_rate": 4.1556366955727984e-05, "loss": 0.9436, "step": 1825 }, { "epoch": 0.7600657354149548, "grad_norm": 10.904698371887207, "learning_rate": 4.212688270196258e-05, "loss": 0.9147, "step": 1850 }, { "epoch": 0.7703368940016434, "grad_norm": 13.604780197143555, "learning_rate": 4.2697398448197174e-05, "loss": 0.9512, "step": 1875 }, { "epoch": 0.780608052588332, "grad_norm": 11.89741325378418, "learning_rate": 4.326791419443177e-05, "loss": 0.9356, "step": 1900 }, { "epoch": 0.7908792111750206, "grad_norm": 9.004523277282715, "learning_rate": 4.3838429940666364e-05, "loss": 0.927, "step": 1925 }, { "epoch": 0.8011503697617092, "grad_norm": 11.654400825500488, "learning_rate": 4.440894568690096e-05, "loss": 0.8424, "step": 1950 }, { "epoch": 0.8114215283483976, "grad_norm": 8.863574028015137, "learning_rate": 4.4979461433135554e-05, "loss": 0.8867, "step": 1975 }, { "epoch": 0.8216926869350862, "grad_norm": 6.209610462188721, "learning_rate": 4.554997717937015e-05, "loss": 0.8153, "step": 2000 }, { "epoch": 0.8319638455217748, "grad_norm": 7.504034996032715, "learning_rate": 4.6120492925604745e-05, "loss": 0.855, "step": 2025 }, { "epoch": 0.8422350041084634, "grad_norm": 12.570643424987793, "learning_rate": 4.669100867183934e-05, "loss": 0.9577, "step": 2050 }, { "epoch": 0.852506162695152, "grad_norm": 9.295639038085938, "learning_rate": 4.726152441807394e-05, "loss": 0.84, "step": 2075 }, { "epoch": 0.8627773212818406, "grad_norm": 8.446712493896484, "learning_rate": 4.783204016430854e-05, "loss": 0.9405, "step": 2100 }, { "epoch": 0.8730484798685292, "grad_norm": 9.992650032043457, "learning_rate": 4.840255591054313e-05, "loss": 1.0059, "step": 2125 }, { "epoch": 0.8833196384552178, "grad_norm": 9.760039329528809, "learning_rate": 4.897307165677773e-05, "loss": 0.8583, "step": 2150 }, { "epoch": 0.8935907970419064, "grad_norm": 17.948223114013672, "learning_rate": 4.954358740301233e-05, "loss": 0.9798, "step": 2175 }, { "epoch": 0.903861955628595, "grad_norm": 10.00885009765625, "learning_rate": 4.9999992064830994e-05, "loss": 1.1331, "step": 2200 }, { "epoch": 0.9141331142152835, "grad_norm": 9.577177047729492, "learning_rate": 4.9999714334444495e-05, "loss": 1.1493, "step": 2225 }, { "epoch": 0.924404272801972, "grad_norm": 12.528915405273438, "learning_rate": 4.999903985064472e-05, "loss": 1.1891, "step": 2250 }, { "epoch": 0.9346754313886606, "grad_norm": 21.42876625061035, "learning_rate": 4.9997968624135974e-05, "loss": 1.1592, "step": 2275 }, { "epoch": 0.9449465899753492, "grad_norm": 4.62033748626709, "learning_rate": 4.9996500671918936e-05, "loss": 1.1924, "step": 2300 }, { "epoch": 0.9552177485620378, "grad_norm": 12.126282691955566, "learning_rate": 4.999463601729047e-05, "loss": 1.1318, "step": 2325 }, { "epoch": 0.9654889071487264, "grad_norm": 7.639413356781006, "learning_rate": 4.999237468984326e-05, "loss": 1.1486, "step": 2350 }, { "epoch": 0.975760065735415, "grad_norm": 7.761475563049316, "learning_rate": 4.998971672546527e-05, "loss": 1.1463, "step": 2375 }, { "epoch": 0.9860312243221035, "grad_norm": 9.19140911102295, "learning_rate": 4.998666216633926e-05, "loss": 1.0832, "step": 2400 }, { "epoch": 0.9963023829087921, "grad_norm": 6.729145526885986, "learning_rate": 4.998321106094204e-05, "loss": 1.0749, "step": 2425 }, { "epoch": 1.0, "eval_accuracy": 0.5863556971129148, "eval_f1_macro": 0.29454395983476717, "eval_f1_micro": 0.5863556971129148, "eval_f1_weighted": 0.47959265391783995, "eval_loss": 1.1192505359649658, "eval_precision_macro": 0.4105745703977606, "eval_precision_micro": 0.5863556971129148, "eval_precision_weighted": 0.5196212057725517, "eval_recall_macro": 0.3058262541410274, "eval_recall_micro": 0.5863556971129148, "eval_recall_weighted": 0.5863556971129148, "eval_runtime": 58.2052, "eval_samples_per_second": 167.219, "eval_steps_per_second": 5.24, "step": 2434 }, { "epoch": 1.0065735414954806, "grad_norm": 12.559377670288086, "learning_rate": 4.997936346404375e-05, "loss": 1.2554, "step": 2450 }, { "epoch": 1.0168447000821692, "grad_norm": 5.791057586669922, "learning_rate": 4.997511943670697e-05, "loss": 1.084, "step": 2475 }, { "epoch": 1.0271158586688578, "grad_norm": 13.0701322555542, "learning_rate": 4.9970479046285785e-05, "loss": 1.1482, "step": 2500 }, { "epoch": 1.0373870172555464, "grad_norm": 51.21843338012695, "learning_rate": 4.9965442366424646e-05, "loss": 1.1803, "step": 2525 }, { "epoch": 1.047658175842235, "grad_norm": 8.89924430847168, "learning_rate": 4.996000947705727e-05, "loss": 1.1225, "step": 2550 }, { "epoch": 1.0579293344289236, "grad_norm": 9.04704761505127, "learning_rate": 4.9954180464405345e-05, "loss": 1.1617, "step": 2575 }, { "epoch": 1.0682004930156122, "grad_norm": 7.195584297180176, "learning_rate": 4.9947955420977144e-05, "loss": 1.0882, "step": 2600 }, { "epoch": 1.0784716516023007, "grad_norm": 16.638315200805664, "learning_rate": 4.994133444556609e-05, "loss": 1.1194, "step": 2625 }, { "epoch": 1.0887428101889893, "grad_norm": 4.319007396697998, "learning_rate": 4.993431764324916e-05, "loss": 1.1418, "step": 2650 }, { "epoch": 1.099013968775678, "grad_norm": 17.74847984313965, "learning_rate": 4.992690512538525e-05, "loss": 1.0342, "step": 2675 }, { "epoch": 1.1092851273623665, "grad_norm": 10.52570915222168, "learning_rate": 4.991909700961337e-05, "loss": 1.1285, "step": 2700 }, { "epoch": 1.119556285949055, "grad_norm": 5.708019733428955, "learning_rate": 4.991089341985079e-05, "loss": 1.0229, "step": 2725 }, { "epoch": 1.1298274445357437, "grad_norm": 21.305837631225586, "learning_rate": 4.9902294486291104e-05, "loss": 1.0598, "step": 2750 }, { "epoch": 1.1400986031224323, "grad_norm": 23.91814422607422, "learning_rate": 4.9893300345402105e-05, "loss": 1.1705, "step": 2775 }, { "epoch": 1.1503697617091209, "grad_norm": 8.894378662109375, "learning_rate": 4.988391113992368e-05, "loss": 1.0257, "step": 2800 }, { "epoch": 1.1606409202958095, "grad_norm": 12.799375534057617, "learning_rate": 4.987412701886549e-05, "loss": 1.1463, "step": 2825 }, { "epoch": 1.170912078882498, "grad_norm": 12.658759117126465, "learning_rate": 4.9863948137504666e-05, "loss": 1.1462, "step": 2850 }, { "epoch": 1.1811832374691864, "grad_norm": 6.2793707847595215, "learning_rate": 4.9853374657383275e-05, "loss": 1.0928, "step": 2875 }, { "epoch": 1.191454396055875, "grad_norm": 10.470609664916992, "learning_rate": 4.9842406746305814e-05, "loss": 1.0454, "step": 2900 }, { "epoch": 1.2017255546425636, "grad_norm": 46.87442398071289, "learning_rate": 4.9831044578336524e-05, "loss": 1.1075, "step": 2925 }, { "epoch": 1.2119967132292522, "grad_norm": 9.962231636047363, "learning_rate": 4.981928833379661e-05, "loss": 1.1, "step": 2950 }, { "epoch": 1.2222678718159408, "grad_norm": 4.477356433868408, "learning_rate": 4.9807138199261414e-05, "loss": 1.0811, "step": 2975 }, { "epoch": 1.2325390304026294, "grad_norm": 11.651799201965332, "learning_rate": 4.9794594367557435e-05, "loss": 1.024, "step": 3000 }, { "epoch": 1.242810188989318, "grad_norm": 7.755206108093262, "learning_rate": 4.978165703775927e-05, "loss": 1.067, "step": 3025 }, { "epoch": 1.2530813475760065, "grad_norm": 15.455122947692871, "learning_rate": 4.9768326415186454e-05, "loss": 1.0311, "step": 3050 }, { "epoch": 1.2633525061626951, "grad_norm": 9.131962776184082, "learning_rate": 4.975460271140021e-05, "loss": 1.0401, "step": 3075 }, { "epoch": 1.2736236647493837, "grad_norm": 29.119171142578125, "learning_rate": 4.974048614420007e-05, "loss": 1.0969, "step": 3100 }, { "epoch": 1.2838948233360723, "grad_norm": 6.163785457611084, "learning_rate": 4.972597693762045e-05, "loss": 1.1278, "step": 3125 }, { "epoch": 1.2941659819227609, "grad_norm": 10.012632369995117, "learning_rate": 4.971107532192707e-05, "loss": 1.0993, "step": 3150 }, { "epoch": 1.3044371405094495, "grad_norm": 6.5291666984558105, "learning_rate": 4.96957815336133e-05, "loss": 1.0934, "step": 3175 }, { "epoch": 1.314708299096138, "grad_norm": 18.37449836730957, "learning_rate": 4.9680095815396435e-05, "loss": 1.104, "step": 3200 }, { "epoch": 1.3249794576828267, "grad_norm": 8.442584991455078, "learning_rate": 4.9664018416213796e-05, "loss": 1.0918, "step": 3225 }, { "epoch": 1.3352506162695152, "grad_norm": 5.685943603515625, "learning_rate": 4.9647549591218835e-05, "loss": 1.0535, "step": 3250 }, { "epoch": 1.3455217748562038, "grad_norm": 5.7307448387146, "learning_rate": 4.963068960177701e-05, "loss": 1.1604, "step": 3275 }, { "epoch": 1.3557929334428924, "grad_norm": 4.045636177062988, "learning_rate": 4.961343871546175e-05, "loss": 1.0484, "step": 3300 }, { "epoch": 1.366064092029581, "grad_norm": 6.996286869049072, "learning_rate": 4.959579720605009e-05, "loss": 1.1055, "step": 3325 }, { "epoch": 1.3763352506162696, "grad_norm": 10.471301078796387, "learning_rate": 4.957776535351838e-05, "loss": 1.119, "step": 3350 }, { "epoch": 1.3866064092029582, "grad_norm": 6.319746017456055, "learning_rate": 4.955934344403786e-05, "loss": 1.127, "step": 3375 }, { "epoch": 1.3968775677896468, "grad_norm": 6.542300224304199, "learning_rate": 4.954053176997009e-05, "loss": 0.9722, "step": 3400 }, { "epoch": 1.4071487263763354, "grad_norm": 7.353511810302734, "learning_rate": 4.952133062986232e-05, "loss": 1.0545, "step": 3425 }, { "epoch": 1.417419884963024, "grad_norm": 6.178640842437744, "learning_rate": 4.950174032844274e-05, "loss": 1.022, "step": 3450 }, { "epoch": 1.4276910435497125, "grad_norm": 7.4956955909729, "learning_rate": 4.948176117661567e-05, "loss": 1.0411, "step": 3475 }, { "epoch": 1.437962202136401, "grad_norm": 16.34415626525879, "learning_rate": 4.9461393491456595e-05, "loss": 1.0635, "step": 3500 }, { "epoch": 1.4482333607230895, "grad_norm": 16.984901428222656, "learning_rate": 4.944063759620715e-05, "loss": 1.1581, "step": 3525 }, { "epoch": 1.458504519309778, "grad_norm": 17.63494300842285, "learning_rate": 4.941949382027e-05, "loss": 1.0962, "step": 3550 }, { "epoch": 1.4687756778964667, "grad_norm": 5.165820598602295, "learning_rate": 4.939796249920358e-05, "loss": 1.0976, "step": 3575 }, { "epoch": 1.4790468364831553, "grad_norm": 6.0279436111450195, "learning_rate": 4.93760439747168e-05, "loss": 1.0734, "step": 3600 }, { "epoch": 1.4893179950698439, "grad_norm": 4.7154717445373535, "learning_rate": 4.935373859466361e-05, "loss": 1.0507, "step": 3625 }, { "epoch": 1.4995891536565324, "grad_norm": 11.337488174438477, "learning_rate": 4.93310467130375e-05, "loss": 1.1746, "step": 3650 }, { "epoch": 1.509860312243221, "grad_norm": 18.193342208862305, "learning_rate": 4.930796868996582e-05, "loss": 1.0982, "step": 3675 }, { "epoch": 1.5201314708299096, "grad_norm": 7.017096996307373, "learning_rate": 4.928450489170415e-05, "loss": 1.1556, "step": 3700 }, { "epoch": 1.5304026294165982, "grad_norm": 2.5547895431518555, "learning_rate": 4.926065569063041e-05, "loss": 1.0685, "step": 3725 }, { "epoch": 1.5406737880032868, "grad_norm": 8.476696014404297, "learning_rate": 4.9236421465239025e-05, "loss": 1.0737, "step": 3750 }, { "epoch": 1.5509449465899754, "grad_norm": 8.107565879821777, "learning_rate": 4.921180260013483e-05, "loss": 1.086, "step": 3775 }, { "epoch": 1.561216105176664, "grad_norm": 8.92119312286377, "learning_rate": 4.9186799486027057e-05, "loss": 1.1056, "step": 3800 }, { "epoch": 1.5714872637633523, "grad_norm": 9.719097137451172, "learning_rate": 4.916141251972307e-05, "loss": 0.9651, "step": 3825 }, { "epoch": 1.581758422350041, "grad_norm": 16.80173110961914, "learning_rate": 4.9135642104122093e-05, "loss": 0.9173, "step": 3850 }, { "epoch": 1.5920295809367295, "grad_norm": 15.660393714904785, "learning_rate": 4.910948864820882e-05, "loss": 1.0374, "step": 3875 }, { "epoch": 1.6023007395234181, "grad_norm": 17.720876693725586, "learning_rate": 4.90829525670469e-05, "loss": 0.9644, "step": 3900 }, { "epoch": 1.6125718981101067, "grad_norm": 31.2305965423584, "learning_rate": 4.905603428177238e-05, "loss": 1.0033, "step": 3925 }, { "epoch": 1.6228430566967953, "grad_norm": 7.728428840637207, "learning_rate": 4.9028734219587e-05, "loss": 1.0823, "step": 3950 }, { "epoch": 1.6331142152834839, "grad_norm": 15.458768844604492, "learning_rate": 4.900105281375143e-05, "loss": 1.0676, "step": 3975 }, { "epoch": 1.6433853738701725, "grad_norm": 18.69976806640625, "learning_rate": 4.8972990503578386e-05, "loss": 1.0445, "step": 4000 }, { "epoch": 1.653656532456861, "grad_norm": 8.92398452758789, "learning_rate": 4.894454773442564e-05, "loss": 1.0722, "step": 4025 }, { "epoch": 1.6639276910435497, "grad_norm": 12.593587875366211, "learning_rate": 4.891572495768898e-05, "loss": 1.0598, "step": 4050 }, { "epoch": 1.6741988496302382, "grad_norm": 4.575959205627441, "learning_rate": 4.8886522630795045e-05, "loss": 1.0429, "step": 4075 }, { "epoch": 1.6844700082169268, "grad_norm": 14.307016372680664, "learning_rate": 4.8856941217194035e-05, "loss": 0.9699, "step": 4100 }, { "epoch": 1.6947411668036154, "grad_norm": 6.749273300170898, "learning_rate": 4.882698118635239e-05, "loss": 1.0919, "step": 4125 }, { "epoch": 1.705012325390304, "grad_norm": 9.383212089538574, "learning_rate": 4.879664301374532e-05, "loss": 1.1693, "step": 4150 }, { "epoch": 1.7152834839769926, "grad_norm": 4.1049909591674805, "learning_rate": 4.876592718084928e-05, "loss": 1.159, "step": 4175 }, { "epoch": 1.7255546425636812, "grad_norm": 8.32151985168457, "learning_rate": 4.873483417513429e-05, "loss": 1.0969, "step": 4200 }, { "epoch": 1.7358258011503698, "grad_norm": 6.129182815551758, "learning_rate": 4.8703364490056245e-05, "loss": 1.0632, "step": 4225 }, { "epoch": 1.7460969597370584, "grad_norm": 5.181979179382324, "learning_rate": 4.8671518625049036e-05, "loss": 0.9523, "step": 4250 }, { "epoch": 1.756368118323747, "grad_norm": 6.566084384918213, "learning_rate": 4.863929708551667e-05, "loss": 1.0336, "step": 4275 }, { "epoch": 1.7666392769104355, "grad_norm": 14.47562313079834, "learning_rate": 4.8606700382825224e-05, "loss": 1.0493, "step": 4300 }, { "epoch": 1.7769104354971241, "grad_norm": 3.870394706726074, "learning_rate": 4.8573729034294736e-05, "loss": 1.1012, "step": 4325 }, { "epoch": 1.7871815940838127, "grad_norm": 21.563486099243164, "learning_rate": 4.854038356319098e-05, "loss": 1.0629, "step": 4350 }, { "epoch": 1.7974527526705013, "grad_norm": 4.163707256317139, "learning_rate": 4.850666449871718e-05, "loss": 1.0252, "step": 4375 }, { "epoch": 1.80772391125719, "grad_norm": 7.478860378265381, "learning_rate": 4.8472572376005614e-05, "loss": 1.1695, "step": 4400 }, { "epoch": 1.8179950698438785, "grad_norm": 8.34506893157959, "learning_rate": 4.843810773610911e-05, "loss": 1.0471, "step": 4425 }, { "epoch": 1.828266228430567, "grad_norm": 7.001756191253662, "learning_rate": 4.840327112599244e-05, "loss": 1.0156, "step": 4450 }, { "epoch": 1.8385373870172557, "grad_norm": 4.876439094543457, "learning_rate": 4.836806309852371e-05, "loss": 1.022, "step": 4475 }, { "epoch": 1.8488085456039443, "grad_norm": 5.670014381408691, "learning_rate": 4.833248421246548e-05, "loss": 1.1103, "step": 4500 }, { "epoch": 1.8590797041906328, "grad_norm": 5.291884422302246, "learning_rate": 4.8296535032466e-05, "loss": 1.0795, "step": 4525 }, { "epoch": 1.8693508627773214, "grad_norm": 7.388947486877441, "learning_rate": 4.826021612905017e-05, "loss": 1.1053, "step": 4550 }, { "epoch": 1.87962202136401, "grad_norm": 5.867747783660889, "learning_rate": 4.822352807861054e-05, "loss": 1.0175, "step": 4575 }, { "epoch": 1.8898931799506984, "grad_norm": 11.747688293457031, "learning_rate": 4.818647146339813e-05, "loss": 1.0419, "step": 4600 }, { "epoch": 1.900164338537387, "grad_norm": 7.364708423614502, "learning_rate": 4.814904687151321e-05, "loss": 1.0909, "step": 4625 }, { "epoch": 1.9104354971240756, "grad_norm": 10.691391944885254, "learning_rate": 4.811125489689594e-05, "loss": 1.1139, "step": 4650 }, { "epoch": 1.9207066557107642, "grad_norm": 61.839820861816406, "learning_rate": 4.807309613931698e-05, "loss": 1.0478, "step": 4675 }, { "epoch": 1.9309778142974527, "grad_norm": 3.956354856491089, "learning_rate": 4.803457120436792e-05, "loss": 1.1058, "step": 4700 }, { "epoch": 1.9412489728841413, "grad_norm": 8.450248718261719, "learning_rate": 4.799568070345175e-05, "loss": 1.1035, "step": 4725 }, { "epoch": 1.95152013147083, "grad_norm": 9.974756240844727, "learning_rate": 4.795642525377307e-05, "loss": 1.0522, "step": 4750 }, { "epoch": 1.9617912900575185, "grad_norm": 23.98746109008789, "learning_rate": 4.791680547832835e-05, "loss": 1.0571, "step": 4775 }, { "epoch": 1.972062448644207, "grad_norm": 10.666019439697266, "learning_rate": 4.7876822005896016e-05, "loss": 1.1004, "step": 4800 }, { "epoch": 1.9823336072308957, "grad_norm": 8.37453842163086, "learning_rate": 4.7836475471026484e-05, "loss": 1.106, "step": 4825 }, { "epoch": 1.9926047658175843, "grad_norm": 6.877459526062012, "learning_rate": 4.779576651403209e-05, "loss": 1.0404, "step": 4850 }, { "epoch": 2.0, "eval_accuracy": 0.6005342648720846, "eval_f1_macro": 0.34074175503832355, "eval_f1_micro": 0.6005342648720846, "eval_f1_weighted": 0.5316262488920555, "eval_loss": 1.064771056175232, "eval_precision_macro": 0.42047577765386696, "eval_precision_micro": 0.6005342648720846, "eval_precision_weighted": 0.5362395289892545, "eval_recall_macro": 0.32992208872704937, "eval_recall_micro": 0.6005342648720846, "eval_recall_weighted": 0.6005342648720846, "eval_runtime": 58.1251, "eval_samples_per_second": 167.449, "eval_steps_per_second": 5.247, "step": 4868 } ], "logging_steps": 25, "max_steps": 21906, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.629350229586739e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }