|
{ |
|
"best_metric": 0.011031342670321465, |
|
"best_model_checkpoint": "autotrain-mb2mv-qdf75/checkpoint-3839", |
|
"epoch": 11.0, |
|
"eval_steps": 500, |
|
"global_step": 3839, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04871060171919771, |
|
"grad_norm": 12.027495384216309, |
|
"learning_rate": 2.028639618138425e-06, |
|
"loss": 4.4947, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09742120343839542, |
|
"grad_norm": 11.511441230773926, |
|
"learning_rate": 4.05727923627685e-06, |
|
"loss": 4.4912, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.14613180515759314, |
|
"grad_norm": 12.500868797302246, |
|
"learning_rate": 5.966587112171838e-06, |
|
"loss": 4.4842, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.19484240687679083, |
|
"grad_norm": 11.219860076904297, |
|
"learning_rate": 7.995226730310263e-06, |
|
"loss": 4.4186, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.24355300859598855, |
|
"grad_norm": 11.510499000549316, |
|
"learning_rate": 1.0023866348448688e-05, |
|
"loss": 4.5182, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2922636103151863, |
|
"grad_norm": 12.528426170349121, |
|
"learning_rate": 1.2052505966587113e-05, |
|
"loss": 4.4535, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.34097421203438394, |
|
"grad_norm": 11.191871643066406, |
|
"learning_rate": 1.4081145584725539e-05, |
|
"loss": 4.4233, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.38968481375358166, |
|
"grad_norm": 13.019774436950684, |
|
"learning_rate": 1.6109785202863962e-05, |
|
"loss": 4.3845, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.4383954154727794, |
|
"grad_norm": 11.506011962890625, |
|
"learning_rate": 1.8138424821002386e-05, |
|
"loss": 4.3334, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4871060171919771, |
|
"grad_norm": 11.238750457763672, |
|
"learning_rate": 2.0167064439140813e-05, |
|
"loss": 4.2554, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5358166189111748, |
|
"grad_norm": 11.771519660949707, |
|
"learning_rate": 2.2195704057279237e-05, |
|
"loss": 4.2028, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5845272206303725, |
|
"grad_norm": 12.988870620727539, |
|
"learning_rate": 2.4224343675417664e-05, |
|
"loss": 4.0815, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.6332378223495702, |
|
"grad_norm": 12.157154083251953, |
|
"learning_rate": 2.6252983293556088e-05, |
|
"loss": 4.1055, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6819484240687679, |
|
"grad_norm": 12.214086532592773, |
|
"learning_rate": 2.828162291169451e-05, |
|
"loss": 3.9383, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.7306590257879656, |
|
"grad_norm": 11.511253356933594, |
|
"learning_rate": 3.031026252983294e-05, |
|
"loss": 3.9894, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7793696275071633, |
|
"grad_norm": 13.291036605834961, |
|
"learning_rate": 3.233890214797136e-05, |
|
"loss": 3.7944, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.828080229226361, |
|
"grad_norm": 13.204787254333496, |
|
"learning_rate": 3.424821002386635e-05, |
|
"loss": 3.7806, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.8767908309455588, |
|
"grad_norm": 13.473506927490234, |
|
"learning_rate": 3.627684964200477e-05, |
|
"loss": 3.6716, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.9255014326647565, |
|
"grad_norm": 12.780734062194824, |
|
"learning_rate": 3.83054892601432e-05, |
|
"loss": 3.4641, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.9742120343839542, |
|
"grad_norm": 20.36208724975586, |
|
"learning_rate": 4.0334128878281626e-05, |
|
"loss": 3.3018, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.43902439024390244, |
|
"eval_f1_macro": 0.3762682151157721, |
|
"eval_f1_micro": 0.43902439024390244, |
|
"eval_f1_weighted": 0.38500671624754507, |
|
"eval_loss": 2.9648730754852295, |
|
"eval_precision_macro": 0.5231114557498497, |
|
"eval_precision_micro": 0.43902439024390244, |
|
"eval_precision_weighted": 0.5343145132587477, |
|
"eval_recall_macro": 0.41928580121351205, |
|
"eval_recall_micro": 0.43902439024390244, |
|
"eval_recall_weighted": 0.43902439024390244, |
|
"eval_runtime": 3.4206, |
|
"eval_samples_per_second": 203.767, |
|
"eval_steps_per_second": 12.863, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.0229226361031518, |
|
"grad_norm": 11.021674156188965, |
|
"learning_rate": 4.236276849642005e-05, |
|
"loss": 3.1839, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.0716332378223496, |
|
"grad_norm": 12.111454963684082, |
|
"learning_rate": 4.4391408114558474e-05, |
|
"loss": 3.0222, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.1203438395415473, |
|
"grad_norm": 12.602492332458496, |
|
"learning_rate": 4.64200477326969e-05, |
|
"loss": 2.8135, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.1690544412607449, |
|
"grad_norm": 11.924349784851074, |
|
"learning_rate": 4.844868735083533e-05, |
|
"loss": 2.6733, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.2177650429799427, |
|
"grad_norm": 11.714224815368652, |
|
"learning_rate": 4.9946935526664904e-05, |
|
"loss": 2.4497, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.2664756446991405, |
|
"grad_norm": 13.355523109436035, |
|
"learning_rate": 4.972141151499072e-05, |
|
"loss": 2.3633, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.3151862464183381, |
|
"grad_norm": 11.773361206054688, |
|
"learning_rate": 4.949588750331653e-05, |
|
"loss": 2.2063, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.3638968481375358, |
|
"grad_norm": 12.601265907287598, |
|
"learning_rate": 4.9270363491642345e-05, |
|
"loss": 2.2622, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.4126074498567336, |
|
"grad_norm": 11.489968299865723, |
|
"learning_rate": 4.9044839479968165e-05, |
|
"loss": 2.0349, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.4613180515759312, |
|
"grad_norm": 11.553853034973145, |
|
"learning_rate": 4.881931546829398e-05, |
|
"loss": 1.8452, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.5100286532951288, |
|
"grad_norm": 13.461709976196289, |
|
"learning_rate": 4.85937914566198e-05, |
|
"loss": 1.7711, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.5587392550143266, |
|
"grad_norm": 11.289037704467773, |
|
"learning_rate": 4.8368267444945606e-05, |
|
"loss": 1.562, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.6074498567335245, |
|
"grad_norm": 9.85034465789795, |
|
"learning_rate": 4.814274343327143e-05, |
|
"loss": 1.4258, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.656160458452722, |
|
"grad_norm": 8.626704216003418, |
|
"learning_rate": 4.791721942159724e-05, |
|
"loss": 1.2561, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.7048710601719197, |
|
"grad_norm": 8.728435516357422, |
|
"learning_rate": 4.769169540992306e-05, |
|
"loss": 1.229, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.7535816618911175, |
|
"grad_norm": 11.541463851928711, |
|
"learning_rate": 4.7466171398248875e-05, |
|
"loss": 1.175, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.8022922636103151, |
|
"grad_norm": 11.69316291809082, |
|
"learning_rate": 4.724064738657469e-05, |
|
"loss": 1.2961, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.8510028653295127, |
|
"grad_norm": 7.8675737380981445, |
|
"learning_rate": 4.70151233749005e-05, |
|
"loss": 0.9699, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.8997134670487106, |
|
"grad_norm": 7.240268230438232, |
|
"learning_rate": 4.678959936322632e-05, |
|
"loss": 1.235, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.9484240687679084, |
|
"grad_norm": 6.3042426109313965, |
|
"learning_rate": 4.6564075351552136e-05, |
|
"loss": 1.1121, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.997134670487106, |
|
"grad_norm": 8.941567420959473, |
|
"learning_rate": 4.633855133987796e-05, |
|
"loss": 0.9363, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.945480631276901, |
|
"eval_f1_macro": 0.9425017115213628, |
|
"eval_f1_micro": 0.945480631276901, |
|
"eval_f1_weighted": 0.9445694655877982, |
|
"eval_loss": 0.5551677346229553, |
|
"eval_precision_macro": 0.9567204214794576, |
|
"eval_precision_micro": 0.945480631276901, |
|
"eval_precision_weighted": 0.9597403513113699, |
|
"eval_recall_macro": 0.94451833307255, |
|
"eval_recall_micro": 0.945480631276901, |
|
"eval_recall_weighted": 0.945480631276901, |
|
"eval_runtime": 3.3936, |
|
"eval_samples_per_second": 205.389, |
|
"eval_steps_per_second": 12.966, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 2.0458452722063036, |
|
"grad_norm": 4.50822114944458, |
|
"learning_rate": 4.611302732820377e-05, |
|
"loss": 1.0456, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 2.0945558739255015, |
|
"grad_norm": 5.0435686111450195, |
|
"learning_rate": 4.5887503316529584e-05, |
|
"loss": 0.5131, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 2.1432664756446993, |
|
"grad_norm": 5.728665351867676, |
|
"learning_rate": 4.5661979304855405e-05, |
|
"loss": 0.705, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 2.1919770773638967, |
|
"grad_norm": 4.446041584014893, |
|
"learning_rate": 4.543645529318122e-05, |
|
"loss": 0.6504, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.2406876790830945, |
|
"grad_norm": 5.601328372955322, |
|
"learning_rate": 4.521093128150703e-05, |
|
"loss": 0.6361, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 2.2893982808022924, |
|
"grad_norm": 6.830723285675049, |
|
"learning_rate": 4.4985407269832846e-05, |
|
"loss": 0.7554, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 2.3381088825214897, |
|
"grad_norm": 9.851592063903809, |
|
"learning_rate": 4.4759883258158666e-05, |
|
"loss": 0.6952, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.3868194842406876, |
|
"grad_norm": 12.239367485046387, |
|
"learning_rate": 4.453435924648448e-05, |
|
"loss": 0.7793, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.4355300859598854, |
|
"grad_norm": 4.4265360832214355, |
|
"learning_rate": 4.43088352348103e-05, |
|
"loss": 0.5841, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.4842406876790832, |
|
"grad_norm": 11.818037986755371, |
|
"learning_rate": 4.4083311223136114e-05, |
|
"loss": 0.6259, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.532951289398281, |
|
"grad_norm": 13.520539283752441, |
|
"learning_rate": 4.385778721146193e-05, |
|
"loss": 0.674, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.5816618911174785, |
|
"grad_norm": 12.981139183044434, |
|
"learning_rate": 4.363226319978774e-05, |
|
"loss": 0.7362, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.6303724928366763, |
|
"grad_norm": 12.412555694580078, |
|
"learning_rate": 4.340673918811356e-05, |
|
"loss": 0.557, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.6790830945558737, |
|
"grad_norm": 7.840790748596191, |
|
"learning_rate": 4.3181215176439375e-05, |
|
"loss": 0.6293, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.7277936962750715, |
|
"grad_norm": 4.981259822845459, |
|
"learning_rate": 4.2955691164765196e-05, |
|
"loss": 0.5793, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.7765042979942693, |
|
"grad_norm": 7.165767669677734, |
|
"learning_rate": 4.2730167153091e-05, |
|
"loss": 0.5705, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.825214899713467, |
|
"grad_norm": 6.7298736572265625, |
|
"learning_rate": 4.250464314141682e-05, |
|
"loss": 0.5446, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.873925501432665, |
|
"grad_norm": 7.268840789794922, |
|
"learning_rate": 4.227911912974264e-05, |
|
"loss": 0.4438, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.9226361031518624, |
|
"grad_norm": 1.802043080329895, |
|
"learning_rate": 4.205359511806846e-05, |
|
"loss": 0.4892, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.9713467048710602, |
|
"grad_norm": 10.247010231018066, |
|
"learning_rate": 4.182807110639427e-05, |
|
"loss": 0.4922, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9727403156384505, |
|
"eval_f1_macro": 0.9705769072502259, |
|
"eval_f1_micro": 0.9727403156384505, |
|
"eval_f1_weighted": 0.9719785983620151, |
|
"eval_loss": 0.17863501608371735, |
|
"eval_precision_macro": 0.9795910916392845, |
|
"eval_precision_micro": 0.9727403156384505, |
|
"eval_precision_weighted": 0.9787328087184615, |
|
"eval_recall_macro": 0.969774335436986, |
|
"eval_recall_micro": 0.9727403156384505, |
|
"eval_recall_weighted": 0.9727403156384505, |
|
"eval_runtime": 3.4979, |
|
"eval_samples_per_second": 199.263, |
|
"eval_steps_per_second": 12.579, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 3.020057306590258, |
|
"grad_norm": 9.481998443603516, |
|
"learning_rate": 4.1602547094720085e-05, |
|
"loss": 0.5422, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 3.0687679083094554, |
|
"grad_norm": 9.671106338500977, |
|
"learning_rate": 4.13770230830459e-05, |
|
"loss": 0.408, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 3.1174785100286533, |
|
"grad_norm": 1.3068506717681885, |
|
"learning_rate": 4.115149907137172e-05, |
|
"loss": 0.404, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 3.166189111747851, |
|
"grad_norm": 8.020153045654297, |
|
"learning_rate": 4.092597505969753e-05, |
|
"loss": 0.4022, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 3.2148997134670485, |
|
"grad_norm": 9.03290843963623, |
|
"learning_rate": 4.070045104802335e-05, |
|
"loss": 0.4726, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 3.2636103151862463, |
|
"grad_norm": 8.347646713256836, |
|
"learning_rate": 4.047492703634917e-05, |
|
"loss": 0.4158, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 3.312320916905444, |
|
"grad_norm": 9.76726245880127, |
|
"learning_rate": 4.024940302467498e-05, |
|
"loss": 0.3523, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 3.361031518624642, |
|
"grad_norm": 8.464173316955566, |
|
"learning_rate": 4.00238790130008e-05, |
|
"loss": 0.5155, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 3.4097421203438394, |
|
"grad_norm": 4.331398963928223, |
|
"learning_rate": 3.9798355001326615e-05, |
|
"loss": 0.395, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.458452722063037, |
|
"grad_norm": 7.228985786437988, |
|
"learning_rate": 3.9572830989652435e-05, |
|
"loss": 0.3984, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 3.507163323782235, |
|
"grad_norm": 10.442928314208984, |
|
"learning_rate": 3.934730697797824e-05, |
|
"loss": 0.4143, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 3.555873925501433, |
|
"grad_norm": 8.429516792297363, |
|
"learning_rate": 3.912178296630406e-05, |
|
"loss": 0.3767, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 3.6045845272206303, |
|
"grad_norm": 12.501051902770996, |
|
"learning_rate": 3.8896258954629876e-05, |
|
"loss": 0.3507, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 3.653295128939828, |
|
"grad_norm": 7.7675652503967285, |
|
"learning_rate": 3.86707349429557e-05, |
|
"loss": 0.395, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 3.702005730659026, |
|
"grad_norm": 4.184613227844238, |
|
"learning_rate": 3.844521093128151e-05, |
|
"loss": 0.2874, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 3.7507163323782233, |
|
"grad_norm": 10.189749717712402, |
|
"learning_rate": 3.8219686919607324e-05, |
|
"loss": 0.3396, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 3.799426934097421, |
|
"grad_norm": 2.022300958633423, |
|
"learning_rate": 3.799416290793314e-05, |
|
"loss": 0.2436, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 3.848137535816619, |
|
"grad_norm": 8.71822452545166, |
|
"learning_rate": 3.776863889625896e-05, |
|
"loss": 0.3773, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 3.896848137535817, |
|
"grad_norm": 7.879873752593994, |
|
"learning_rate": 3.754311488458477e-05, |
|
"loss": 0.4686, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.945558739255014, |
|
"grad_norm": 1.0487672090530396, |
|
"learning_rate": 3.731759087291059e-05, |
|
"loss": 0.3921, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 3.994269340974212, |
|
"grad_norm": 8.260384559631348, |
|
"learning_rate": 3.70920668612364e-05, |
|
"loss": 0.2956, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9885222381635581, |
|
"eval_f1_macro": 0.9872476382417075, |
|
"eval_f1_micro": 0.9885222381635581, |
|
"eval_f1_weighted": 0.9882948425801252, |
|
"eval_loss": 0.09004738181829453, |
|
"eval_precision_macro": 0.9903644882560545, |
|
"eval_precision_micro": 0.9885222381635581, |
|
"eval_precision_weighted": 0.9902174543135807, |
|
"eval_recall_macro": 0.9865557467967108, |
|
"eval_recall_micro": 0.9885222381635581, |
|
"eval_recall_weighted": 0.9885222381635581, |
|
"eval_runtime": 3.3411, |
|
"eval_samples_per_second": 208.616, |
|
"eval_steps_per_second": 13.169, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 4.042979942693409, |
|
"grad_norm": 6.5528106689453125, |
|
"learning_rate": 3.686654284956222e-05, |
|
"loss": 0.4403, |
|
"step": 1411 |
|
}, |
|
{ |
|
"epoch": 4.091690544412607, |
|
"grad_norm": 3.8414504528045654, |
|
"learning_rate": 3.6641018837888034e-05, |
|
"loss": 0.2637, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 4.140401146131805, |
|
"grad_norm": 16.609180450439453, |
|
"learning_rate": 3.6415494826213854e-05, |
|
"loss": 0.3618, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 4.189111747851003, |
|
"grad_norm": 2.179348945617676, |
|
"learning_rate": 3.618997081453967e-05, |
|
"loss": 0.4447, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 4.237822349570201, |
|
"grad_norm": 3.5908546447753906, |
|
"learning_rate": 3.596444680286548e-05, |
|
"loss": 0.2905, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 4.286532951289399, |
|
"grad_norm": 7.550769805908203, |
|
"learning_rate": 3.5738922791191295e-05, |
|
"loss": 0.2448, |
|
"step": 1496 |
|
}, |
|
{ |
|
"epoch": 4.3352435530085955, |
|
"grad_norm": 0.7109397649765015, |
|
"learning_rate": 3.5513398779517116e-05, |
|
"loss": 0.2127, |
|
"step": 1513 |
|
}, |
|
{ |
|
"epoch": 4.383954154727793, |
|
"grad_norm": 1.54320228099823, |
|
"learning_rate": 3.528787476784293e-05, |
|
"loss": 0.2202, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.432664756446991, |
|
"grad_norm": 10.156286239624023, |
|
"learning_rate": 3.506235075616875e-05, |
|
"loss": 0.3263, |
|
"step": 1547 |
|
}, |
|
{ |
|
"epoch": 4.481375358166189, |
|
"grad_norm": 11.149276733398438, |
|
"learning_rate": 3.4836826744494563e-05, |
|
"loss": 0.213, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 4.530085959885387, |
|
"grad_norm": 14.087788581848145, |
|
"learning_rate": 3.461130273282038e-05, |
|
"loss": 0.3907, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 4.578796561604585, |
|
"grad_norm": 6.006841659545898, |
|
"learning_rate": 3.43857787211462e-05, |
|
"loss": 0.4959, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 4.6275071633237825, |
|
"grad_norm": 6.818835258483887, |
|
"learning_rate": 3.416025470947201e-05, |
|
"loss": 0.3309, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 4.6762177650429795, |
|
"grad_norm": 1.0696688890457153, |
|
"learning_rate": 3.393473069779783e-05, |
|
"loss": 0.2245, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 4.724928366762177, |
|
"grad_norm": 11.383952140808105, |
|
"learning_rate": 3.370920668612364e-05, |
|
"loss": 0.3473, |
|
"step": 1649 |
|
}, |
|
{ |
|
"epoch": 4.773638968481375, |
|
"grad_norm": 7.438843727111816, |
|
"learning_rate": 3.348368267444946e-05, |
|
"loss": 0.2508, |
|
"step": 1666 |
|
}, |
|
{ |
|
"epoch": 4.822349570200573, |
|
"grad_norm": 1.553702473640442, |
|
"learning_rate": 3.325815866277527e-05, |
|
"loss": 0.2669, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 4.871060171919771, |
|
"grad_norm": 5.968568325042725, |
|
"learning_rate": 3.303263465110109e-05, |
|
"loss": 0.1219, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.919770773638969, |
|
"grad_norm": 0.3757087290287018, |
|
"learning_rate": 3.280711063942691e-05, |
|
"loss": 0.2749, |
|
"step": 1717 |
|
}, |
|
{ |
|
"epoch": 4.9684813753581665, |
|
"grad_norm": 7.143729209899902, |
|
"learning_rate": 3.258158662775272e-05, |
|
"loss": 0.1591, |
|
"step": 1734 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9885222381635581, |
|
"eval_f1_macro": 0.9880046003912989, |
|
"eval_f1_micro": 0.9885222381635581, |
|
"eval_f1_weighted": 0.9883902987879117, |
|
"eval_loss": 0.05691728740930557, |
|
"eval_precision_macro": 0.9900693683826214, |
|
"eval_precision_micro": 0.9885222381635581, |
|
"eval_precision_weighted": 0.9900236221613553, |
|
"eval_recall_macro": 0.9879231210556513, |
|
"eval_recall_micro": 0.9885222381635581, |
|
"eval_recall_weighted": 0.9885222381635581, |
|
"eval_runtime": 3.3973, |
|
"eval_samples_per_second": 205.165, |
|
"eval_steps_per_second": 12.952, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 5.017191977077364, |
|
"grad_norm": 1.9127634763717651, |
|
"learning_rate": 3.2356062616078534e-05, |
|
"loss": 0.397, |
|
"step": 1751 |
|
}, |
|
{ |
|
"epoch": 5.065902578796561, |
|
"grad_norm": 8.336675643920898, |
|
"learning_rate": 3.2130538604404355e-05, |
|
"loss": 0.2355, |
|
"step": 1768 |
|
}, |
|
{ |
|
"epoch": 5.114613180515759, |
|
"grad_norm": 7.159496307373047, |
|
"learning_rate": 3.190501459273017e-05, |
|
"loss": 0.2086, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 5.163323782234957, |
|
"grad_norm": 6.03056526184082, |
|
"learning_rate": 3.167949058105599e-05, |
|
"loss": 0.2212, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 5.212034383954155, |
|
"grad_norm": 7.681415557861328, |
|
"learning_rate": 3.1453966569381796e-05, |
|
"loss": 0.2721, |
|
"step": 1819 |
|
}, |
|
{ |
|
"epoch": 5.260744985673353, |
|
"grad_norm": 1.4911251068115234, |
|
"learning_rate": 3.1228442557707616e-05, |
|
"loss": 0.1994, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 5.30945558739255, |
|
"grad_norm": 7.99345588684082, |
|
"learning_rate": 3.100291854603343e-05, |
|
"loss": 0.312, |
|
"step": 1853 |
|
}, |
|
{ |
|
"epoch": 5.358166189111748, |
|
"grad_norm": 3.288712978363037, |
|
"learning_rate": 3.077739453435925e-05, |
|
"loss": 0.3002, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 5.406876790830945, |
|
"grad_norm": 0.1384359449148178, |
|
"learning_rate": 3.0551870522685064e-05, |
|
"loss": 0.1875, |
|
"step": 1887 |
|
}, |
|
{ |
|
"epoch": 5.455587392550143, |
|
"grad_norm": 2.912055730819702, |
|
"learning_rate": 3.0326346511010878e-05, |
|
"loss": 0.1617, |
|
"step": 1904 |
|
}, |
|
{ |
|
"epoch": 5.504297994269341, |
|
"grad_norm": 9.510294914245605, |
|
"learning_rate": 3.0100822499336695e-05, |
|
"loss": 0.1553, |
|
"step": 1921 |
|
}, |
|
{ |
|
"epoch": 5.553008595988539, |
|
"grad_norm": 5.520040988922119, |
|
"learning_rate": 2.9875298487662512e-05, |
|
"loss": 0.1949, |
|
"step": 1938 |
|
}, |
|
{ |
|
"epoch": 5.6017191977077365, |
|
"grad_norm": 0.39325079321861267, |
|
"learning_rate": 2.964977447598833e-05, |
|
"loss": 0.2814, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 5.650429799426934, |
|
"grad_norm": 0.1934385895729065, |
|
"learning_rate": 2.942425046431414e-05, |
|
"loss": 0.3251, |
|
"step": 1972 |
|
}, |
|
{ |
|
"epoch": 5.699140401146132, |
|
"grad_norm": 5.8890533447265625, |
|
"learning_rate": 2.9198726452639957e-05, |
|
"loss": 0.1652, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 5.747851002865329, |
|
"grad_norm": 5.028823375701904, |
|
"learning_rate": 2.8973202440965774e-05, |
|
"loss": 0.305, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 5.796561604584527, |
|
"grad_norm": 0.35111504793167114, |
|
"learning_rate": 2.874767842929159e-05, |
|
"loss": 0.1684, |
|
"step": 2023 |
|
}, |
|
{ |
|
"epoch": 5.845272206303725, |
|
"grad_norm": 1.910530686378479, |
|
"learning_rate": 2.8522154417617408e-05, |
|
"loss": 0.2535, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 5.893982808022923, |
|
"grad_norm": 5.5074334144592285, |
|
"learning_rate": 2.8296630405943218e-05, |
|
"loss": 0.2924, |
|
"step": 2057 |
|
}, |
|
{ |
|
"epoch": 5.9426934097421205, |
|
"grad_norm": 6.081971645355225, |
|
"learning_rate": 2.8071106394269035e-05, |
|
"loss": 0.1663, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 5.991404011461318, |
|
"grad_norm": 1.7783217430114746, |
|
"learning_rate": 2.7845582382594852e-05, |
|
"loss": 0.1912, |
|
"step": 2091 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9971305595408895, |
|
"eval_f1_macro": 0.997156659844563, |
|
"eval_f1_micro": 0.9971305595408895, |
|
"eval_f1_weighted": 0.9971139798573604, |
|
"eval_loss": 0.031095275655388832, |
|
"eval_precision_macro": 0.9975660216624072, |
|
"eval_precision_micro": 0.9971305595408895, |
|
"eval_precision_weighted": 0.9974204020115067, |
|
"eval_recall_macro": 0.9970740103270225, |
|
"eval_recall_micro": 0.9971305595408895, |
|
"eval_recall_weighted": 0.9971305595408895, |
|
"eval_runtime": 3.3447, |
|
"eval_samples_per_second": 208.387, |
|
"eval_steps_per_second": 13.155, |
|
"step": 2094 |
|
}, |
|
{ |
|
"epoch": 6.040114613180516, |
|
"grad_norm": 12.544295310974121, |
|
"learning_rate": 2.762005837092067e-05, |
|
"loss": 0.2401, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 6.088825214899713, |
|
"grad_norm": 0.36089888215065, |
|
"learning_rate": 2.7394534359246486e-05, |
|
"loss": 0.1197, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 6.137535816618911, |
|
"grad_norm": 3.824916124343872, |
|
"learning_rate": 2.71690103475723e-05, |
|
"loss": 0.2304, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 6.186246418338109, |
|
"grad_norm": 7.010196685791016, |
|
"learning_rate": 2.6943486335898117e-05, |
|
"loss": 0.2393, |
|
"step": 2159 |
|
}, |
|
{ |
|
"epoch": 6.234957020057307, |
|
"grad_norm": 0.23738817870616913, |
|
"learning_rate": 2.6717962324223934e-05, |
|
"loss": 0.2878, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 6.283667621776504, |
|
"grad_norm": 6.685153961181641, |
|
"learning_rate": 2.649243831254975e-05, |
|
"loss": 0.23, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 6.332378223495702, |
|
"grad_norm": 9.155635833740234, |
|
"learning_rate": 2.626691430087557e-05, |
|
"loss": 0.127, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 6.3810888252149, |
|
"grad_norm": 1.8248714208602905, |
|
"learning_rate": 2.604139028920138e-05, |
|
"loss": 0.264, |
|
"step": 2227 |
|
}, |
|
{ |
|
"epoch": 6.429799426934097, |
|
"grad_norm": 3.287179946899414, |
|
"learning_rate": 2.5815866277527196e-05, |
|
"loss": 0.2568, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 6.478510028653295, |
|
"grad_norm": 0.12249535322189331, |
|
"learning_rate": 2.5590342265853013e-05, |
|
"loss": 0.1211, |
|
"step": 2261 |
|
}, |
|
{ |
|
"epoch": 6.527220630372493, |
|
"grad_norm": 9.649252891540527, |
|
"learning_rate": 2.536481825417883e-05, |
|
"loss": 0.2599, |
|
"step": 2278 |
|
}, |
|
{ |
|
"epoch": 6.5759312320916905, |
|
"grad_norm": 8.501516342163086, |
|
"learning_rate": 2.5139294242504647e-05, |
|
"loss": 0.1997, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 6.624641833810888, |
|
"grad_norm": 6.785931587219238, |
|
"learning_rate": 2.491377023083046e-05, |
|
"loss": 0.1947, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 6.673352435530086, |
|
"grad_norm": 1.3328988552093506, |
|
"learning_rate": 2.4688246219156274e-05, |
|
"loss": 0.2236, |
|
"step": 2329 |
|
}, |
|
{ |
|
"epoch": 6.722063037249284, |
|
"grad_norm": 9.384140014648438, |
|
"learning_rate": 2.446272220748209e-05, |
|
"loss": 0.2394, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 6.770773638968482, |
|
"grad_norm": 1.0058611631393433, |
|
"learning_rate": 2.423719819580791e-05, |
|
"loss": 0.1288, |
|
"step": 2363 |
|
}, |
|
{ |
|
"epoch": 6.819484240687679, |
|
"grad_norm": 0.7905517816543579, |
|
"learning_rate": 2.4011674184133722e-05, |
|
"loss": 0.1528, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 6.868194842406877, |
|
"grad_norm": 10.827178955078125, |
|
"learning_rate": 2.378615017245954e-05, |
|
"loss": 0.1767, |
|
"step": 2397 |
|
}, |
|
{ |
|
"epoch": 6.916905444126074, |
|
"grad_norm": 7.897141933441162, |
|
"learning_rate": 2.3560626160785353e-05, |
|
"loss": 0.1411, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 6.965616045845272, |
|
"grad_norm": 4.635827541351318, |
|
"learning_rate": 2.333510214911117e-05, |
|
"loss": 0.1712, |
|
"step": 2431 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9956958393113343, |
|
"eval_f1_macro": 0.9951431111442676, |
|
"eval_f1_micro": 0.9956958393113343, |
|
"eval_f1_weighted": 0.9956405011600654, |
|
"eval_loss": 0.024958999827504158, |
|
"eval_precision_macro": 0.995983935742972, |
|
"eval_precision_micro": 0.9956958393113343, |
|
"eval_precision_weighted": 0.9964929061055317, |
|
"eval_recall_macro": 0.9953528399311532, |
|
"eval_recall_micro": 0.9956958393113343, |
|
"eval_recall_weighted": 0.9956958393113343, |
|
"eval_runtime": 3.4628, |
|
"eval_samples_per_second": 201.281, |
|
"eval_steps_per_second": 12.706, |
|
"step": 2443 |
|
}, |
|
{ |
|
"epoch": 7.01432664756447, |
|
"grad_norm": 0.12232652306556702, |
|
"learning_rate": 2.3109578137436987e-05, |
|
"loss": 0.2055, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 7.063037249283668, |
|
"grad_norm": 0.07312128692865372, |
|
"learning_rate": 2.28840541257628e-05, |
|
"loss": 0.1848, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 7.111747851002866, |
|
"grad_norm": 0.32181409001350403, |
|
"learning_rate": 2.2658530114088618e-05, |
|
"loss": 0.1364, |
|
"step": 2482 |
|
}, |
|
{ |
|
"epoch": 7.160458452722063, |
|
"grad_norm": 0.7672788500785828, |
|
"learning_rate": 2.2433006102414432e-05, |
|
"loss": 0.072, |
|
"step": 2499 |
|
}, |
|
{ |
|
"epoch": 7.2091690544412605, |
|
"grad_norm": 8.377331733703613, |
|
"learning_rate": 2.220748209074025e-05, |
|
"loss": 0.2638, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 7.257879656160458, |
|
"grad_norm": 9.670488357543945, |
|
"learning_rate": 2.1981958079066066e-05, |
|
"loss": 0.2495, |
|
"step": 2533 |
|
}, |
|
{ |
|
"epoch": 7.306590257879656, |
|
"grad_norm": 0.24363534152507782, |
|
"learning_rate": 2.1756434067391883e-05, |
|
"loss": 0.2038, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 7.355300859598854, |
|
"grad_norm": 2.2357654571533203, |
|
"learning_rate": 2.15309100557177e-05, |
|
"loss": 0.2934, |
|
"step": 2567 |
|
}, |
|
{ |
|
"epoch": 7.404011461318052, |
|
"grad_norm": 0.20546384155750275, |
|
"learning_rate": 2.1305386044043514e-05, |
|
"loss": 0.1834, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 7.45272206303725, |
|
"grad_norm": 0.32598844170570374, |
|
"learning_rate": 2.107986203236933e-05, |
|
"loss": 0.0821, |
|
"step": 2601 |
|
}, |
|
{ |
|
"epoch": 7.501432664756447, |
|
"grad_norm": 8.553650856018066, |
|
"learning_rate": 2.0854338020695148e-05, |
|
"loss": 0.0992, |
|
"step": 2618 |
|
}, |
|
{ |
|
"epoch": 7.5501432664756445, |
|
"grad_norm": 0.3119734525680542, |
|
"learning_rate": 2.062881400902096e-05, |
|
"loss": 0.2344, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 7.598853868194842, |
|
"grad_norm": 6.1670002937316895, |
|
"learning_rate": 2.040328999734678e-05, |
|
"loss": 0.1058, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 7.64756446991404, |
|
"grad_norm": 2.705218553543091, |
|
"learning_rate": 2.0177765985672592e-05, |
|
"loss": 0.1608, |
|
"step": 2669 |
|
}, |
|
{ |
|
"epoch": 7.696275071633238, |
|
"grad_norm": 5.938003063201904, |
|
"learning_rate": 1.995224197399841e-05, |
|
"loss": 0.1554, |
|
"step": 2686 |
|
}, |
|
{ |
|
"epoch": 7.744985673352436, |
|
"grad_norm": 0.41698479652404785, |
|
"learning_rate": 1.9726717962324227e-05, |
|
"loss": 0.0979, |
|
"step": 2703 |
|
}, |
|
{ |
|
"epoch": 7.793696275071634, |
|
"grad_norm": 0.4503624141216278, |
|
"learning_rate": 1.950119395065004e-05, |
|
"loss": 0.1353, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 7.842406876790831, |
|
"grad_norm": 4.662674427032471, |
|
"learning_rate": 1.9275669938975857e-05, |
|
"loss": 0.1295, |
|
"step": 2737 |
|
}, |
|
{ |
|
"epoch": 7.891117478510028, |
|
"grad_norm": 5.666357517242432, |
|
"learning_rate": 1.905014592730167e-05, |
|
"loss": 0.2144, |
|
"step": 2754 |
|
}, |
|
{ |
|
"epoch": 7.939828080229226, |
|
"grad_norm": 0.6394052505493164, |
|
"learning_rate": 1.8824621915627488e-05, |
|
"loss": 0.178, |
|
"step": 2771 |
|
}, |
|
{ |
|
"epoch": 7.988538681948424, |
|
"grad_norm": 9.961098670959473, |
|
"learning_rate": 1.8599097903953305e-05, |
|
"loss": 0.2561, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9956958393113343, |
|
"eval_f1_macro": 0.9951431111442676, |
|
"eval_f1_micro": 0.9956958393113343, |
|
"eval_f1_weighted": 0.9956405011600654, |
|
"eval_loss": 0.023447172716259956, |
|
"eval_precision_macro": 0.995983935742972, |
|
"eval_precision_micro": 0.9956958393113343, |
|
"eval_precision_weighted": 0.9964929061055317, |
|
"eval_recall_macro": 0.9953528399311532, |
|
"eval_recall_micro": 0.9956958393113343, |
|
"eval_recall_weighted": 0.9956958393113343, |
|
"eval_runtime": 3.3596, |
|
"eval_samples_per_second": 207.466, |
|
"eval_steps_per_second": 13.097, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 8.037249283667622, |
|
"grad_norm": 10.93313217163086, |
|
"learning_rate": 1.837357389227912e-05, |
|
"loss": 0.11, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 8.085959885386819, |
|
"grad_norm": 0.05085707828402519, |
|
"learning_rate": 1.8148049880604936e-05, |
|
"loss": 0.0616, |
|
"step": 2822 |
|
}, |
|
{ |
|
"epoch": 8.134670487106018, |
|
"grad_norm": 10.42803955078125, |
|
"learning_rate": 1.792252586893075e-05, |
|
"loss": 0.0648, |
|
"step": 2839 |
|
}, |
|
{ |
|
"epoch": 8.183381088825215, |
|
"grad_norm": 1.379164457321167, |
|
"learning_rate": 1.7697001857256567e-05, |
|
"loss": 0.1706, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 8.232091690544413, |
|
"grad_norm": 8.783364295959473, |
|
"learning_rate": 1.7471477845582384e-05, |
|
"loss": 0.1954, |
|
"step": 2873 |
|
}, |
|
{ |
|
"epoch": 8.28080229226361, |
|
"grad_norm": 1.5522698163986206, |
|
"learning_rate": 1.7245953833908197e-05, |
|
"loss": 0.2134, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 8.329512893982809, |
|
"grad_norm": 6.784268379211426, |
|
"learning_rate": 1.7020429822234015e-05, |
|
"loss": 0.1928, |
|
"step": 2907 |
|
}, |
|
{ |
|
"epoch": 8.378223495702006, |
|
"grad_norm": 3.0361063480377197, |
|
"learning_rate": 1.679490581055983e-05, |
|
"loss": 0.3187, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 8.426934097421203, |
|
"grad_norm": 1.8513216972351074, |
|
"learning_rate": 1.6569381798885645e-05, |
|
"loss": 0.1319, |
|
"step": 2941 |
|
}, |
|
{ |
|
"epoch": 8.475644699140402, |
|
"grad_norm": 0.5567758083343506, |
|
"learning_rate": 1.6343857787211462e-05, |
|
"loss": 0.136, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 8.524355300859598, |
|
"grad_norm": 2.810915231704712, |
|
"learning_rate": 1.611833377553728e-05, |
|
"loss": 0.0858, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 8.573065902578797, |
|
"grad_norm": 1.9855493307113647, |
|
"learning_rate": 1.5892809763863097e-05, |
|
"loss": 0.1732, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 8.621776504297994, |
|
"grad_norm": 0.1735798567533493, |
|
"learning_rate": 1.566728575218891e-05, |
|
"loss": 0.0911, |
|
"step": 3009 |
|
}, |
|
{ |
|
"epoch": 8.670487106017191, |
|
"grad_norm": 0.19329993426799774, |
|
"learning_rate": 1.5441761740514727e-05, |
|
"loss": 0.1024, |
|
"step": 3026 |
|
}, |
|
{ |
|
"epoch": 8.71919770773639, |
|
"grad_norm": 4.43624210357666, |
|
"learning_rate": 1.5216237728840543e-05, |
|
"loss": 0.1408, |
|
"step": 3043 |
|
}, |
|
{ |
|
"epoch": 8.767908309455587, |
|
"grad_norm": 9.911310195922852, |
|
"learning_rate": 1.4990713717166358e-05, |
|
"loss": 0.1626, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 8.816618911174785, |
|
"grad_norm": 1.323052167892456, |
|
"learning_rate": 1.4765189705492175e-05, |
|
"loss": 0.1222, |
|
"step": 3077 |
|
}, |
|
{ |
|
"epoch": 8.865329512893982, |
|
"grad_norm": 11.561975479125977, |
|
"learning_rate": 1.4539665693817989e-05, |
|
"loss": 0.1769, |
|
"step": 3094 |
|
}, |
|
{ |
|
"epoch": 8.914040114613181, |
|
"grad_norm": 0.08104487508535385, |
|
"learning_rate": 1.4314141682143806e-05, |
|
"loss": 0.0953, |
|
"step": 3111 |
|
}, |
|
{ |
|
"epoch": 8.962750716332378, |
|
"grad_norm": 0.04927730932831764, |
|
"learning_rate": 1.408861767046962e-05, |
|
"loss": 0.0574, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9956958393113343, |
|
"eval_f1_macro": 0.9948504424939576, |
|
"eval_f1_micro": 0.9956958393113343, |
|
"eval_f1_weighted": 0.9956230754082893, |
|
"eval_loss": 0.017519734799861908, |
|
"eval_precision_macro": 0.995983935742972, |
|
"eval_precision_micro": 0.9956958393113343, |
|
"eval_precision_weighted": 0.9964929061055317, |
|
"eval_recall_macro": 0.9948364888123926, |
|
"eval_recall_micro": 0.9956958393113343, |
|
"eval_recall_weighted": 0.9956958393113343, |
|
"eval_runtime": 3.4072, |
|
"eval_samples_per_second": 204.565, |
|
"eval_steps_per_second": 12.914, |
|
"step": 3141 |
|
}, |
|
{ |
|
"epoch": 9.011461318051577, |
|
"grad_norm": 0.3880198001861572, |
|
"learning_rate": 1.3863093658795437e-05, |
|
"loss": 0.1954, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 9.060171919770774, |
|
"grad_norm": 0.05076654255390167, |
|
"learning_rate": 1.3637569647121254e-05, |
|
"loss": 0.0797, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 9.10888252148997, |
|
"grad_norm": 6.032546043395996, |
|
"learning_rate": 1.3412045635447068e-05, |
|
"loss": 0.2393, |
|
"step": 3179 |
|
}, |
|
{ |
|
"epoch": 9.15759312320917, |
|
"grad_norm": 11.056164741516113, |
|
"learning_rate": 1.3186521623772885e-05, |
|
"loss": 0.1001, |
|
"step": 3196 |
|
}, |
|
{ |
|
"epoch": 9.206303724928366, |
|
"grad_norm": 0.19840994477272034, |
|
"learning_rate": 1.29609976120987e-05, |
|
"loss": 0.0743, |
|
"step": 3213 |
|
}, |
|
{ |
|
"epoch": 9.255014326647565, |
|
"grad_norm": 4.645060062408447, |
|
"learning_rate": 1.2735473600424515e-05, |
|
"loss": 0.1446, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 9.303724928366762, |
|
"grad_norm": 9.013117790222168, |
|
"learning_rate": 1.2509949588750332e-05, |
|
"loss": 0.1288, |
|
"step": 3247 |
|
}, |
|
{ |
|
"epoch": 9.35243553008596, |
|
"grad_norm": 1.7711181640625, |
|
"learning_rate": 1.228442557707615e-05, |
|
"loss": 0.0835, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 9.401146131805158, |
|
"grad_norm": 5.3366379737854, |
|
"learning_rate": 1.2058901565401965e-05, |
|
"loss": 0.1201, |
|
"step": 3281 |
|
}, |
|
{ |
|
"epoch": 9.449856733524355, |
|
"grad_norm": 3.4900286197662354, |
|
"learning_rate": 1.183337755372778e-05, |
|
"loss": 0.1026, |
|
"step": 3298 |
|
}, |
|
{ |
|
"epoch": 9.498567335243553, |
|
"grad_norm": 0.059569913893938065, |
|
"learning_rate": 1.1607853542053596e-05, |
|
"loss": 0.1123, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 9.54727793696275, |
|
"grad_norm": 8.251703262329102, |
|
"learning_rate": 1.1382329530379411e-05, |
|
"loss": 0.1441, |
|
"step": 3332 |
|
}, |
|
{ |
|
"epoch": 9.595988538681949, |
|
"grad_norm": 1.078260064125061, |
|
"learning_rate": 1.1156805518705226e-05, |
|
"loss": 0.1191, |
|
"step": 3349 |
|
}, |
|
{ |
|
"epoch": 9.644699140401146, |
|
"grad_norm": 7.364470958709717, |
|
"learning_rate": 1.0931281507031044e-05, |
|
"loss": 0.1539, |
|
"step": 3366 |
|
}, |
|
{ |
|
"epoch": 9.693409742120345, |
|
"grad_norm": 2.354499101638794, |
|
"learning_rate": 1.0705757495356859e-05, |
|
"loss": 0.1502, |
|
"step": 3383 |
|
}, |
|
{ |
|
"epoch": 9.742120343839542, |
|
"grad_norm": 10.193525314331055, |
|
"learning_rate": 1.0480233483682674e-05, |
|
"loss": 0.1199, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 9.790830945558739, |
|
"grad_norm": 2.536367893218994, |
|
"learning_rate": 1.025470947200849e-05, |
|
"loss": 0.2098, |
|
"step": 3417 |
|
}, |
|
{ |
|
"epoch": 9.839541547277937, |
|
"grad_norm": 5.09243631362915, |
|
"learning_rate": 1.0029185460334307e-05, |
|
"loss": 0.2896, |
|
"step": 3434 |
|
}, |
|
{ |
|
"epoch": 9.888252148997134, |
|
"grad_norm": 1.098929762840271, |
|
"learning_rate": 9.803661448660124e-06, |
|
"loss": 0.1017, |
|
"step": 3451 |
|
}, |
|
{ |
|
"epoch": 9.936962750716333, |
|
"grad_norm": 0.1535651981830597, |
|
"learning_rate": 9.57813743698594e-06, |
|
"loss": 0.1077, |
|
"step": 3468 |
|
}, |
|
{ |
|
"epoch": 9.98567335243553, |
|
"grad_norm": 0.5236210227012634, |
|
"learning_rate": 9.352613425311755e-06, |
|
"loss": 0.084, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9956958393113343, |
|
"eval_f1_macro": 0.9949831411206324, |
|
"eval_f1_micro": 0.9956958393113343, |
|
"eval_f1_weighted": 0.9955771743939521, |
|
"eval_loss": 0.015395666472613811, |
|
"eval_precision_macro": 0.9964707314104905, |
|
"eval_precision_micro": 0.9956958393113343, |
|
"eval_precision_weighted": 0.9963769691172847, |
|
"eval_recall_macro": 0.9945496270797476, |
|
"eval_recall_micro": 0.9956958393113343, |
|
"eval_recall_weighted": 0.9956958393113343, |
|
"eval_runtime": 3.4096, |
|
"eval_samples_per_second": 204.424, |
|
"eval_steps_per_second": 12.905, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 10.034383954154729, |
|
"grad_norm": 0.10064805299043655, |
|
"learning_rate": 9.12708941363757e-06, |
|
"loss": 0.129, |
|
"step": 3502 |
|
}, |
|
{ |
|
"epoch": 10.083094555873926, |
|
"grad_norm": 0.9957130551338196, |
|
"learning_rate": 8.901565401963385e-06, |
|
"loss": 0.0652, |
|
"step": 3519 |
|
}, |
|
{ |
|
"epoch": 10.131805157593123, |
|
"grad_norm": 4.763299465179443, |
|
"learning_rate": 8.676041390289202e-06, |
|
"loss": 0.1531, |
|
"step": 3536 |
|
}, |
|
{ |
|
"epoch": 10.180515759312321, |
|
"grad_norm": 0.9550924897193909, |
|
"learning_rate": 8.450517378615018e-06, |
|
"loss": 0.0548, |
|
"step": 3553 |
|
}, |
|
{ |
|
"epoch": 10.229226361031518, |
|
"grad_norm": 0.5510568022727966, |
|
"learning_rate": 8.224993366940833e-06, |
|
"loss": 0.171, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 10.277936962750717, |
|
"grad_norm": 1.120082139968872, |
|
"learning_rate": 7.999469355266649e-06, |
|
"loss": 0.1506, |
|
"step": 3587 |
|
}, |
|
{ |
|
"epoch": 10.326647564469914, |
|
"grad_norm": 8.000428199768066, |
|
"learning_rate": 7.773945343592464e-06, |
|
"loss": 0.0791, |
|
"step": 3604 |
|
}, |
|
{ |
|
"epoch": 10.375358166189113, |
|
"grad_norm": 0.08897445350885391, |
|
"learning_rate": 7.548421331918282e-06, |
|
"loss": 0.1006, |
|
"step": 3621 |
|
}, |
|
{ |
|
"epoch": 10.42406876790831, |
|
"grad_norm": 1.5076502561569214, |
|
"learning_rate": 7.322897320244097e-06, |
|
"loss": 0.1085, |
|
"step": 3638 |
|
}, |
|
{ |
|
"epoch": 10.472779369627506, |
|
"grad_norm": 0.3444303870201111, |
|
"learning_rate": 7.097373308569913e-06, |
|
"loss": 0.0881, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 10.521489971346705, |
|
"grad_norm": 5.353268146514893, |
|
"learning_rate": 6.871849296895728e-06, |
|
"loss": 0.1233, |
|
"step": 3672 |
|
}, |
|
{ |
|
"epoch": 10.570200573065902, |
|
"grad_norm": 6.925529479980469, |
|
"learning_rate": 6.646325285221544e-06, |
|
"loss": 0.1726, |
|
"step": 3689 |
|
}, |
|
{ |
|
"epoch": 10.6189111747851, |
|
"grad_norm": 1.2398282289505005, |
|
"learning_rate": 6.42080127354736e-06, |
|
"loss": 0.1607, |
|
"step": 3706 |
|
}, |
|
{ |
|
"epoch": 10.667621776504298, |
|
"grad_norm": 0.17667262256145477, |
|
"learning_rate": 6.195277261873176e-06, |
|
"loss": 0.1065, |
|
"step": 3723 |
|
}, |
|
{ |
|
"epoch": 10.716332378223496, |
|
"grad_norm": 8.593550682067871, |
|
"learning_rate": 5.969753250198992e-06, |
|
"loss": 0.1598, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 10.765042979942693, |
|
"grad_norm": 6.634376049041748, |
|
"learning_rate": 5.744229238524808e-06, |
|
"loss": 0.214, |
|
"step": 3757 |
|
}, |
|
{ |
|
"epoch": 10.81375358166189, |
|
"grad_norm": 0.12865765392780304, |
|
"learning_rate": 5.518705226850624e-06, |
|
"loss": 0.1079, |
|
"step": 3774 |
|
}, |
|
{ |
|
"epoch": 10.862464183381089, |
|
"grad_norm": 0.4588039815425873, |
|
"learning_rate": 5.29318121517644e-06, |
|
"loss": 0.1218, |
|
"step": 3791 |
|
}, |
|
{ |
|
"epoch": 10.911174785100286, |
|
"grad_norm": 8.046585083007812, |
|
"learning_rate": 5.0676572035022555e-06, |
|
"loss": 0.1618, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 10.959885386819485, |
|
"grad_norm": 0.34073105454444885, |
|
"learning_rate": 4.842133191828072e-06, |
|
"loss": 0.0788, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 1.0, |
|
"eval_f1_macro": 1.0, |
|
"eval_f1_micro": 1.0, |
|
"eval_f1_weighted": 1.0, |
|
"eval_loss": 0.011031342670321465, |
|
"eval_precision_macro": 1.0, |
|
"eval_precision_micro": 1.0, |
|
"eval_precision_weighted": 1.0, |
|
"eval_recall_macro": 1.0, |
|
"eval_recall_micro": 1.0, |
|
"eval_recall_weighted": 1.0, |
|
"eval_runtime": 3.4121, |
|
"eval_samples_per_second": 204.275, |
|
"eval_steps_per_second": 12.895, |
|
"step": 3839 |
|
} |
|
], |
|
"logging_steps": 17, |
|
"max_steps": 4188, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3756905482586214e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|