|
{ |
|
"best_metric": 0.6890261173248291, |
|
"best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/Aina-large-2024_10_23-batch-size32_freeze_monolabel/checkpoint-337824", |
|
"epoch": 112.0, |
|
"eval_steps": 500, |
|
"global_step": 370944, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15096618357487923, |
|
"grad_norm": 4.092954635620117, |
|
"learning_rate": 0.001, |
|
"loss": 1.2862, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.30193236714975846, |
|
"grad_norm": 2.8465919494628906, |
|
"learning_rate": 0.001, |
|
"loss": 1.0646, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4528985507246377, |
|
"grad_norm": 2.6722023487091064, |
|
"learning_rate": 0.001, |
|
"loss": 1.0197, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6038647342995169, |
|
"grad_norm": 2.542285442352295, |
|
"learning_rate": 0.001, |
|
"loss": 1.0012, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7548309178743962, |
|
"grad_norm": 3.1086227893829346, |
|
"learning_rate": 0.001, |
|
"loss": 0.9796, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9057971014492754, |
|
"grad_norm": 1.9604114294052124, |
|
"learning_rate": 0.001, |
|
"loss": 0.9658, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7178935447338618, |
|
"eval_f1_macro": 0.22165703415657395, |
|
"eval_f1_micro": 0.7178935447338618, |
|
"eval_loss": 0.8467838168144226, |
|
"eval_runtime": 218.9663, |
|
"eval_samples_per_second": 161.303, |
|
"eval_steps_per_second": 5.042, |
|
"learning_rate": 0.001, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 1.0567632850241546, |
|
"grad_norm": 2.1088340282440186, |
|
"learning_rate": 0.001, |
|
"loss": 0.9605, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2077294685990339, |
|
"grad_norm": 2.5187363624572754, |
|
"learning_rate": 0.001, |
|
"loss": 0.9718, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.358695652173913, |
|
"grad_norm": 1.7005295753479004, |
|
"learning_rate": 0.001, |
|
"loss": 0.934, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5096618357487923, |
|
"grad_norm": 1.8835418224334717, |
|
"learning_rate": 0.001, |
|
"loss": 0.9532, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.6606280193236715, |
|
"grad_norm": 1.95162832736969, |
|
"learning_rate": 0.001, |
|
"loss": 0.9426, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.8115942028985508, |
|
"grad_norm": 2.5510926246643066, |
|
"learning_rate": 0.001, |
|
"loss": 0.9391, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.96256038647343, |
|
"grad_norm": 1.474992275238037, |
|
"learning_rate": 0.001, |
|
"loss": 0.9257, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7247168742921857, |
|
"eval_f1_macro": 0.30432564891312835, |
|
"eval_f1_micro": 0.7247168742921857, |
|
"eval_loss": 0.8172192573547363, |
|
"eval_runtime": 219.7649, |
|
"eval_samples_per_second": 160.717, |
|
"eval_steps_per_second": 5.024, |
|
"learning_rate": 0.001, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 2.1135265700483092, |
|
"grad_norm": 1.5706532001495361, |
|
"learning_rate": 0.001, |
|
"loss": 0.9204, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.2644927536231885, |
|
"grad_norm": 1.8336281776428223, |
|
"learning_rate": 0.001, |
|
"loss": 0.9214, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.4154589371980677, |
|
"grad_norm": 1.5006639957427979, |
|
"learning_rate": 0.001, |
|
"loss": 0.9134, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.566425120772947, |
|
"grad_norm": 1.3287198543548584, |
|
"learning_rate": 0.001, |
|
"loss": 0.9272, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.717391304347826, |
|
"grad_norm": 1.6935093402862549, |
|
"learning_rate": 0.001, |
|
"loss": 0.9081, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.8683574879227054, |
|
"grad_norm": 1.6363154649734497, |
|
"learning_rate": 0.001, |
|
"loss": 0.9202, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.72599093997735, |
|
"eval_f1_macro": 0.30354947063534227, |
|
"eval_f1_micro": 0.72599093997735, |
|
"eval_loss": 0.8048254251480103, |
|
"eval_runtime": 213.9071, |
|
"eval_samples_per_second": 165.118, |
|
"eval_steps_per_second": 5.161, |
|
"learning_rate": 0.001, |
|
"step": 9936 |
|
}, |
|
{ |
|
"epoch": 3.0193236714975846, |
|
"grad_norm": 1.5169860124588013, |
|
"learning_rate": 0.001, |
|
"loss": 0.9103, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.170289855072464, |
|
"grad_norm": 1.4337282180786133, |
|
"learning_rate": 0.001, |
|
"loss": 0.9141, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.321256038647343, |
|
"grad_norm": 1.2123082876205444, |
|
"learning_rate": 0.001, |
|
"loss": 0.9108, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.4722222222222223, |
|
"grad_norm": 1.4377527236938477, |
|
"learning_rate": 0.001, |
|
"loss": 0.8972, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.6231884057971016, |
|
"grad_norm": 1.3103830814361572, |
|
"learning_rate": 0.001, |
|
"loss": 0.9089, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.774154589371981, |
|
"grad_norm": 1.3572064638137817, |
|
"learning_rate": 0.001, |
|
"loss": 0.9155, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.92512077294686, |
|
"grad_norm": 1.3236734867095947, |
|
"learning_rate": 0.001, |
|
"loss": 0.8905, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.728510758776897, |
|
"eval_f1_macro": 0.3108900654252266, |
|
"eval_f1_micro": 0.728510758776897, |
|
"eval_loss": 0.7946847677230835, |
|
"eval_runtime": 217.8922, |
|
"eval_samples_per_second": 162.099, |
|
"eval_steps_per_second": 5.067, |
|
"learning_rate": 0.001, |
|
"step": 13248 |
|
}, |
|
{ |
|
"epoch": 4.076086956521739, |
|
"grad_norm": 1.0904438495635986, |
|
"learning_rate": 0.001, |
|
"loss": 0.9027, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.2270531400966185, |
|
"grad_norm": 1.2598488330841064, |
|
"learning_rate": 0.001, |
|
"loss": 0.8869, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.378019323671498, |
|
"grad_norm": 1.28193998336792, |
|
"learning_rate": 0.001, |
|
"loss": 0.9013, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.528985507246377, |
|
"grad_norm": 1.0907433032989502, |
|
"learning_rate": 0.001, |
|
"loss": 0.9015, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.679951690821256, |
|
"grad_norm": 1.0357964038848877, |
|
"learning_rate": 0.001, |
|
"loss": 0.8984, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.830917874396135, |
|
"grad_norm": 1.0421031713485718, |
|
"learning_rate": 0.001, |
|
"loss": 0.8975, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.981884057971015, |
|
"grad_norm": 1.3573296070098877, |
|
"learning_rate": 0.001, |
|
"loss": 0.907, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7309173272933183, |
|
"eval_f1_macro": 0.30462992193687743, |
|
"eval_f1_micro": 0.7309173272933183, |
|
"eval_loss": 0.7821700572967529, |
|
"eval_runtime": 220.5011, |
|
"eval_samples_per_second": 160.181, |
|
"eval_steps_per_second": 5.007, |
|
"learning_rate": 0.001, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 5.132850241545894, |
|
"grad_norm": 1.1736644506454468, |
|
"learning_rate": 0.001, |
|
"loss": 0.8845, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.283816425120773, |
|
"grad_norm": 1.2128374576568604, |
|
"learning_rate": 0.001, |
|
"loss": 0.892, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.434782608695652, |
|
"grad_norm": 1.199894666671753, |
|
"learning_rate": 0.001, |
|
"loss": 0.8843, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.585748792270532, |
|
"grad_norm": 1.0396721363067627, |
|
"learning_rate": 0.001, |
|
"loss": 0.8865, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.736714975845411, |
|
"grad_norm": 1.4927618503570557, |
|
"learning_rate": 0.001, |
|
"loss": 0.9021, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.88768115942029, |
|
"grad_norm": 1.0294079780578613, |
|
"learning_rate": 0.001, |
|
"loss": 0.8925, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7345413363533408, |
|
"eval_f1_macro": 0.31594313464626494, |
|
"eval_f1_micro": 0.7345413363533408, |
|
"eval_loss": 0.7838464379310608, |
|
"eval_runtime": 219.6797, |
|
"eval_samples_per_second": 160.78, |
|
"eval_steps_per_second": 5.025, |
|
"learning_rate": 0.001, |
|
"step": 19872 |
|
}, |
|
{ |
|
"epoch": 6.038647342995169, |
|
"grad_norm": 1.1218583583831787, |
|
"learning_rate": 0.001, |
|
"loss": 0.9113, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.1896135265700485, |
|
"grad_norm": 1.1388846635818481, |
|
"learning_rate": 0.001, |
|
"loss": 0.8748, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 6.340579710144928, |
|
"grad_norm": 1.2864261865615845, |
|
"learning_rate": 0.001, |
|
"loss": 0.8787, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.491545893719807, |
|
"grad_norm": 1.1587731838226318, |
|
"learning_rate": 0.001, |
|
"loss": 0.8889, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.642512077294686, |
|
"grad_norm": 0.9929710626602173, |
|
"learning_rate": 0.001, |
|
"loss": 0.8816, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.793478260869565, |
|
"grad_norm": 1.097940444946289, |
|
"learning_rate": 0.001, |
|
"loss": 0.8932, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 0.9754383563995361, |
|
"learning_rate": 0.001, |
|
"loss": 0.8922, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7357304643261608, |
|
"eval_f1_macro": 0.3244028121603974, |
|
"eval_f1_micro": 0.7357304643261608, |
|
"eval_loss": 0.793089747428894, |
|
"eval_runtime": 224.722, |
|
"eval_samples_per_second": 157.172, |
|
"eval_steps_per_second": 4.913, |
|
"learning_rate": 0.001, |
|
"step": 23184 |
|
}, |
|
{ |
|
"epoch": 7.095410628019324, |
|
"grad_norm": 1.002031922340393, |
|
"learning_rate": 0.001, |
|
"loss": 0.8795, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 7.246376811594203, |
|
"grad_norm": 1.0516221523284912, |
|
"learning_rate": 0.001, |
|
"loss": 0.8941, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.397342995169082, |
|
"grad_norm": 1.164881944656372, |
|
"learning_rate": 0.001, |
|
"loss": 0.8719, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 7.548309178743962, |
|
"grad_norm": 0.8948553204536438, |
|
"learning_rate": 0.001, |
|
"loss": 0.8732, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 7.699275362318841, |
|
"grad_norm": 1.2892440557479858, |
|
"learning_rate": 0.001, |
|
"loss": 0.9082, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.85024154589372, |
|
"grad_norm": 0.9325675368309021, |
|
"learning_rate": 0.001, |
|
"loss": 0.883, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7354190260475651, |
|
"eval_f1_macro": 0.3240719942212276, |
|
"eval_f1_micro": 0.7354190260475651, |
|
"eval_loss": 0.7687743306159973, |
|
"eval_runtime": 208.4146, |
|
"eval_samples_per_second": 169.47, |
|
"eval_steps_per_second": 5.297, |
|
"learning_rate": 0.001, |
|
"step": 26496 |
|
}, |
|
{ |
|
"epoch": 8.001207729468598, |
|
"grad_norm": 0.9580215811729431, |
|
"learning_rate": 0.001, |
|
"loss": 0.8839, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 8.152173913043478, |
|
"grad_norm": 0.9364214539527893, |
|
"learning_rate": 0.001, |
|
"loss": 0.8819, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 8.303140096618357, |
|
"grad_norm": 0.913118302822113, |
|
"learning_rate": 0.001, |
|
"loss": 0.8703, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 8.454106280193237, |
|
"grad_norm": 1.1664602756500244, |
|
"learning_rate": 0.001, |
|
"loss": 0.89, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 8.605072463768115, |
|
"grad_norm": 1.5964057445526123, |
|
"learning_rate": 0.001, |
|
"loss": 0.8855, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 8.756038647342995, |
|
"grad_norm": 0.9933860898017883, |
|
"learning_rate": 0.001, |
|
"loss": 0.8887, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 8.907004830917874, |
|
"grad_norm": 1.3583431243896484, |
|
"learning_rate": 0.001, |
|
"loss": 0.8697, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7376557191392978, |
|
"eval_f1_macro": 0.3242274323363683, |
|
"eval_f1_micro": 0.7376557191392978, |
|
"eval_loss": 0.7634698152542114, |
|
"eval_runtime": 206.1807, |
|
"eval_samples_per_second": 171.306, |
|
"eval_steps_per_second": 5.355, |
|
"learning_rate": 0.001, |
|
"step": 29808 |
|
}, |
|
{ |
|
"epoch": 9.057971014492754, |
|
"grad_norm": 1.0802897214889526, |
|
"learning_rate": 0.001, |
|
"loss": 0.8723, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 9.208937198067632, |
|
"grad_norm": 1.0164059400558472, |
|
"learning_rate": 0.001, |
|
"loss": 0.8713, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 9.359903381642512, |
|
"grad_norm": 1.0108799934387207, |
|
"learning_rate": 0.001, |
|
"loss": 0.8706, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 9.51086956521739, |
|
"grad_norm": 1.2227768898010254, |
|
"learning_rate": 0.001, |
|
"loss": 0.8592, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 9.66183574879227, |
|
"grad_norm": 0.9643123745918274, |
|
"learning_rate": 0.001, |
|
"loss": 0.8828, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 9.81280193236715, |
|
"grad_norm": 1.145917534828186, |
|
"learning_rate": 0.001, |
|
"loss": 0.8971, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 9.96376811594203, |
|
"grad_norm": 1.157076358795166, |
|
"learning_rate": 0.001, |
|
"loss": 0.8782, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7373159682899207, |
|
"eval_f1_macro": 0.3327024872296584, |
|
"eval_f1_micro": 0.7373159682899207, |
|
"eval_loss": 0.7689030170440674, |
|
"eval_runtime": 212.3199, |
|
"eval_samples_per_second": 166.353, |
|
"eval_steps_per_second": 5.2, |
|
"learning_rate": 0.001, |
|
"step": 33120 |
|
}, |
|
{ |
|
"epoch": 10.114734299516908, |
|
"grad_norm": 1.1405360698699951, |
|
"learning_rate": 0.001, |
|
"loss": 0.8815, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 10.265700483091788, |
|
"grad_norm": 0.999214231967926, |
|
"learning_rate": 0.001, |
|
"loss": 0.8882, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 10.416666666666666, |
|
"grad_norm": 0.8223280906677246, |
|
"learning_rate": 0.001, |
|
"loss": 0.8733, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 10.567632850241546, |
|
"grad_norm": 1.0058332681655884, |
|
"learning_rate": 0.001, |
|
"loss": 0.8835, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 10.718599033816425, |
|
"grad_norm": 0.9850085377693176, |
|
"learning_rate": 0.001, |
|
"loss": 0.8767, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 10.869565217391305, |
|
"grad_norm": 0.9125489592552185, |
|
"learning_rate": 0.001, |
|
"loss": 0.8869, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7349943374858438, |
|
"eval_f1_macro": 0.3336563616631842, |
|
"eval_f1_micro": 0.7349943374858438, |
|
"eval_loss": 0.7676350474357605, |
|
"eval_runtime": 208.2863, |
|
"eval_samples_per_second": 169.574, |
|
"eval_steps_per_second": 5.3, |
|
"learning_rate": 0.001, |
|
"step": 36432 |
|
}, |
|
{ |
|
"epoch": 11.020531400966183, |
|
"grad_norm": 1.0183864831924438, |
|
"learning_rate": 0.001, |
|
"loss": 0.8785, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 11.171497584541063, |
|
"grad_norm": 1.4868329763412476, |
|
"learning_rate": 0.001, |
|
"loss": 0.8876, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 11.322463768115941, |
|
"grad_norm": 1.1966030597686768, |
|
"learning_rate": 0.001, |
|
"loss": 0.8655, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 11.473429951690822, |
|
"grad_norm": 1.3686827421188354, |
|
"learning_rate": 0.001, |
|
"loss": 0.8802, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 11.6243961352657, |
|
"grad_norm": 1.0419063568115234, |
|
"learning_rate": 0.001, |
|
"loss": 0.8785, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 11.77536231884058, |
|
"grad_norm": 1.4371249675750732, |
|
"learning_rate": 0.001, |
|
"loss": 0.8896, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 11.926328502415458, |
|
"grad_norm": 1.1206474304199219, |
|
"learning_rate": 0.001, |
|
"loss": 0.8791, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7368912797281993, |
|
"eval_f1_macro": 0.34087135736791074, |
|
"eval_f1_micro": 0.7368912797281993, |
|
"eval_loss": 0.7639870643615723, |
|
"eval_runtime": 207.0154, |
|
"eval_samples_per_second": 170.615, |
|
"eval_steps_per_second": 5.333, |
|
"learning_rate": 0.001, |
|
"step": 39744 |
|
}, |
|
{ |
|
"epoch": 12.077294685990339, |
|
"grad_norm": 0.9196418523788452, |
|
"learning_rate": 0.001, |
|
"loss": 0.8727, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 12.228260869565217, |
|
"grad_norm": 1.03386390209198, |
|
"learning_rate": 0.001, |
|
"loss": 0.8807, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 12.379227053140097, |
|
"grad_norm": 1.0366028547286987, |
|
"learning_rate": 0.001, |
|
"loss": 0.8811, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 12.530193236714975, |
|
"grad_norm": 1.0820589065551758, |
|
"learning_rate": 0.001, |
|
"loss": 0.8801, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 12.681159420289855, |
|
"grad_norm": 0.8397698402404785, |
|
"learning_rate": 0.001, |
|
"loss": 0.8804, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 12.832125603864734, |
|
"grad_norm": 1.00918447971344, |
|
"learning_rate": 0.001, |
|
"loss": 0.8624, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 12.983091787439614, |
|
"grad_norm": 1.0675106048583984, |
|
"learning_rate": 0.001, |
|
"loss": 0.9017, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.733691959229898, |
|
"eval_f1_macro": 0.34004712807527443, |
|
"eval_f1_micro": 0.733691959229898, |
|
"eval_loss": 0.7673575282096863, |
|
"eval_runtime": 207.246, |
|
"eval_samples_per_second": 170.426, |
|
"eval_steps_per_second": 5.327, |
|
"learning_rate": 0.001, |
|
"step": 43056 |
|
}, |
|
{ |
|
"epoch": 13.134057971014492, |
|
"grad_norm": 0.9813660383224487, |
|
"learning_rate": 0.001, |
|
"loss": 0.8767, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 13.285024154589372, |
|
"grad_norm": 0.9276372790336609, |
|
"learning_rate": 0.001, |
|
"loss": 0.8798, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 13.43599033816425, |
|
"grad_norm": 0.9873163104057312, |
|
"learning_rate": 0.001, |
|
"loss": 0.8597, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 13.58695652173913, |
|
"grad_norm": 1.1890395879745483, |
|
"learning_rate": 0.001, |
|
"loss": 0.8879, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 13.73792270531401, |
|
"grad_norm": 0.979046106338501, |
|
"learning_rate": 0.001, |
|
"loss": 0.8794, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 13.88888888888889, |
|
"grad_norm": 1.0946838855743408, |
|
"learning_rate": 0.001, |
|
"loss": 0.8753, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7381087202718006, |
|
"eval_f1_macro": 0.3270675446568499, |
|
"eval_f1_micro": 0.7381087202718006, |
|
"eval_loss": 0.7585554122924805, |
|
"eval_runtime": 206.2625, |
|
"eval_samples_per_second": 171.238, |
|
"eval_steps_per_second": 5.352, |
|
"learning_rate": 0.001, |
|
"step": 46368 |
|
}, |
|
{ |
|
"epoch": 14.039855072463768, |
|
"grad_norm": 1.1594228744506836, |
|
"learning_rate": 0.001, |
|
"loss": 0.8827, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 14.190821256038648, |
|
"grad_norm": 1.0241971015930176, |
|
"learning_rate": 0.001, |
|
"loss": 0.8892, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 14.341787439613526, |
|
"grad_norm": 0.8705746531486511, |
|
"learning_rate": 0.001, |
|
"loss": 0.8792, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 14.492753623188406, |
|
"grad_norm": 1.0380282402038574, |
|
"learning_rate": 0.001, |
|
"loss": 0.8738, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 14.643719806763285, |
|
"grad_norm": 1.0658243894577026, |
|
"learning_rate": 0.001, |
|
"loss": 0.8698, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 14.794685990338165, |
|
"grad_norm": 1.2038367986679077, |
|
"learning_rate": 0.001, |
|
"loss": 0.8671, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 14.945652173913043, |
|
"grad_norm": 1.0316836833953857, |
|
"learning_rate": 0.001, |
|
"loss": 0.872, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7372593431483578, |
|
"eval_f1_macro": 0.322932331052492, |
|
"eval_f1_micro": 0.7372593431483578, |
|
"eval_loss": 0.7657922506332397, |
|
"eval_runtime": 217.354, |
|
"eval_samples_per_second": 162.5, |
|
"eval_steps_per_second": 5.079, |
|
"learning_rate": 0.001, |
|
"step": 49680 |
|
}, |
|
{ |
|
"epoch": 15.096618357487923, |
|
"grad_norm": 1.3473697900772095, |
|
"learning_rate": 0.001, |
|
"loss": 0.8779, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 15.247584541062801, |
|
"grad_norm": 0.8602265119552612, |
|
"learning_rate": 0.001, |
|
"loss": 0.8731, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 15.398550724637682, |
|
"grad_norm": 1.1624842882156372, |
|
"learning_rate": 0.001, |
|
"loss": 0.8687, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 15.54951690821256, |
|
"grad_norm": 1.0083673000335693, |
|
"learning_rate": 0.001, |
|
"loss": 0.8908, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 15.70048309178744, |
|
"grad_norm": 1.0320228338241577, |
|
"learning_rate": 0.001, |
|
"loss": 0.9016, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 15.851449275362318, |
|
"grad_norm": 1.144671082496643, |
|
"learning_rate": 0.001, |
|
"loss": 0.8672, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7388731596828992, |
|
"eval_f1_macro": 0.3353494267357002, |
|
"eval_f1_micro": 0.7388731596828992, |
|
"eval_loss": 0.8086147904396057, |
|
"eval_runtime": 213.7192, |
|
"eval_samples_per_second": 165.264, |
|
"eval_steps_per_second": 5.166, |
|
"learning_rate": 0.001, |
|
"step": 52992 |
|
}, |
|
{ |
|
"epoch": 16.002415458937197, |
|
"grad_norm": 0.9885081052780151, |
|
"learning_rate": 0.001, |
|
"loss": 0.8832, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 16.153381642512077, |
|
"grad_norm": 1.0105615854263306, |
|
"learning_rate": 0.001, |
|
"loss": 0.869, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 16.304347826086957, |
|
"grad_norm": 1.2131775617599487, |
|
"learning_rate": 0.001, |
|
"loss": 0.8668, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 16.455314009661837, |
|
"grad_norm": 0.7681830525398254, |
|
"learning_rate": 0.001, |
|
"loss": 0.8867, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 16.606280193236714, |
|
"grad_norm": 1.025246500968933, |
|
"learning_rate": 0.001, |
|
"loss": 0.8827, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 16.757246376811594, |
|
"grad_norm": 1.2803984880447388, |
|
"learning_rate": 0.001, |
|
"loss": 0.8701, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 16.908212560386474, |
|
"grad_norm": 0.9495580792427063, |
|
"learning_rate": 0.001, |
|
"loss": 0.8678, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7390147225368063, |
|
"eval_f1_macro": 0.3359269139745874, |
|
"eval_f1_micro": 0.7390147225368063, |
|
"eval_loss": 0.7629187107086182, |
|
"eval_runtime": 208.4315, |
|
"eval_samples_per_second": 169.456, |
|
"eval_steps_per_second": 5.297, |
|
"learning_rate": 0.001, |
|
"step": 56304 |
|
}, |
|
{ |
|
"epoch": 17.059178743961354, |
|
"grad_norm": 1.0740729570388794, |
|
"learning_rate": 0.001, |
|
"loss": 0.8708, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 17.21014492753623, |
|
"grad_norm": 1.2202423810958862, |
|
"learning_rate": 0.001, |
|
"loss": 0.8549, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 17.36111111111111, |
|
"grad_norm": 1.469183325767517, |
|
"learning_rate": 0.001, |
|
"loss": 0.8802, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 17.51207729468599, |
|
"grad_norm": 1.090923547744751, |
|
"learning_rate": 0.001, |
|
"loss": 0.8716, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 17.66304347826087, |
|
"grad_norm": 1.1791819334030151, |
|
"learning_rate": 0.001, |
|
"loss": 0.8773, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 17.814009661835748, |
|
"grad_norm": 1.0563361644744873, |
|
"learning_rate": 0.001, |
|
"loss": 0.8847, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 17.964975845410628, |
|
"grad_norm": 0.9826712608337402, |
|
"learning_rate": 0.001, |
|
"loss": 0.8875, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7364949037372593, |
|
"eval_f1_macro": 0.33531357209441837, |
|
"eval_f1_micro": 0.7364949037372593, |
|
"eval_loss": 0.7615325450897217, |
|
"eval_runtime": 209.5656, |
|
"eval_samples_per_second": 168.539, |
|
"eval_steps_per_second": 5.268, |
|
"learning_rate": 0.001, |
|
"step": 59616 |
|
}, |
|
{ |
|
"epoch": 18.115942028985508, |
|
"grad_norm": 1.3085087537765503, |
|
"learning_rate": 0.001, |
|
"loss": 0.8767, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 18.266908212560388, |
|
"grad_norm": 1.6434136629104614, |
|
"learning_rate": 0.001, |
|
"loss": 0.8759, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 18.417874396135264, |
|
"grad_norm": 0.933411717414856, |
|
"learning_rate": 0.001, |
|
"loss": 0.8743, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 18.568840579710145, |
|
"grad_norm": 0.8527688384056091, |
|
"learning_rate": 0.001, |
|
"loss": 0.8902, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 18.719806763285025, |
|
"grad_norm": 0.930264949798584, |
|
"learning_rate": 0.001, |
|
"loss": 0.8831, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 18.870772946859905, |
|
"grad_norm": 0.9774535894393921, |
|
"learning_rate": 0.001, |
|
"loss": 0.8645, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.738731596828992, |
|
"eval_f1_macro": 0.34497718468246574, |
|
"eval_f1_micro": 0.738731596828992, |
|
"eval_loss": 0.7682134509086609, |
|
"eval_runtime": 209.5606, |
|
"eval_samples_per_second": 168.543, |
|
"eval_steps_per_second": 5.268, |
|
"learning_rate": 0.001, |
|
"step": 62928 |
|
}, |
|
{ |
|
"epoch": 19.02173913043478, |
|
"grad_norm": 0.9957185983657837, |
|
"learning_rate": 0.001, |
|
"loss": 0.8768, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 19.17270531400966, |
|
"grad_norm": 1.1577240228652954, |
|
"learning_rate": 0.001, |
|
"loss": 0.8735, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 19.32367149758454, |
|
"grad_norm": 1.202587604522705, |
|
"learning_rate": 0.001, |
|
"loss": 0.8843, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 19.47463768115942, |
|
"grad_norm": 0.9294016361236572, |
|
"learning_rate": 0.001, |
|
"loss": 0.8752, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 19.6256038647343, |
|
"grad_norm": 0.9645271897315979, |
|
"learning_rate": 0.001, |
|
"loss": 0.8777, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 19.77657004830918, |
|
"grad_norm": 0.9506662487983704, |
|
"learning_rate": 0.001, |
|
"loss": 0.8812, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 19.92753623188406, |
|
"grad_norm": 1.0420480966567993, |
|
"learning_rate": 0.001, |
|
"loss": 0.881, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7406285390713476, |
|
"eval_f1_macro": 0.3410941881456483, |
|
"eval_f1_micro": 0.7406285390713476, |
|
"eval_loss": 0.7559322118759155, |
|
"eval_runtime": 216.2958, |
|
"eval_samples_per_second": 163.295, |
|
"eval_steps_per_second": 5.104, |
|
"learning_rate": 0.001, |
|
"step": 66240 |
|
}, |
|
{ |
|
"epoch": 20.07850241545894, |
|
"grad_norm": 1.0472239255905151, |
|
"learning_rate": 0.001, |
|
"loss": 0.8593, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 20.229468599033815, |
|
"grad_norm": 1.329965353012085, |
|
"learning_rate": 0.001, |
|
"loss": 0.8724, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 20.380434782608695, |
|
"grad_norm": 0.8913328051567078, |
|
"learning_rate": 0.001, |
|
"loss": 0.8763, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 20.531400966183575, |
|
"grad_norm": 1.1009160280227661, |
|
"learning_rate": 0.001, |
|
"loss": 0.8682, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 20.682367149758456, |
|
"grad_norm": 0.8890852332115173, |
|
"learning_rate": 0.001, |
|
"loss": 0.8891, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 20.833333333333332, |
|
"grad_norm": 0.9112018346786499, |
|
"learning_rate": 0.001, |
|
"loss": 0.8833, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 20.984299516908212, |
|
"grad_norm": 0.9217807054519653, |
|
"learning_rate": 0.001, |
|
"loss": 0.8927, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7349093997734994, |
|
"eval_f1_macro": 0.3408196596431974, |
|
"eval_f1_micro": 0.7349093997734994, |
|
"eval_loss": 0.7755085229873657, |
|
"eval_runtime": 206.6211, |
|
"eval_samples_per_second": 170.941, |
|
"eval_steps_per_second": 5.343, |
|
"learning_rate": 0.001, |
|
"step": 69552 |
|
}, |
|
{ |
|
"epoch": 21.135265700483092, |
|
"grad_norm": 0.9805058836936951, |
|
"learning_rate": 0.001, |
|
"loss": 0.8726, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 21.286231884057973, |
|
"grad_norm": 1.1102917194366455, |
|
"learning_rate": 0.001, |
|
"loss": 0.8795, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 21.43719806763285, |
|
"grad_norm": 0.9818475842475891, |
|
"learning_rate": 0.001, |
|
"loss": 0.8745, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 21.58816425120773, |
|
"grad_norm": 1.11347234249115, |
|
"learning_rate": 0.001, |
|
"loss": 0.8885, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 21.73913043478261, |
|
"grad_norm": 1.0838714838027954, |
|
"learning_rate": 0.001, |
|
"loss": 0.8691, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 21.89009661835749, |
|
"grad_norm": 1.058936595916748, |
|
"learning_rate": 0.001, |
|
"loss": 0.8704, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7343714609286524, |
|
"eval_f1_macro": 0.3233148815749822, |
|
"eval_f1_micro": 0.7343714609286524, |
|
"eval_loss": 0.7673645615577698, |
|
"eval_runtime": 206.0861, |
|
"eval_samples_per_second": 171.385, |
|
"eval_steps_per_second": 5.357, |
|
"learning_rate": 0.001, |
|
"step": 72864 |
|
}, |
|
{ |
|
"epoch": 22.041062801932366, |
|
"grad_norm": 1.0462204217910767, |
|
"learning_rate": 0.001, |
|
"loss": 0.8799, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 22.192028985507246, |
|
"grad_norm": 1.295551061630249, |
|
"learning_rate": 0.001, |
|
"loss": 0.8779, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 22.342995169082126, |
|
"grad_norm": 2.222907543182373, |
|
"learning_rate": 0.001, |
|
"loss": 0.8746, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 22.493961352657006, |
|
"grad_norm": 1.1222875118255615, |
|
"learning_rate": 0.001, |
|
"loss": 0.8815, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 22.644927536231883, |
|
"grad_norm": 0.9735862612724304, |
|
"learning_rate": 0.001, |
|
"loss": 0.871, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 22.795893719806763, |
|
"grad_norm": 0.9529093503952026, |
|
"learning_rate": 0.001, |
|
"loss": 0.8804, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 22.946859903381643, |
|
"grad_norm": 1.0049786567687988, |
|
"learning_rate": 0.001, |
|
"loss": 0.8711, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7340317100792751, |
|
"eval_f1_macro": 0.3139130897718377, |
|
"eval_f1_micro": 0.7340317100792751, |
|
"eval_loss": 0.769507884979248, |
|
"eval_runtime": 217.9621, |
|
"eval_samples_per_second": 162.047, |
|
"eval_steps_per_second": 5.065, |
|
"learning_rate": 0.001, |
|
"step": 76176 |
|
}, |
|
{ |
|
"epoch": 23.097826086956523, |
|
"grad_norm": 1.3799837827682495, |
|
"learning_rate": 0.001, |
|
"loss": 0.8864, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 23.2487922705314, |
|
"grad_norm": 0.9700829982757568, |
|
"learning_rate": 0.001, |
|
"loss": 0.8696, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 23.39975845410628, |
|
"grad_norm": 0.9899695515632629, |
|
"learning_rate": 0.001, |
|
"loss": 0.8639, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 23.55072463768116, |
|
"grad_norm": 1.1878501176834106, |
|
"learning_rate": 0.001, |
|
"loss": 0.881, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 23.70169082125604, |
|
"grad_norm": 1.3074716329574585, |
|
"learning_rate": 0.001, |
|
"loss": 0.8722, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 23.852657004830917, |
|
"grad_norm": 0.7670137882232666, |
|
"learning_rate": 0.001, |
|
"loss": 0.8722, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7400056625141563, |
|
"eval_f1_macro": 0.3337837042107275, |
|
"eval_f1_micro": 0.7400056625141563, |
|
"eval_loss": 0.7538156509399414, |
|
"eval_runtime": 207.7026, |
|
"eval_samples_per_second": 170.051, |
|
"eval_steps_per_second": 5.315, |
|
"learning_rate": 0.001, |
|
"step": 79488 |
|
}, |
|
{ |
|
"epoch": 24.003623188405797, |
|
"grad_norm": 1.0581316947937012, |
|
"learning_rate": 0.001, |
|
"loss": 0.889, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 24.154589371980677, |
|
"grad_norm": 0.8070245385169983, |
|
"learning_rate": 0.001, |
|
"loss": 0.8713, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 24.305555555555557, |
|
"grad_norm": 1.128304123878479, |
|
"learning_rate": 0.001, |
|
"loss": 0.8816, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 24.456521739130434, |
|
"grad_norm": 1.1110137701034546, |
|
"learning_rate": 0.001, |
|
"loss": 0.8753, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 24.607487922705314, |
|
"grad_norm": 0.9744004011154175, |
|
"learning_rate": 0.001, |
|
"loss": 0.8682, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 24.758454106280194, |
|
"grad_norm": 0.911435067653656, |
|
"learning_rate": 0.001, |
|
"loss": 0.8618, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 24.909420289855074, |
|
"grad_norm": 0.8553086519241333, |
|
"learning_rate": 0.001, |
|
"loss": 0.884, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7351925254813138, |
|
"eval_f1_macro": 0.3480083355542471, |
|
"eval_f1_micro": 0.7351925254813138, |
|
"eval_loss": 0.7643014788627625, |
|
"eval_runtime": 213.2646, |
|
"eval_samples_per_second": 165.616, |
|
"eval_steps_per_second": 5.177, |
|
"learning_rate": 0.001, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 25.06038647342995, |
|
"grad_norm": 0.8580687642097473, |
|
"learning_rate": 0.001, |
|
"loss": 0.8737, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 25.21135265700483, |
|
"grad_norm": 0.944041907787323, |
|
"learning_rate": 0.001, |
|
"loss": 0.8675, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 25.36231884057971, |
|
"grad_norm": 1.206111192703247, |
|
"learning_rate": 0.001, |
|
"loss": 0.8836, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 25.51328502415459, |
|
"grad_norm": 1.0812050104141235, |
|
"learning_rate": 0.001, |
|
"loss": 0.8893, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 25.664251207729468, |
|
"grad_norm": 1.0790187120437622, |
|
"learning_rate": 0.001, |
|
"loss": 0.8768, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 25.815217391304348, |
|
"grad_norm": 1.1290830373764038, |
|
"learning_rate": 0.001, |
|
"loss": 0.8819, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 25.966183574879228, |
|
"grad_norm": 1.0427964925765991, |
|
"learning_rate": 0.001, |
|
"loss": 0.8661, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7387882219705549, |
|
"eval_f1_macro": 0.32721566370968785, |
|
"eval_f1_micro": 0.7387882219705549, |
|
"eval_loss": 0.7568147778511047, |
|
"eval_runtime": 219.6821, |
|
"eval_samples_per_second": 160.778, |
|
"eval_steps_per_second": 5.025, |
|
"learning_rate": 0.001, |
|
"step": 86112 |
|
}, |
|
{ |
|
"epoch": 26.117149758454108, |
|
"grad_norm": 1.265699863433838, |
|
"learning_rate": 0.001, |
|
"loss": 0.868, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 26.268115942028984, |
|
"grad_norm": 0.9958316087722778, |
|
"learning_rate": 0.001, |
|
"loss": 0.8664, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 26.419082125603865, |
|
"grad_norm": 0.8388053774833679, |
|
"learning_rate": 0.001, |
|
"loss": 0.8857, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 26.570048309178745, |
|
"grad_norm": 1.056181788444519, |
|
"learning_rate": 0.001, |
|
"loss": 0.8789, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 26.721014492753625, |
|
"grad_norm": 0.85558021068573, |
|
"learning_rate": 0.001, |
|
"loss": 0.8786, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 26.8719806763285, |
|
"grad_norm": 1.0893311500549316, |
|
"learning_rate": 0.001, |
|
"loss": 0.8847, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7371177802944507, |
|
"eval_f1_macro": 0.34266214596872374, |
|
"eval_f1_micro": 0.7371177802944507, |
|
"eval_loss": 0.7665159106254578, |
|
"eval_runtime": 210.1589, |
|
"eval_samples_per_second": 168.063, |
|
"eval_steps_per_second": 5.253, |
|
"learning_rate": 0.001, |
|
"step": 89424 |
|
}, |
|
{ |
|
"epoch": 27.02294685990338, |
|
"grad_norm": 1.2845637798309326, |
|
"learning_rate": 0.001, |
|
"loss": 0.8903, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 27.17391304347826, |
|
"grad_norm": 0.9213324785232544, |
|
"learning_rate": 0.001, |
|
"loss": 0.8858, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 27.32487922705314, |
|
"grad_norm": 1.1463359594345093, |
|
"learning_rate": 0.001, |
|
"loss": 0.8753, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 27.47584541062802, |
|
"grad_norm": 1.2757492065429688, |
|
"learning_rate": 0.001, |
|
"loss": 0.879, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 27.6268115942029, |
|
"grad_norm": 1.0697648525238037, |
|
"learning_rate": 0.001, |
|
"loss": 0.8731, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 27.77777777777778, |
|
"grad_norm": 0.9725639224052429, |
|
"learning_rate": 0.001, |
|
"loss": 0.8826, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 27.92874396135266, |
|
"grad_norm": 1.3118997812271118, |
|
"learning_rate": 0.001, |
|
"loss": 0.8749, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7385050962627406, |
|
"eval_f1_macro": 0.3129308738494337, |
|
"eval_f1_micro": 0.7385050962627406, |
|
"eval_loss": 0.7591701745986938, |
|
"eval_runtime": 209.3556, |
|
"eval_samples_per_second": 168.708, |
|
"eval_steps_per_second": 5.273, |
|
"learning_rate": 0.001, |
|
"step": 92736 |
|
}, |
|
{ |
|
"epoch": 28.079710144927535, |
|
"grad_norm": 1.1031404733657837, |
|
"learning_rate": 0.001, |
|
"loss": 0.8582, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 28.230676328502415, |
|
"grad_norm": 1.0540931224822998, |
|
"learning_rate": 0.001, |
|
"loss": 0.8728, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 28.381642512077295, |
|
"grad_norm": 0.9347510933876038, |
|
"learning_rate": 0.001, |
|
"loss": 0.8758, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 28.532608695652176, |
|
"grad_norm": 1.1869118213653564, |
|
"learning_rate": 0.001, |
|
"loss": 0.8803, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 28.683574879227052, |
|
"grad_norm": 0.9577313661575317, |
|
"learning_rate": 0.001, |
|
"loss": 0.8792, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 28.834541062801932, |
|
"grad_norm": 1.0004523992538452, |
|
"learning_rate": 0.001, |
|
"loss": 0.8732, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 28.985507246376812, |
|
"grad_norm": 1.257615566253662, |
|
"learning_rate": 0.001, |
|
"loss": 0.8782, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7402038505096262, |
|
"eval_f1_macro": 0.34203740173275743, |
|
"eval_f1_micro": 0.7402038505096262, |
|
"eval_loss": 0.7544065117835999, |
|
"eval_runtime": 211.8639, |
|
"eval_samples_per_second": 166.711, |
|
"eval_steps_per_second": 5.211, |
|
"learning_rate": 0.001, |
|
"step": 96048 |
|
}, |
|
{ |
|
"epoch": 29.136473429951693, |
|
"grad_norm": 1.1350879669189453, |
|
"learning_rate": 0.001, |
|
"loss": 0.8781, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 29.28743961352657, |
|
"grad_norm": 1.1391183137893677, |
|
"learning_rate": 0.001, |
|
"loss": 0.8898, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 29.43840579710145, |
|
"grad_norm": 1.0922129154205322, |
|
"learning_rate": 0.001, |
|
"loss": 0.8716, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 29.58937198067633, |
|
"grad_norm": 1.3424519300460815, |
|
"learning_rate": 0.001, |
|
"loss": 0.8716, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 29.740338164251206, |
|
"grad_norm": 1.146209478378296, |
|
"learning_rate": 0.001, |
|
"loss": 0.8666, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 29.891304347826086, |
|
"grad_norm": 1.1248446702957153, |
|
"learning_rate": 0.001, |
|
"loss": 0.882, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7412231030577576, |
|
"eval_f1_macro": 0.35031500886831946, |
|
"eval_f1_micro": 0.7412231030577576, |
|
"eval_loss": 0.7548879981040955, |
|
"eval_runtime": 207.5785, |
|
"eval_samples_per_second": 170.153, |
|
"eval_steps_per_second": 5.318, |
|
"learning_rate": 0.001, |
|
"step": 99360 |
|
}, |
|
{ |
|
"epoch": 30.042270531400966, |
|
"grad_norm": 1.1519653797149658, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8725, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 30.193236714975846, |
|
"grad_norm": 0.7686799764633179, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8431, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 30.344202898550726, |
|
"grad_norm": 0.976070761680603, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8539, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 30.495169082125603, |
|
"grad_norm": 0.9183005094528198, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8393, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 30.646135265700483, |
|
"grad_norm": 0.9291144013404846, |
|
"learning_rate": 0.0001, |
|
"loss": 0.854, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 30.797101449275363, |
|
"grad_norm": 1.2014933824539185, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8453, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 30.94806763285024, |
|
"grad_norm": 0.8994304537773132, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8481, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7457248018120045, |
|
"eval_f1_macro": 0.3601618767603402, |
|
"eval_f1_micro": 0.7457248018120045, |
|
"eval_loss": 0.7332457304000854, |
|
"eval_runtime": 218.4764, |
|
"eval_samples_per_second": 161.665, |
|
"eval_steps_per_second": 5.053, |
|
"learning_rate": 0.0001, |
|
"step": 102672 |
|
}, |
|
{ |
|
"epoch": 31.09903381642512, |
|
"grad_norm": 1.2251958847045898, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8288, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 31.25, |
|
"grad_norm": 0.7248632311820984, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8366, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 31.40096618357488, |
|
"grad_norm": 0.9571515917778015, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8293, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 31.55193236714976, |
|
"grad_norm": 0.9452911615371704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8302, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 31.702898550724637, |
|
"grad_norm": 1.0487594604492188, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8193, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 31.853864734299517, |
|
"grad_norm": 1.2892954349517822, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8329, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7456115515288788, |
|
"eval_f1_macro": 0.36956451081134645, |
|
"eval_f1_micro": 0.7456115515288788, |
|
"eval_loss": 0.7296127080917358, |
|
"eval_runtime": 208.608, |
|
"eval_samples_per_second": 169.313, |
|
"eval_steps_per_second": 5.292, |
|
"learning_rate": 0.0001, |
|
"step": 105984 |
|
}, |
|
{ |
|
"epoch": 32.00483091787439, |
|
"grad_norm": 1.3711316585540771, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8266, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 32.155797101449274, |
|
"grad_norm": 1.0526502132415771, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8222, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 32.306763285024154, |
|
"grad_norm": 1.1645632982254028, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8185, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 32.457729468599034, |
|
"grad_norm": 1.0817097425460815, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8313, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 32.608695652173914, |
|
"grad_norm": 1.298172116279602, |
|
"learning_rate": 0.0001, |
|
"loss": 0.842, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 32.759661835748794, |
|
"grad_norm": 1.3646366596221924, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8113, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 32.910628019323674, |
|
"grad_norm": 0.9803568124771118, |
|
"learning_rate": 0.0001, |
|
"loss": 0.817, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.746687429218573, |
|
"eval_f1_macro": 0.3749049829157648, |
|
"eval_f1_micro": 0.746687429218573, |
|
"eval_loss": 0.7269963026046753, |
|
"eval_runtime": 210.9198, |
|
"eval_samples_per_second": 167.457, |
|
"eval_steps_per_second": 5.234, |
|
"learning_rate": 0.0001, |
|
"step": 109296 |
|
}, |
|
{ |
|
"epoch": 33.06159420289855, |
|
"grad_norm": 0.8479082584381104, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8237, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 33.21256038647343, |
|
"grad_norm": 0.9720476269721985, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8086, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 33.36352657004831, |
|
"grad_norm": 0.9979745149612427, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8191, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 33.51449275362319, |
|
"grad_norm": 0.9301393628120422, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8311, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 33.66545893719807, |
|
"grad_norm": 1.1360764503479004, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8297, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 33.81642512077295, |
|
"grad_norm": 1.2328333854675293, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8127, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 33.96739130434783, |
|
"grad_norm": 1.1281388998031616, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8173, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7471404303510759, |
|
"eval_f1_macro": 0.3683427663296638, |
|
"eval_f1_micro": 0.7471404303510759, |
|
"eval_loss": 0.7234370708465576, |
|
"eval_runtime": 216.4876, |
|
"eval_samples_per_second": 163.15, |
|
"eval_steps_per_second": 5.1, |
|
"learning_rate": 0.0001, |
|
"step": 112608 |
|
}, |
|
{ |
|
"epoch": 34.11835748792271, |
|
"grad_norm": 0.9314268231391907, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7958, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 34.26932367149758, |
|
"grad_norm": 1.1677554845809937, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8122, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 34.42028985507246, |
|
"grad_norm": 1.3356529474258423, |
|
"learning_rate": 0.0001, |
|
"loss": 0.815, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 34.57125603864734, |
|
"grad_norm": 1.1832484006881714, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8035, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 34.72222222222222, |
|
"grad_norm": 1.0759506225585938, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8232, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 34.8731884057971, |
|
"grad_norm": 1.3692307472229004, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8221, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.74920724801812, |
|
"eval_f1_macro": 0.37945515998159585, |
|
"eval_f1_micro": 0.74920724801812, |
|
"eval_loss": 0.7186616063117981, |
|
"eval_runtime": 215.2105, |
|
"eval_samples_per_second": 164.118, |
|
"eval_steps_per_second": 5.13, |
|
"learning_rate": 0.0001, |
|
"step": 115920 |
|
}, |
|
{ |
|
"epoch": 35.02415458937198, |
|
"grad_norm": 1.063859462738037, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8192, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 35.17512077294686, |
|
"grad_norm": 1.2980016469955444, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8125, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 35.32608695652174, |
|
"grad_norm": 0.8495572209358215, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8047, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 35.477053140096615, |
|
"grad_norm": 1.1018431186676025, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8181, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 35.628019323671495, |
|
"grad_norm": 1.5239720344543457, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8143, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 35.778985507246375, |
|
"grad_norm": 0.7327041029930115, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8114, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 35.929951690821255, |
|
"grad_norm": 1.2075918912887573, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8085, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7484144960362401, |
|
"eval_f1_macro": 0.37576109730783996, |
|
"eval_f1_micro": 0.7484144960362401, |
|
"eval_loss": 0.7215430736541748, |
|
"eval_runtime": 213.3628, |
|
"eval_samples_per_second": 165.54, |
|
"eval_steps_per_second": 5.174, |
|
"learning_rate": 0.0001, |
|
"step": 119232 |
|
}, |
|
{ |
|
"epoch": 36.080917874396135, |
|
"grad_norm": 1.33438241481781, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8099, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 36.231884057971016, |
|
"grad_norm": 1.2371516227722168, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8109, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 36.382850241545896, |
|
"grad_norm": 1.3165804147720337, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8021, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 36.533816425120776, |
|
"grad_norm": 1.2245359420776367, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8043, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 36.68478260869565, |
|
"grad_norm": 1.1979665756225586, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8056, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 36.83574879227053, |
|
"grad_norm": 1.217260479927063, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8232, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 36.98671497584541, |
|
"grad_norm": 1.242099404335022, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8113, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7505096262740657, |
|
"eval_f1_macro": 0.37673433650483235, |
|
"eval_f1_micro": 0.7505096262740657, |
|
"eval_loss": 0.7179692387580872, |
|
"eval_runtime": 210.8094, |
|
"eval_samples_per_second": 167.545, |
|
"eval_steps_per_second": 5.237, |
|
"learning_rate": 0.0001, |
|
"step": 122544 |
|
}, |
|
{ |
|
"epoch": 37.13768115942029, |
|
"grad_norm": 1.0589709281921387, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8105, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 37.28864734299517, |
|
"grad_norm": 1.1704827547073364, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8023, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 37.43961352657005, |
|
"grad_norm": 1.1597093343734741, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8087, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 37.59057971014493, |
|
"grad_norm": 1.0800503492355347, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8143, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 37.74154589371981, |
|
"grad_norm": 1.308185338973999, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8109, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 37.89251207729468, |
|
"grad_norm": 1.2090855836868286, |
|
"learning_rate": 0.0001, |
|
"loss": 0.802, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7502265005662514, |
|
"eval_f1_macro": 0.38606784223081947, |
|
"eval_f1_micro": 0.7502265005662514, |
|
"eval_loss": 0.7137466669082642, |
|
"eval_runtime": 216.808, |
|
"eval_samples_per_second": 162.909, |
|
"eval_steps_per_second": 5.092, |
|
"learning_rate": 0.0001, |
|
"step": 125856 |
|
}, |
|
{ |
|
"epoch": 38.04347826086956, |
|
"grad_norm": 1.272017002105713, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8112, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 38.19444444444444, |
|
"grad_norm": 1.3077664375305176, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7937, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 38.34541062801932, |
|
"grad_norm": 1.2666317224502563, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8173, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 38.4963768115942, |
|
"grad_norm": 1.3403891324996948, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8062, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 38.64734299516908, |
|
"grad_norm": 1.316519856452942, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8047, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 38.79830917874396, |
|
"grad_norm": 1.1211258172988892, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8203, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 38.94927536231884, |
|
"grad_norm": 1.4983903169631958, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8042, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7514439411098528, |
|
"eval_f1_macro": 0.38681796623592196, |
|
"eval_f1_micro": 0.7514439411098528, |
|
"eval_loss": 0.7124654650688171, |
|
"eval_runtime": 219.4372, |
|
"eval_samples_per_second": 160.957, |
|
"eval_steps_per_second": 5.031, |
|
"learning_rate": 0.0001, |
|
"step": 129168 |
|
}, |
|
{ |
|
"epoch": 39.10024154589372, |
|
"grad_norm": 1.1229315996170044, |
|
"learning_rate": 0.0001, |
|
"loss": 0.785, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 39.2512077294686, |
|
"grad_norm": 1.2249174118041992, |
|
"learning_rate": 0.0001, |
|
"loss": 0.801, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 39.40217391304348, |
|
"grad_norm": 1.760386347770691, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8125, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 39.55314009661836, |
|
"grad_norm": 1.0228271484375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8015, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 39.70410628019324, |
|
"grad_norm": 1.1143656969070435, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7833, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 39.85507246376812, |
|
"grad_norm": 1.0798527002334595, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7976, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7499433748584371, |
|
"eval_f1_macro": 0.3844158005616281, |
|
"eval_f1_micro": 0.7499433748584371, |
|
"eval_loss": 0.7125608325004578, |
|
"eval_runtime": 212.9084, |
|
"eval_samples_per_second": 165.893, |
|
"eval_steps_per_second": 5.185, |
|
"learning_rate": 0.0001, |
|
"step": 132480 |
|
}, |
|
{ |
|
"epoch": 40.006038647343, |
|
"grad_norm": 1.3306584358215332, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8192, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 40.15700483091788, |
|
"grad_norm": 1.3124111890792847, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8016, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 40.30797101449275, |
|
"grad_norm": 1.4162460565567017, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7982, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 40.45893719806763, |
|
"grad_norm": 1.360172986984253, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8125, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 40.60990338164251, |
|
"grad_norm": 1.2010914087295532, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8077, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 40.76086956521739, |
|
"grad_norm": 1.451456069946289, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8008, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 40.91183574879227, |
|
"grad_norm": 1.5531110763549805, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7963, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7515571913929785, |
|
"eval_f1_macro": 0.3905191464380679, |
|
"eval_f1_micro": 0.7515571913929785, |
|
"eval_loss": 0.7112360596656799, |
|
"eval_runtime": 213.4713, |
|
"eval_samples_per_second": 165.456, |
|
"eval_steps_per_second": 5.172, |
|
"learning_rate": 0.0001, |
|
"step": 135792 |
|
}, |
|
{ |
|
"epoch": 41.06280193236715, |
|
"grad_norm": 1.173843502998352, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8097, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 41.21376811594203, |
|
"grad_norm": 1.4110829830169678, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7872, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 41.36473429951691, |
|
"grad_norm": 1.5491451025009155, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8035, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 41.515700483091784, |
|
"grad_norm": 1.6657460927963257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7889, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 41.666666666666664, |
|
"grad_norm": 1.4883304834365845, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7865, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 41.817632850241544, |
|
"grad_norm": 1.416391372680664, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8115, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 41.968599033816425, |
|
"grad_norm": 0.9394751191139221, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8054, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7511041902604757, |
|
"eval_f1_macro": 0.3926316856477778, |
|
"eval_f1_micro": 0.7511041902604757, |
|
"eval_loss": 0.7115524411201477, |
|
"eval_runtime": 213.3241, |
|
"eval_samples_per_second": 165.57, |
|
"eval_steps_per_second": 5.175, |
|
"learning_rate": 0.0001, |
|
"step": 139104 |
|
}, |
|
{ |
|
"epoch": 42.119565217391305, |
|
"grad_norm": 1.5084961652755737, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7955, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 42.270531400966185, |
|
"grad_norm": 1.0601041316986084, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8035, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 42.421497584541065, |
|
"grad_norm": 1.6845051050186157, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8009, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 42.572463768115945, |
|
"grad_norm": 1.5695706605911255, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8061, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 42.72342995169082, |
|
"grad_norm": 1.5166726112365723, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7945, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 42.8743961352657, |
|
"grad_norm": 1.2639328241348267, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8119, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7515571913929785, |
|
"eval_f1_macro": 0.3900649425217524, |
|
"eval_f1_micro": 0.7515571913929785, |
|
"eval_loss": 0.7097686529159546, |
|
"eval_runtime": 212.7399, |
|
"eval_samples_per_second": 166.024, |
|
"eval_steps_per_second": 5.189, |
|
"learning_rate": 0.0001, |
|
"step": 142416 |
|
}, |
|
{ |
|
"epoch": 43.02536231884058, |
|
"grad_norm": 1.5196884870529175, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8022, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 43.17632850241546, |
|
"grad_norm": 1.6238950490951538, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7843, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 43.32729468599034, |
|
"grad_norm": 1.1949807405471802, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8026, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 43.47826086956522, |
|
"grad_norm": 1.6461068391799927, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7946, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 43.6292270531401, |
|
"grad_norm": 1.3773376941680908, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7896, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 43.78019323671498, |
|
"grad_norm": 1.1680546998977661, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7963, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 43.93115942028985, |
|
"grad_norm": 1.5515567064285278, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8009, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7506511891279728, |
|
"eval_f1_macro": 0.38968576708900504, |
|
"eval_f1_micro": 0.7506511891279728, |
|
"eval_loss": 0.7101725339889526, |
|
"eval_runtime": 209.581, |
|
"eval_samples_per_second": 168.527, |
|
"eval_steps_per_second": 5.268, |
|
"learning_rate": 0.0001, |
|
"step": 145728 |
|
}, |
|
{ |
|
"epoch": 44.08212560386473, |
|
"grad_norm": 1.7003437280654907, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7993, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 44.23309178743961, |
|
"grad_norm": 1.3267650604248047, |
|
"learning_rate": 0.0001, |
|
"loss": 0.812, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 44.38405797101449, |
|
"grad_norm": 1.4716774225234985, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8012, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 44.53502415458937, |
|
"grad_norm": 1.015686273574829, |
|
"learning_rate": 0.0001, |
|
"loss": 0.778, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 44.68599033816425, |
|
"grad_norm": 1.2423222064971924, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8172, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 44.83695652173913, |
|
"grad_norm": 1.5044087171554565, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8041, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 44.98792270531401, |
|
"grad_norm": 1.1471318006515503, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7929, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7516987542468856, |
|
"eval_f1_macro": 0.38827203312086217, |
|
"eval_f1_micro": 0.7516987542468856, |
|
"eval_loss": 0.7100493311882019, |
|
"eval_runtime": 215.0099, |
|
"eval_samples_per_second": 164.271, |
|
"eval_steps_per_second": 5.135, |
|
"learning_rate": 0.0001, |
|
"step": 149040 |
|
}, |
|
{ |
|
"epoch": 45.138888888888886, |
|
"grad_norm": 2.307978868484497, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7942, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 45.289855072463766, |
|
"grad_norm": 1.3866568803787231, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8082, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 45.440821256038646, |
|
"grad_norm": 1.7695890665054321, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7892, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 45.591787439613526, |
|
"grad_norm": 1.2394018173217773, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7929, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 45.742753623188406, |
|
"grad_norm": 1.6897494792938232, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7997, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 45.893719806763286, |
|
"grad_norm": 1.5814383029937744, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8079, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7509909399773499, |
|
"eval_f1_macro": 0.3911936795292309, |
|
"eval_f1_micro": 0.7509909399773499, |
|
"eval_loss": 0.7068280577659607, |
|
"eval_runtime": 209.5945, |
|
"eval_samples_per_second": 168.516, |
|
"eval_steps_per_second": 5.267, |
|
"learning_rate": 0.0001, |
|
"step": 152352 |
|
}, |
|
{ |
|
"epoch": 46.04468599033817, |
|
"grad_norm": 1.5347557067871094, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7954, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 46.19565217391305, |
|
"grad_norm": 1.6254936456680298, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7791, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 46.34661835748792, |
|
"grad_norm": 0.9997854828834534, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8056, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 46.4975845410628, |
|
"grad_norm": 1.80784273147583, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7839, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 46.64855072463768, |
|
"grad_norm": 1.3725862503051758, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8005, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 46.79951690821256, |
|
"grad_norm": 1.7076722383499146, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8084, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 46.95048309178744, |
|
"grad_norm": 0.9908552169799805, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8053, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7509909399773499, |
|
"eval_f1_macro": 0.38879336218586646, |
|
"eval_f1_micro": 0.7509909399773499, |
|
"eval_loss": 0.7073561549186707, |
|
"eval_runtime": 209.7549, |
|
"eval_samples_per_second": 168.387, |
|
"eval_steps_per_second": 5.263, |
|
"learning_rate": 0.0001, |
|
"step": 155664 |
|
}, |
|
{ |
|
"epoch": 47.10144927536232, |
|
"grad_norm": 1.562961220741272, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7871, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 47.2524154589372, |
|
"grad_norm": 1.2126048803329468, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7896, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 47.40338164251208, |
|
"grad_norm": 1.2961828708648682, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7788, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 47.55434782608695, |
|
"grad_norm": 1.5157291889190674, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7951, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 47.70531400966183, |
|
"grad_norm": 1.4402084350585938, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7888, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 47.856280193236714, |
|
"grad_norm": 1.5191363096237183, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7965, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7507644394110985, |
|
"eval_f1_macro": 0.38896207109046926, |
|
"eval_f1_micro": 0.7507644394110985, |
|
"eval_loss": 0.7095273733139038, |
|
"eval_runtime": 208.9561, |
|
"eval_samples_per_second": 169.031, |
|
"eval_steps_per_second": 5.283, |
|
"learning_rate": 0.0001, |
|
"step": 158976 |
|
}, |
|
{ |
|
"epoch": 48.007246376811594, |
|
"grad_norm": 1.181817650794983, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7951, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 48.158212560386474, |
|
"grad_norm": 1.894116759300232, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7766, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 48.309178743961354, |
|
"grad_norm": 1.7920688390731812, |
|
"learning_rate": 0.0001, |
|
"loss": 0.779, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 48.460144927536234, |
|
"grad_norm": 1.5282026529312134, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8092, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 48.611111111111114, |
|
"grad_norm": 2.105564832687378, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7872, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 48.76207729468599, |
|
"grad_norm": 1.047914981842041, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7991, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 48.91304347826087, |
|
"grad_norm": 1.8722704648971558, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8043, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7509343148357871, |
|
"eval_f1_macro": 0.393500354059222, |
|
"eval_f1_micro": 0.7509343148357871, |
|
"eval_loss": 0.7090209126472473, |
|
"eval_runtime": 211.4098, |
|
"eval_samples_per_second": 167.069, |
|
"eval_steps_per_second": 5.222, |
|
"learning_rate": 0.0001, |
|
"step": 162288 |
|
}, |
|
{ |
|
"epoch": 49.06400966183575, |
|
"grad_norm": 1.6309547424316406, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7861, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 49.21497584541063, |
|
"grad_norm": 1.2643849849700928, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7903, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 49.36594202898551, |
|
"grad_norm": 1.3246151208877563, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7803, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 49.51690821256039, |
|
"grad_norm": 1.8463890552520752, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8013, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 49.66787439613527, |
|
"grad_norm": 1.5198359489440918, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7956, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 49.81884057971015, |
|
"grad_norm": 1.9406630992889404, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8004, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 49.96980676328502, |
|
"grad_norm": 2.282998561859131, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7861, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7512174405436014, |
|
"eval_f1_macro": 0.4025782389887584, |
|
"eval_f1_micro": 0.7512174405436014, |
|
"eval_loss": 0.7080034017562866, |
|
"eval_runtime": 220.5346, |
|
"eval_samples_per_second": 160.156, |
|
"eval_steps_per_second": 5.006, |
|
"learning_rate": 0.0001, |
|
"step": 165600 |
|
}, |
|
{ |
|
"epoch": 50.1207729468599, |
|
"grad_norm": 1.6418976783752441, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7868, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 50.27173913043478, |
|
"grad_norm": 1.3250433206558228, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7945, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 50.42270531400966, |
|
"grad_norm": 1.5783368349075317, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7964, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 50.57367149758454, |
|
"grad_norm": 1.6777008771896362, |
|
"learning_rate": 0.0001, |
|
"loss": 0.772, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 50.72463768115942, |
|
"grad_norm": 1.729814052581787, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7925, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 50.8756038647343, |
|
"grad_norm": 1.6847931146621704, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7917, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7513873159682899, |
|
"eval_f1_macro": 0.3942298110767962, |
|
"eval_f1_micro": 0.7513873159682899, |
|
"eval_loss": 0.7062203288078308, |
|
"eval_runtime": 209.6717, |
|
"eval_samples_per_second": 168.454, |
|
"eval_steps_per_second": 5.265, |
|
"learning_rate": 0.0001, |
|
"step": 168912 |
|
}, |
|
{ |
|
"epoch": 51.02657004830918, |
|
"grad_norm": 1.8825182914733887, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7902, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 51.177536231884055, |
|
"grad_norm": 1.9477916955947876, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7904, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 51.328502415458935, |
|
"grad_norm": 1.3268564939498901, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7863, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 51.479468599033815, |
|
"grad_norm": 1.6211856603622437, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7821, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 51.630434782608695, |
|
"grad_norm": 1.8431261777877808, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7901, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 51.781400966183575, |
|
"grad_norm": 1.8904542922973633, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7864, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 51.932367149758456, |
|
"grad_norm": 1.6243914365768433, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7909, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7526330690826727, |
|
"eval_f1_macro": 0.3971378678667864, |
|
"eval_f1_micro": 0.7526330690826727, |
|
"eval_loss": 0.7048719525337219, |
|
"eval_runtime": 221.7242, |
|
"eval_samples_per_second": 159.297, |
|
"eval_steps_per_second": 4.979, |
|
"learning_rate": 0.0001, |
|
"step": 172224 |
|
}, |
|
{ |
|
"epoch": 52.083333333333336, |
|
"grad_norm": 1.5745625495910645, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7943, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 52.234299516908216, |
|
"grad_norm": 2.089834213256836, |
|
"learning_rate": 0.0001, |
|
"loss": 0.785, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 52.38526570048309, |
|
"grad_norm": 2.062624454498291, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7903, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 52.53623188405797, |
|
"grad_norm": 1.331170678138733, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7875, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 52.68719806763285, |
|
"grad_norm": 1.5486934185028076, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7807, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 52.83816425120773, |
|
"grad_norm": 2.0570003986358643, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7864, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 52.98913043478261, |
|
"grad_norm": 1.223645567893982, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7886, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7526330690826727, |
|
"eval_f1_macro": 0.4016673062072626, |
|
"eval_f1_micro": 0.7526330690826727, |
|
"eval_loss": 0.7044239640235901, |
|
"eval_runtime": 207.7966, |
|
"eval_samples_per_second": 169.974, |
|
"eval_steps_per_second": 5.313, |
|
"learning_rate": 0.0001, |
|
"step": 175536 |
|
}, |
|
{ |
|
"epoch": 53.14009661835749, |
|
"grad_norm": 1.4457918405532837, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7735, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 53.29106280193237, |
|
"grad_norm": 1.7474045753479004, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8022, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 53.44202898550725, |
|
"grad_norm": 1.0434207916259766, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7826, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 53.59299516908212, |
|
"grad_norm": 1.553357481956482, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7954, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 53.743961352657, |
|
"grad_norm": 1.872591495513916, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7805, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 53.89492753623188, |
|
"grad_norm": 1.3298755884170532, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7834, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7524348810872027, |
|
"eval_f1_macro": 0.3991645638715668, |
|
"eval_f1_micro": 0.7524348810872027, |
|
"eval_loss": 0.7028033137321472, |
|
"eval_runtime": 210.1534, |
|
"eval_samples_per_second": 168.068, |
|
"eval_steps_per_second": 5.253, |
|
"learning_rate": 0.0001, |
|
"step": 178848 |
|
}, |
|
{ |
|
"epoch": 54.04589371980676, |
|
"grad_norm": 2.058666706085205, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7999, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 54.19685990338164, |
|
"grad_norm": 1.9143357276916504, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7819, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 54.34782608695652, |
|
"grad_norm": 2.0311787128448486, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7867, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 54.4987922705314, |
|
"grad_norm": 1.4539964199066162, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7775, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 54.64975845410628, |
|
"grad_norm": 1.5299663543701172, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7855, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 54.80072463768116, |
|
"grad_norm": 1.3402701616287231, |
|
"learning_rate": 0.0001, |
|
"loss": 0.783, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 54.95169082125604, |
|
"grad_norm": 1.9268286228179932, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7991, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7526896942242356, |
|
"eval_f1_macro": 0.39664989522082456, |
|
"eval_f1_micro": 0.7526896942242356, |
|
"eval_loss": 0.7028517127037048, |
|
"eval_runtime": 209.4841, |
|
"eval_samples_per_second": 168.605, |
|
"eval_steps_per_second": 5.27, |
|
"learning_rate": 0.0001, |
|
"step": 182160 |
|
}, |
|
{ |
|
"epoch": 55.10265700483092, |
|
"grad_norm": 1.6586686372756958, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8025, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 55.2536231884058, |
|
"grad_norm": 1.9422414302825928, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7772, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 55.40458937198068, |
|
"grad_norm": 1.540802001953125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7824, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 55.55555555555556, |
|
"grad_norm": 1.2288109064102173, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7913, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 55.70652173913044, |
|
"grad_norm": 1.309683918952942, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7869, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 55.85748792270532, |
|
"grad_norm": 2.4700379371643066, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7875, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7533408833522084, |
|
"eval_f1_macro": 0.4010505955386608, |
|
"eval_f1_micro": 0.7533408833522084, |
|
"eval_loss": 0.7026041746139526, |
|
"eval_runtime": 215.9707, |
|
"eval_samples_per_second": 163.541, |
|
"eval_steps_per_second": 5.112, |
|
"learning_rate": 0.0001, |
|
"step": 185472 |
|
}, |
|
{ |
|
"epoch": 56.00845410628019, |
|
"grad_norm": 1.324925422668457, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7799, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 56.15942028985507, |
|
"grad_norm": 1.140968680381775, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7671, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 56.31038647342995, |
|
"grad_norm": 1.9985508918762207, |
|
"learning_rate": 0.0001, |
|
"loss": 0.781, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 56.46135265700483, |
|
"grad_norm": 2.1326446533203125, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7926, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 56.61231884057971, |
|
"grad_norm": 2.2153525352478027, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7919, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 56.76328502415459, |
|
"grad_norm": 1.8268916606903076, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7758, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 56.91425120772947, |
|
"grad_norm": 1.2445095777511597, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7868, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7524915062287656, |
|
"eval_f1_macro": 0.4055993193482603, |
|
"eval_f1_micro": 0.7524915062287656, |
|
"eval_loss": 0.7028916478157043, |
|
"eval_runtime": 216.0565, |
|
"eval_samples_per_second": 163.476, |
|
"eval_steps_per_second": 5.11, |
|
"learning_rate": 0.0001, |
|
"step": 188784 |
|
}, |
|
{ |
|
"epoch": 57.06521739130435, |
|
"grad_norm": 1.4556875228881836, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7903, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 57.216183574879224, |
|
"grad_norm": 1.3705946207046509, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7873, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 57.367149758454104, |
|
"grad_norm": 2.036994457244873, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7796, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 57.518115942028984, |
|
"grad_norm": 2.2616024017333984, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7761, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 57.669082125603865, |
|
"grad_norm": 1.9602872133255005, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7959, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 57.820048309178745, |
|
"grad_norm": 1.309295892715454, |
|
"learning_rate": 0.0001, |
|
"loss": 0.784, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 57.971014492753625, |
|
"grad_norm": 1.5990263223648071, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7837, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7536240090600227, |
|
"eval_f1_macro": 0.4020138923754012, |
|
"eval_f1_micro": 0.7536240090600227, |
|
"eval_loss": 0.7021399736404419, |
|
"eval_runtime": 218.5717, |
|
"eval_samples_per_second": 161.595, |
|
"eval_steps_per_second": 5.051, |
|
"learning_rate": 0.0001, |
|
"step": 192096 |
|
}, |
|
{ |
|
"epoch": 58.121980676328505, |
|
"grad_norm": 1.6626112461090088, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7824, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 58.272946859903385, |
|
"grad_norm": 1.4282841682434082, |
|
"learning_rate": 0.0001, |
|
"loss": 0.769, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 58.42391304347826, |
|
"grad_norm": 1.731040120124817, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7868, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 58.57487922705314, |
|
"grad_norm": 1.7084987163543701, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7812, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 58.72584541062802, |
|
"grad_norm": 1.8147222995758057, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7941, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 58.8768115942029, |
|
"grad_norm": 1.8238271474838257, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7834, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7534258210645527, |
|
"eval_f1_macro": 0.40493707142310226, |
|
"eval_f1_micro": 0.7534258210645527, |
|
"eval_loss": 0.7011087536811829, |
|
"eval_runtime": 215.6629, |
|
"eval_samples_per_second": 163.774, |
|
"eval_steps_per_second": 5.119, |
|
"learning_rate": 0.0001, |
|
"step": 195408 |
|
}, |
|
{ |
|
"epoch": 59.02777777777778, |
|
"grad_norm": 2.115701913833618, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8038, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 59.17874396135266, |
|
"grad_norm": 1.9553638696670532, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7834, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 59.32971014492754, |
|
"grad_norm": 1.5982245206832886, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7845, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 59.48067632850242, |
|
"grad_norm": 1.805112361907959, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7744, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 59.63164251207729, |
|
"grad_norm": 2.541797161102295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.775, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 59.78260869565217, |
|
"grad_norm": 1.572972297668457, |
|
"learning_rate": 0.0001, |
|
"loss": 0.8033, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 59.93357487922705, |
|
"grad_norm": 1.4918849468231201, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7893, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7530294450736127, |
|
"eval_f1_macro": 0.402861411530856, |
|
"eval_f1_micro": 0.7530294450736127, |
|
"eval_loss": 0.7018985748291016, |
|
"eval_runtime": 213.0555, |
|
"eval_samples_per_second": 165.778, |
|
"eval_steps_per_second": 5.182, |
|
"learning_rate": 0.0001, |
|
"step": 198720 |
|
}, |
|
{ |
|
"epoch": 60.08454106280193, |
|
"grad_norm": 2.5043087005615234, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7765, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 60.23550724637681, |
|
"grad_norm": 2.011338710784912, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7886, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 60.38647342995169, |
|
"grad_norm": 2.148954153060913, |
|
"learning_rate": 0.0001, |
|
"loss": 0.782, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 60.53743961352657, |
|
"grad_norm": 2.625419855117798, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7833, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 60.68840579710145, |
|
"grad_norm": 1.7209972143173218, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7897, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 60.839371980676326, |
|
"grad_norm": 1.8467425107955933, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7846, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 60.990338164251206, |
|
"grad_norm": 1.4258301258087158, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7824, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.7518686296715742, |
|
"eval_f1_macro": 0.39945027687694223, |
|
"eval_f1_micro": 0.7518686296715742, |
|
"eval_loss": 0.7023395895957947, |
|
"eval_runtime": 222.5154, |
|
"eval_samples_per_second": 158.731, |
|
"eval_steps_per_second": 4.961, |
|
"learning_rate": 0.0001, |
|
"step": 202032 |
|
}, |
|
{ |
|
"epoch": 61.141304347826086, |
|
"grad_norm": 2.6606762409210205, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7765, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 61.292270531400966, |
|
"grad_norm": 2.2172415256500244, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7845, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 61.443236714975846, |
|
"grad_norm": 2.1280250549316406, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7851, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 61.594202898550726, |
|
"grad_norm": 1.6851128339767456, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7842, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 61.74516908212561, |
|
"grad_norm": 1.9471075534820557, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7881, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 61.89613526570048, |
|
"grad_norm": 1.5017220973968506, |
|
"learning_rate": 0.0001, |
|
"loss": 0.789, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.7524915062287656, |
|
"eval_f1_macro": 0.40407383649300527, |
|
"eval_f1_micro": 0.7524915062287656, |
|
"eval_loss": 0.7037662267684937, |
|
"eval_runtime": 216.9958, |
|
"eval_samples_per_second": 162.768, |
|
"eval_steps_per_second": 5.088, |
|
"learning_rate": 0.0001, |
|
"step": 205344 |
|
}, |
|
{ |
|
"epoch": 62.04710144927536, |
|
"grad_norm": 1.834807276725769, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7739, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 62.19806763285024, |
|
"grad_norm": 1.9558227062225342, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7741, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 62.34903381642512, |
|
"grad_norm": 2.4876391887664795, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7949, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 62.5, |
|
"grad_norm": 1.7195085287094116, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7881, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 62.65096618357488, |
|
"grad_norm": 1.7376832962036133, |
|
"learning_rate": 0.0001, |
|
"loss": 0.782, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 62.80193236714976, |
|
"grad_norm": 2.0515975952148438, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7986, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 62.95289855072464, |
|
"grad_norm": 1.5486465692520142, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7778, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.753510758776897, |
|
"eval_f1_macro": 0.40375848569907075, |
|
"eval_f1_micro": 0.753510758776897, |
|
"eval_loss": 0.7002550959587097, |
|
"eval_runtime": 217.688, |
|
"eval_samples_per_second": 162.251, |
|
"eval_steps_per_second": 5.071, |
|
"learning_rate": 0.0001, |
|
"step": 208656 |
|
}, |
|
{ |
|
"epoch": 63.10386473429951, |
|
"grad_norm": 1.7268332242965698, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7804, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 63.25483091787439, |
|
"grad_norm": 2.5274782180786133, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7714, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 63.405797101449274, |
|
"grad_norm": 2.0961976051330566, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7831, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 63.556763285024154, |
|
"grad_norm": 1.5764572620391846, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7953, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 63.707729468599034, |
|
"grad_norm": 1.8891505002975464, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7802, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 63.858695652173914, |
|
"grad_norm": 2.4100189208984375, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7719, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7526047565118913, |
|
"eval_f1_macro": 0.3981875665805004, |
|
"eval_f1_micro": 0.7526047565118913, |
|
"eval_loss": 0.6997453570365906, |
|
"eval_runtime": 217.2646, |
|
"eval_samples_per_second": 162.567, |
|
"eval_steps_per_second": 5.081, |
|
"learning_rate": 0.0001, |
|
"step": 211968 |
|
}, |
|
{ |
|
"epoch": 64.00966183574879, |
|
"grad_norm": 1.7748676538467407, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7873, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 64.16062801932367, |
|
"grad_norm": 2.2638964653015137, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7785, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 64.31159420289855, |
|
"grad_norm": 2.0121142864227295, |
|
"learning_rate": 0.0001, |
|
"loss": 0.778, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 64.46256038647343, |
|
"grad_norm": 1.7299202680587769, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7786, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 64.61352657004831, |
|
"grad_norm": 1.9140008687973022, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7871, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 64.76449275362319, |
|
"grad_norm": 1.8414108753204346, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7791, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 64.91545893719807, |
|
"grad_norm": 1.9848062992095947, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7909, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.7515005662514156, |
|
"eval_f1_macro": 0.39972201826875875, |
|
"eval_f1_micro": 0.7515005662514156, |
|
"eval_loss": 0.7073760032653809, |
|
"eval_runtime": 221.4341, |
|
"eval_samples_per_second": 159.506, |
|
"eval_steps_per_second": 4.986, |
|
"learning_rate": 0.0001, |
|
"step": 215280 |
|
}, |
|
{ |
|
"epoch": 65.06642512077295, |
|
"grad_norm": 2.2006211280822754, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7954, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 65.21739130434783, |
|
"grad_norm": 1.9873759746551514, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7793, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 65.36835748792271, |
|
"grad_norm": 1.8538273572921753, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7869, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 65.51932367149759, |
|
"grad_norm": 1.5849995613098145, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7785, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 65.67028985507247, |
|
"grad_norm": 2.1744604110717773, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7825, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 65.82125603864735, |
|
"grad_norm": 1.745910882949829, |
|
"learning_rate": 0.0001, |
|
"loss": 0.788, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 65.97222222222223, |
|
"grad_norm": 1.908542275428772, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7854, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.7526330690826727, |
|
"eval_f1_macro": 0.39400038125390824, |
|
"eval_f1_micro": 0.7526330690826727, |
|
"eval_loss": 0.7018123269081116, |
|
"eval_runtime": 218.0947, |
|
"eval_samples_per_second": 161.948, |
|
"eval_steps_per_second": 5.062, |
|
"learning_rate": 0.0001, |
|
"step": 218592 |
|
}, |
|
{ |
|
"epoch": 66.1231884057971, |
|
"grad_norm": 2.1476829051971436, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7871, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 66.27415458937197, |
|
"grad_norm": 2.2997281551361084, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7652, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 66.42512077294685, |
|
"grad_norm": 2.048473358154297, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7857, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 66.57608695652173, |
|
"grad_norm": 2.110971450805664, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7878, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 66.72705314009661, |
|
"grad_norm": 2.692934274673462, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7746, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 66.8780193236715, |
|
"grad_norm": 2.0588693618774414, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7746, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.7543035107587769, |
|
"eval_f1_macro": 0.3999822208017269, |
|
"eval_f1_micro": 0.7543035107587769, |
|
"eval_loss": 0.7023409008979797, |
|
"eval_runtime": 216.8908, |
|
"eval_samples_per_second": 162.847, |
|
"eval_steps_per_second": 5.09, |
|
"learning_rate": 0.0001, |
|
"step": 221904 |
|
}, |
|
{ |
|
"epoch": 67.02898550724638, |
|
"grad_norm": 2.107386350631714, |
|
"learning_rate": 0.0001, |
|
"loss": 0.776, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 67.17995169082126, |
|
"grad_norm": 2.098714590072632, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7744, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 67.33091787439614, |
|
"grad_norm": 1.85200834274292, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7785, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 67.48188405797102, |
|
"grad_norm": 1.833825945854187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7908, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 67.6328502415459, |
|
"grad_norm": 1.4443333148956299, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7827, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 67.78381642512078, |
|
"grad_norm": 1.8465648889541626, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7808, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 67.93478260869566, |
|
"grad_norm": 2.001317262649536, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7905, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7541053227633069, |
|
"eval_f1_macro": 0.40634545918728326, |
|
"eval_f1_micro": 0.7541053227633069, |
|
"eval_loss": 0.6975185871124268, |
|
"eval_runtime": 221.9409, |
|
"eval_samples_per_second": 159.141, |
|
"eval_steps_per_second": 4.974, |
|
"learning_rate": 0.0001, |
|
"step": 225216 |
|
}, |
|
{ |
|
"epoch": 68.08574879227054, |
|
"grad_norm": 2.2237119674682617, |
|
"learning_rate": 0.0001, |
|
"loss": 0.782, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 68.23671497584542, |
|
"grad_norm": 1.959067702293396, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7767, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 68.3876811594203, |
|
"grad_norm": 1.9757815599441528, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7748, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 68.53864734299516, |
|
"grad_norm": 2.113232135772705, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7811, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 68.68961352657004, |
|
"grad_norm": 1.9340970516204834, |
|
"learning_rate": 0.0001, |
|
"loss": 0.777, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 68.84057971014492, |
|
"grad_norm": 0.9899095892906189, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7715, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 68.9915458937198, |
|
"grad_norm": 2.3360114097595215, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7824, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.7537938844847112, |
|
"eval_f1_macro": 0.4072097015037698, |
|
"eval_f1_micro": 0.7537938844847112, |
|
"eval_loss": 0.6993648409843445, |
|
"eval_runtime": 222.155, |
|
"eval_samples_per_second": 158.988, |
|
"eval_steps_per_second": 4.97, |
|
"learning_rate": 0.0001, |
|
"step": 228528 |
|
}, |
|
{ |
|
"epoch": 69.14251207729468, |
|
"grad_norm": 2.447714328765869, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7778, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 69.29347826086956, |
|
"grad_norm": 1.8527592420578003, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7735, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 69.44444444444444, |
|
"grad_norm": 2.621053457260132, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7825, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 69.59541062801932, |
|
"grad_norm": 2.390890598297119, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7685, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 69.7463768115942, |
|
"grad_norm": 2.128596544265747, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7834, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 69.89734299516908, |
|
"grad_norm": 1.8924263715744019, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7795, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7557191392978483, |
|
"eval_f1_macro": 0.40936702138264003, |
|
"eval_f1_micro": 0.7557191392978483, |
|
"eval_loss": 0.6969271898269653, |
|
"eval_runtime": 223.0544, |
|
"eval_samples_per_second": 158.347, |
|
"eval_steps_per_second": 4.949, |
|
"learning_rate": 0.0001, |
|
"step": 231840 |
|
}, |
|
{ |
|
"epoch": 70.04830917874396, |
|
"grad_norm": 1.684754729270935, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7991, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 70.19927536231884, |
|
"grad_norm": 2.4494612216949463, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7753, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 70.35024154589372, |
|
"grad_norm": 2.5033414363861084, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7847, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 70.5012077294686, |
|
"grad_norm": 2.4571475982666016, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7608, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 70.65217391304348, |
|
"grad_norm": 1.6463907957077026, |
|
"learning_rate": 0.0001, |
|
"loss": 0.772, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 70.80314009661836, |
|
"grad_norm": 2.111017942428589, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7705, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 70.95410628019323, |
|
"grad_norm": 1.4302709102630615, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7763, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.7564269535673839, |
|
"eval_f1_macro": 0.40852716904479053, |
|
"eval_f1_micro": 0.7564269535673839, |
|
"eval_loss": 0.6969292759895325, |
|
"eval_runtime": 224.5096, |
|
"eval_samples_per_second": 157.321, |
|
"eval_steps_per_second": 4.917, |
|
"learning_rate": 0.0001, |
|
"step": 235152 |
|
}, |
|
{ |
|
"epoch": 71.10507246376811, |
|
"grad_norm": 1.9323956966400146, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7882, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 71.25603864734299, |
|
"grad_norm": 1.4799989461898804, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7778, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 71.40700483091787, |
|
"grad_norm": 2.2827231884002686, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7834, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 71.55797101449275, |
|
"grad_norm": 2.1466054916381836, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7731, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 71.70893719806763, |
|
"grad_norm": 2.6581642627716064, |
|
"learning_rate": 0.0001, |
|
"loss": 0.768, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 71.85990338164251, |
|
"grad_norm": 2.4536802768707275, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7723, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.753114382785957, |
|
"eval_f1_macro": 0.40898447112902125, |
|
"eval_f1_micro": 0.753114382785957, |
|
"eval_loss": 0.6986733675003052, |
|
"eval_runtime": 224.8152, |
|
"eval_samples_per_second": 157.107, |
|
"eval_steps_per_second": 4.911, |
|
"learning_rate": 0.0001, |
|
"step": 238464 |
|
}, |
|
{ |
|
"epoch": 72.01086956521739, |
|
"grad_norm": 2.023843288421631, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7811, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 72.16183574879227, |
|
"grad_norm": 2.293466567993164, |
|
"learning_rate": 0.0001, |
|
"loss": 0.771, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 72.31280193236715, |
|
"grad_norm": 1.5046766996383667, |
|
"learning_rate": 0.0001, |
|
"loss": 0.77, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 72.46376811594203, |
|
"grad_norm": 2.528843641281128, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7937, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 72.61473429951691, |
|
"grad_norm": 3.005565643310547, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7715, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 72.76570048309179, |
|
"grad_norm": 1.9399968385696411, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7773, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 72.91666666666667, |
|
"grad_norm": 1.8866796493530273, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7914, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.7556058890147226, |
|
"eval_f1_macro": 0.4202687658592802, |
|
"eval_f1_micro": 0.7556058890147226, |
|
"eval_loss": 0.6945138573646545, |
|
"eval_runtime": 221.3201, |
|
"eval_samples_per_second": 159.588, |
|
"eval_steps_per_second": 4.988, |
|
"learning_rate": 0.0001, |
|
"step": 241776 |
|
}, |
|
{ |
|
"epoch": 73.06763285024155, |
|
"grad_norm": 2.379028081893921, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7795, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 73.21859903381643, |
|
"grad_norm": 2.0649399757385254, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7866, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 73.3695652173913, |
|
"grad_norm": 1.8513232469558716, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7831, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 73.52053140096618, |
|
"grad_norm": 2.739586114883423, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7867, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 73.67149758454106, |
|
"grad_norm": 1.8570257425308228, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7837, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 73.82246376811594, |
|
"grad_norm": 1.623005986213684, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7747, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 73.97342995169082, |
|
"grad_norm": 2.5861430168151855, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7658, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7544167610419026, |
|
"eval_f1_macro": 0.4117443260028933, |
|
"eval_f1_micro": 0.7544167610419026, |
|
"eval_loss": 0.6951290369033813, |
|
"eval_runtime": 221.2373, |
|
"eval_samples_per_second": 159.648, |
|
"eval_steps_per_second": 4.99, |
|
"learning_rate": 0.0001, |
|
"step": 245088 |
|
}, |
|
{ |
|
"epoch": 74.1243961352657, |
|
"grad_norm": 2.5300405025482178, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7725, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 74.27536231884058, |
|
"grad_norm": 2.3629848957061768, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7776, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 74.42632850241546, |
|
"grad_norm": 2.146430253982544, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7783, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 74.57729468599034, |
|
"grad_norm": 2.4947521686553955, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7689, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 74.72826086956522, |
|
"grad_norm": 3.2016279697418213, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7917, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 74.8792270531401, |
|
"grad_norm": 1.4425787925720215, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7803, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7548131370328426, |
|
"eval_f1_macro": 0.41036641620698266, |
|
"eval_f1_micro": 0.7548131370328426, |
|
"eval_loss": 0.6989214420318604, |
|
"eval_runtime": 217.4759, |
|
"eval_samples_per_second": 162.409, |
|
"eval_steps_per_second": 5.076, |
|
"learning_rate": 0.0001, |
|
"step": 248400 |
|
}, |
|
{ |
|
"epoch": 75.03019323671498, |
|
"grad_norm": 2.0696792602539062, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7758, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 75.18115942028986, |
|
"grad_norm": 2.0373711585998535, |
|
"learning_rate": 0.0001, |
|
"loss": 0.768, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 75.33212560386474, |
|
"grad_norm": 1.7089965343475342, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7784, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 75.48309178743962, |
|
"grad_norm": 2.095013380050659, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7801, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 75.6340579710145, |
|
"grad_norm": 2.4704737663269043, |
|
"learning_rate": 0.0001, |
|
"loss": 0.791, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 75.78502415458937, |
|
"grad_norm": 2.9486844539642334, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7757, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 75.93599033816425, |
|
"grad_norm": 2.679515838623047, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7772, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7535673839184598, |
|
"eval_f1_macro": 0.40368077376831135, |
|
"eval_f1_micro": 0.7535673839184598, |
|
"eval_loss": 0.6996731758117676, |
|
"eval_runtime": 220.8571, |
|
"eval_samples_per_second": 159.922, |
|
"eval_steps_per_second": 4.999, |
|
"learning_rate": 0.0001, |
|
"step": 251712 |
|
}, |
|
{ |
|
"epoch": 76.08695652173913, |
|
"grad_norm": 2.108654022216797, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7759, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 76.237922705314, |
|
"grad_norm": 2.2799570560455322, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7589, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 76.38888888888889, |
|
"grad_norm": 1.7830220460891724, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7727, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 76.53985507246377, |
|
"grad_norm": 2.1638951301574707, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7691, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 76.69082125603865, |
|
"grad_norm": 2.263577938079834, |
|
"learning_rate": 0.0001, |
|
"loss": 0.778, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 76.84178743961353, |
|
"grad_norm": 2.4441773891448975, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7863, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 76.9927536231884, |
|
"grad_norm": 2.222770929336548, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7813, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.7534824462061155, |
|
"eval_f1_macro": 0.409233768348257, |
|
"eval_f1_micro": 0.7534824462061155, |
|
"eval_loss": 0.6985901594161987, |
|
"eval_runtime": 214.6867, |
|
"eval_samples_per_second": 164.519, |
|
"eval_steps_per_second": 5.142, |
|
"learning_rate": 0.0001, |
|
"step": 255024 |
|
}, |
|
{ |
|
"epoch": 77.14371980676329, |
|
"grad_norm": 2.7884366512298584, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7578, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 77.29468599033817, |
|
"grad_norm": 1.6742238998413086, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7796, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 77.44565217391305, |
|
"grad_norm": 2.1514461040496826, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7785, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 77.59661835748793, |
|
"grad_norm": 2.1306777000427246, |
|
"learning_rate": 0.0001, |
|
"loss": 0.761, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 77.7475845410628, |
|
"grad_norm": 2.3739430904388428, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7854, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 77.89855072463769, |
|
"grad_norm": 2.6595206260681152, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7938, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.7529728199320498, |
|
"eval_f1_macro": 0.4084385355970208, |
|
"eval_f1_micro": 0.7529728199320498, |
|
"eval_loss": 0.6982055306434631, |
|
"eval_runtime": 212.4703, |
|
"eval_samples_per_second": 166.235, |
|
"eval_steps_per_second": 5.196, |
|
"learning_rate": 0.0001, |
|
"step": 258336 |
|
}, |
|
{ |
|
"epoch": 78.04951690821257, |
|
"grad_norm": 2.7604596614837646, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7834, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 78.20048309178743, |
|
"grad_norm": 2.867987632751465, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7736, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 78.35144927536231, |
|
"grad_norm": 3.00175404548645, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7743, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 78.5024154589372, |
|
"grad_norm": 2.370760679244995, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7804, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 78.65338164251207, |
|
"grad_norm": 3.652926206588745, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7884, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 78.80434782608695, |
|
"grad_norm": 2.2053353786468506, |
|
"learning_rate": 0.0001, |
|
"loss": 0.7668, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 78.95531400966183, |
|
"grad_norm": 2.3484444618225098, |
|
"learning_rate": 0.0001, |
|
"loss": 0.776, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7545300113250283, |
|
"eval_f1_macro": 0.40554177789578655, |
|
"eval_f1_micro": 0.7545300113250283, |
|
"eval_loss": 0.6958198547363281, |
|
"eval_runtime": 216.388, |
|
"eval_samples_per_second": 163.225, |
|
"eval_steps_per_second": 5.102, |
|
"learning_rate": 0.0001, |
|
"step": 261648 |
|
}, |
|
{ |
|
"epoch": 79.10628019323671, |
|
"grad_norm": 2.6035516262054443, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7645, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 79.2572463768116, |
|
"grad_norm": 2.6548285484313965, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7838, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 79.40821256038647, |
|
"grad_norm": 2.1750309467315674, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7733, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 79.55917874396135, |
|
"grad_norm": 2.2618377208709717, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7674, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 79.71014492753623, |
|
"grad_norm": 2.5647144317626953, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7596, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 79.86111111111111, |
|
"grad_norm": 2.199589967727661, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7613, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7548131370328426, |
|
"eval_f1_macro": 0.4061063082083746, |
|
"eval_f1_micro": 0.7548131370328426, |
|
"eval_loss": 0.6934341788291931, |
|
"eval_runtime": 219.4797, |
|
"eval_samples_per_second": 160.926, |
|
"eval_steps_per_second": 5.03, |
|
"learning_rate": 1e-05, |
|
"step": 264960 |
|
}, |
|
{ |
|
"epoch": 80.012077294686, |
|
"grad_norm": 2.2339282035827637, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7649, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 80.16304347826087, |
|
"grad_norm": 1.8621363639831543, |
|
"learning_rate": 1e-05, |
|
"loss": 0.777, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 80.31400966183575, |
|
"grad_norm": 2.7088050842285156, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7606, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 80.46497584541063, |
|
"grad_norm": 3.5997519493103027, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7645, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 80.6159420289855, |
|
"grad_norm": 2.1306052207946777, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7522, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 80.76690821256038, |
|
"grad_norm": 2.9289278984069824, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7785, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 80.91787439613526, |
|
"grad_norm": 2.9643545150756836, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7647, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.756030577576444, |
|
"eval_f1_macro": 0.41080502241376626, |
|
"eval_f1_micro": 0.756030577576444, |
|
"eval_loss": 0.6922488212585449, |
|
"eval_runtime": 223.7966, |
|
"eval_samples_per_second": 157.822, |
|
"eval_steps_per_second": 4.933, |
|
"learning_rate": 1e-05, |
|
"step": 268272 |
|
}, |
|
{ |
|
"epoch": 81.06884057971014, |
|
"grad_norm": 2.629472494125366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7544, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 81.21980676328502, |
|
"grad_norm": 2.5886995792388916, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7457, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 81.3707729468599, |
|
"grad_norm": 2.5300049781799316, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7647, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 81.52173913043478, |
|
"grad_norm": 2.208451509475708, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7606, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 81.67270531400966, |
|
"grad_norm": 1.6408979892730713, |
|
"learning_rate": 1e-05, |
|
"loss": 0.764, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 81.82367149758454, |
|
"grad_norm": 2.4585461616516113, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7709, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 81.97463768115942, |
|
"grad_norm": 2.8251771926879883, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7842, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.7543318233295583, |
|
"eval_f1_macro": 0.4068674463917489, |
|
"eval_f1_micro": 0.7543318233295583, |
|
"eval_loss": 0.6933410167694092, |
|
"eval_runtime": 221.2843, |
|
"eval_samples_per_second": 159.614, |
|
"eval_steps_per_second": 4.989, |
|
"learning_rate": 1e-05, |
|
"step": 271584 |
|
}, |
|
{ |
|
"epoch": 82.1256038647343, |
|
"grad_norm": 1.0586947202682495, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7631, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 82.27657004830918, |
|
"grad_norm": 1.943528413772583, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7757, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 82.42753623188406, |
|
"grad_norm": 2.1721386909484863, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7679, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 82.57850241545894, |
|
"grad_norm": 1.7928928136825562, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7722, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 82.72946859903382, |
|
"grad_norm": 2.544050931930542, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7353, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 82.8804347826087, |
|
"grad_norm": 3.048557758331299, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7689, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.753510758776897, |
|
"eval_f1_macro": 0.4067501722537251, |
|
"eval_f1_micro": 0.753510758776897, |
|
"eval_loss": 0.6952932476997375, |
|
"eval_runtime": 222.2091, |
|
"eval_samples_per_second": 158.949, |
|
"eval_steps_per_second": 4.968, |
|
"learning_rate": 1e-05, |
|
"step": 274896 |
|
}, |
|
{ |
|
"epoch": 83.03140096618357, |
|
"grad_norm": 2.39086651802063, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7612, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 83.18236714975845, |
|
"grad_norm": 2.1913161277770996, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7481, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 83.33333333333333, |
|
"grad_norm": 2.3297812938690186, |
|
"learning_rate": 1e-05, |
|
"loss": 0.748, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 83.48429951690821, |
|
"grad_norm": 2.2055962085723877, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7677, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 83.63526570048309, |
|
"grad_norm": 2.183980941772461, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7714, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 83.78623188405797, |
|
"grad_norm": 2.8484814167022705, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7712, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 83.93719806763285, |
|
"grad_norm": 2.133021593093872, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7674, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7570215175537939, |
|
"eval_f1_macro": 0.41404658203548567, |
|
"eval_f1_micro": 0.7570215175537939, |
|
"eval_loss": 0.6912673115730286, |
|
"eval_runtime": 229.7218, |
|
"eval_samples_per_second": 153.751, |
|
"eval_steps_per_second": 4.806, |
|
"learning_rate": 1e-05, |
|
"step": 278208 |
|
}, |
|
{ |
|
"epoch": 84.08816425120773, |
|
"grad_norm": 2.4487361907958984, |
|
"learning_rate": 1e-05, |
|
"loss": 0.767, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 84.23913043478261, |
|
"grad_norm": 1.9755173921585083, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7651, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 84.39009661835749, |
|
"grad_norm": 2.915226459503174, |
|
"learning_rate": 1e-05, |
|
"loss": 0.765, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 84.54106280193237, |
|
"grad_norm": 2.0337023735046387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7546, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 84.69202898550725, |
|
"grad_norm": 2.662229299545288, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7593, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 84.84299516908213, |
|
"grad_norm": 2.855032444000244, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7635, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 84.99396135265701, |
|
"grad_norm": 2.1355550289154053, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7607, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.7564269535673839, |
|
"eval_f1_macro": 0.4116923602189749, |
|
"eval_f1_micro": 0.7564269535673839, |
|
"eval_loss": 0.6910523772239685, |
|
"eval_runtime": 217.5309, |
|
"eval_samples_per_second": 162.368, |
|
"eval_steps_per_second": 5.075, |
|
"learning_rate": 1e-05, |
|
"step": 281520 |
|
}, |
|
{ |
|
"epoch": 85.14492753623189, |
|
"grad_norm": 2.5106825828552246, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7671, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 85.29589371980677, |
|
"grad_norm": 1.8243615627288818, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7562, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 85.44685990338164, |
|
"grad_norm": 2.3742074966430664, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7686, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 85.59782608695652, |
|
"grad_norm": 2.150203227996826, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7543, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 85.7487922705314, |
|
"grad_norm": 3.4924771785736084, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7615, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 85.89975845410628, |
|
"grad_norm": 2.538518190383911, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7744, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.7563420158550397, |
|
"eval_f1_macro": 0.4128340042693249, |
|
"eval_f1_micro": 0.7563420158550397, |
|
"eval_loss": 0.6916475296020508, |
|
"eval_runtime": 222.1239, |
|
"eval_samples_per_second": 159.01, |
|
"eval_steps_per_second": 4.97, |
|
"learning_rate": 1e-05, |
|
"step": 284832 |
|
}, |
|
{ |
|
"epoch": 86.05072463768116, |
|
"grad_norm": 3.2334766387939453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7476, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 86.20169082125604, |
|
"grad_norm": 2.5590665340423584, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7714, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 86.35265700483092, |
|
"grad_norm": 3.104130744934082, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7657, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 86.5036231884058, |
|
"grad_norm": 2.214571714401245, |
|
"learning_rate": 1e-05, |
|
"loss": 0.761, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 86.65458937198068, |
|
"grad_norm": 2.1867547035217285, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7566, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 86.80555555555556, |
|
"grad_norm": 1.6422173976898193, |
|
"learning_rate": 1e-05, |
|
"loss": 0.769, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 86.95652173913044, |
|
"grad_norm": 1.9688115119934082, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7639, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.7549546998867497, |
|
"eval_f1_macro": 0.40886676984680725, |
|
"eval_f1_micro": 0.7549546998867497, |
|
"eval_loss": 0.6928644180297852, |
|
"eval_runtime": 208.4118, |
|
"eval_samples_per_second": 169.472, |
|
"eval_steps_per_second": 5.297, |
|
"learning_rate": 1e-05, |
|
"step": 288144 |
|
}, |
|
{ |
|
"epoch": 87.10748792270532, |
|
"grad_norm": 2.8994855880737305, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7707, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 87.2584541062802, |
|
"grad_norm": 2.275305986404419, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7694, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 87.40942028985508, |
|
"grad_norm": 3.0227978229522705, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7594, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 87.56038647342996, |
|
"grad_norm": 3.3725409507751465, |
|
"learning_rate": 1e-05, |
|
"loss": 0.766, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 87.71135265700484, |
|
"grad_norm": 2.8421378135681152, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7562, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 87.8623188405797, |
|
"grad_norm": 2.772217035293579, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7515, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7565402038505096, |
|
"eval_f1_macro": 0.420962036953915, |
|
"eval_f1_micro": 0.7565402038505096, |
|
"eval_loss": 0.690355122089386, |
|
"eval_runtime": 208.3912, |
|
"eval_samples_per_second": 169.489, |
|
"eval_steps_per_second": 5.298, |
|
"learning_rate": 1e-05, |
|
"step": 291456 |
|
}, |
|
{ |
|
"epoch": 88.01328502415458, |
|
"grad_norm": 2.544933795928955, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7618, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 88.16425120772946, |
|
"grad_norm": 2.5325067043304443, |
|
"learning_rate": 1e-05, |
|
"loss": 0.77, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 88.31521739130434, |
|
"grad_norm": 2.440011739730835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7731, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 88.46618357487922, |
|
"grad_norm": 2.3057007789611816, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7648, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 88.6171497584541, |
|
"grad_norm": 2.1060709953308105, |
|
"learning_rate": 1e-05, |
|
"loss": 0.764, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 88.76811594202898, |
|
"grad_norm": 1.846860408782959, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7556, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 88.91908212560386, |
|
"grad_norm": 2.313117504119873, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7529, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.7553793884484711, |
|
"eval_f1_macro": 0.4082321179284884, |
|
"eval_f1_micro": 0.7553793884484711, |
|
"eval_loss": 0.6912076473236084, |
|
"eval_runtime": 208.3205, |
|
"eval_samples_per_second": 169.546, |
|
"eval_steps_per_second": 5.3, |
|
"learning_rate": 1e-05, |
|
"step": 294768 |
|
}, |
|
{ |
|
"epoch": 89.07004830917874, |
|
"grad_norm": 1.8952932357788086, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7531, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 89.22101449275362, |
|
"grad_norm": 2.8988664150238037, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7589, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 89.3719806763285, |
|
"grad_norm": 2.967485189437866, |
|
"learning_rate": 1e-05, |
|
"loss": 0.754, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 89.52294685990339, |
|
"grad_norm": 3.6572604179382324, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7601, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 89.67391304347827, |
|
"grad_norm": 2.4497523307800293, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7637, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 89.82487922705315, |
|
"grad_norm": 2.146918296813965, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7659, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 89.97584541062803, |
|
"grad_norm": 2.3516407012939453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7575, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.7557191392978483, |
|
"eval_f1_macro": 0.41024391615144784, |
|
"eval_f1_micro": 0.7557191392978483, |
|
"eval_loss": 0.6930990815162659, |
|
"eval_runtime": 208.0967, |
|
"eval_samples_per_second": 169.729, |
|
"eval_steps_per_second": 5.305, |
|
"learning_rate": 1e-05, |
|
"step": 298080 |
|
}, |
|
{ |
|
"epoch": 90.1268115942029, |
|
"grad_norm": 2.572826385498047, |
|
"learning_rate": 1e-05, |
|
"loss": 0.76, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 90.27777777777777, |
|
"grad_norm": 2.2904913425445557, |
|
"learning_rate": 1e-05, |
|
"loss": 0.745, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 90.42874396135265, |
|
"grad_norm": 2.661425828933716, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7558, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 90.57971014492753, |
|
"grad_norm": 2.6880910396575928, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7649, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 90.73067632850241, |
|
"grad_norm": 2.799816846847534, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7575, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 90.88164251207729, |
|
"grad_norm": 2.5382304191589355, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7715, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.7554643261608154, |
|
"eval_f1_macro": 0.4130015915297761, |
|
"eval_f1_micro": 0.7554643261608154, |
|
"eval_loss": 0.6912401914596558, |
|
"eval_runtime": 214.0252, |
|
"eval_samples_per_second": 165.027, |
|
"eval_steps_per_second": 5.158, |
|
"learning_rate": 1e-05, |
|
"step": 301392 |
|
}, |
|
{ |
|
"epoch": 91.03260869565217, |
|
"grad_norm": 2.888077974319458, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7584, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 91.18357487922705, |
|
"grad_norm": 2.5411763191223145, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7651, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 91.33454106280193, |
|
"grad_norm": 2.0645570755004883, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7457, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 91.48550724637681, |
|
"grad_norm": 2.1666808128356934, |
|
"learning_rate": 1e-05, |
|
"loss": 0.766, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 91.63647342995169, |
|
"grad_norm": 2.985408306121826, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7511, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 91.78743961352657, |
|
"grad_norm": 2.2410295009613037, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7773, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 91.93840579710145, |
|
"grad_norm": 2.231506824493408, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7512, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7533691959229898, |
|
"eval_f1_macro": 0.4113121570400507, |
|
"eval_f1_micro": 0.7533691959229898, |
|
"eval_loss": 0.6949923634529114, |
|
"eval_runtime": 210.6356, |
|
"eval_samples_per_second": 167.683, |
|
"eval_steps_per_second": 5.241, |
|
"learning_rate": 1e-05, |
|
"step": 304704 |
|
}, |
|
{ |
|
"epoch": 92.08937198067633, |
|
"grad_norm": 2.164560317993164, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7597, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 92.24033816425121, |
|
"grad_norm": 2.2248928546905518, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7517, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 92.3913043478261, |
|
"grad_norm": 2.246734142303467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.762, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 92.54227053140096, |
|
"grad_norm": 2.577650547027588, |
|
"learning_rate": 1e-05, |
|
"loss": 0.761, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 92.69323671497584, |
|
"grad_norm": 2.056898593902588, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7591, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 92.84420289855072, |
|
"grad_norm": 2.593987464904785, |
|
"learning_rate": 1e-05, |
|
"loss": 0.765, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 92.9951690821256, |
|
"grad_norm": 3.157461166381836, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7514, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.7538788221970555, |
|
"eval_f1_macro": 0.4075305019994295, |
|
"eval_f1_micro": 0.7538788221970555, |
|
"eval_loss": 0.6944717764854431, |
|
"eval_runtime": 210.8871, |
|
"eval_samples_per_second": 167.483, |
|
"eval_steps_per_second": 5.235, |
|
"learning_rate": 1e-05, |
|
"step": 308016 |
|
}, |
|
{ |
|
"epoch": 93.14613526570048, |
|
"grad_norm": 1.5309593677520752, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7661, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 93.29710144927536, |
|
"grad_norm": 2.500260829925537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7463, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 93.44806763285024, |
|
"grad_norm": 2.891855478286743, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7534, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 93.59903381642512, |
|
"grad_norm": 2.2749195098876953, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7828, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 93.75, |
|
"grad_norm": 2.692596435546875, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7421, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 93.90096618357488, |
|
"grad_norm": 3.1196775436401367, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7529, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.7563986409966025, |
|
"eval_f1_macro": 0.4139533419143493, |
|
"eval_f1_micro": 0.7563986409966025, |
|
"eval_loss": 0.6904072761535645, |
|
"eval_runtime": 215.9645, |
|
"eval_samples_per_second": 163.545, |
|
"eval_steps_per_second": 5.112, |
|
"learning_rate": 1e-05, |
|
"step": 311328 |
|
}, |
|
{ |
|
"epoch": 94.05193236714976, |
|
"grad_norm": 2.5435197353363037, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7618, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 94.20289855072464, |
|
"grad_norm": 2.673288583755493, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7617, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 94.35386473429952, |
|
"grad_norm": 2.2841262817382812, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7706, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 94.5048309178744, |
|
"grad_norm": 2.476738691329956, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7537, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 94.65579710144928, |
|
"grad_norm": 2.4850478172302246, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7445, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 94.80676328502416, |
|
"grad_norm": 2.6494836807250977, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7557, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 94.95772946859904, |
|
"grad_norm": 2.6753244400024414, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7731, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.7555209513023783, |
|
"eval_f1_macro": 0.4120916882954865, |
|
"eval_f1_micro": 0.7555209513023783, |
|
"eval_loss": 0.6918609142303467, |
|
"eval_runtime": 214.6022, |
|
"eval_samples_per_second": 164.584, |
|
"eval_steps_per_second": 5.144, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 314640 |
|
}, |
|
{ |
|
"epoch": 95.1086956521739, |
|
"grad_norm": 2.624093770980835, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7365, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 95.25966183574879, |
|
"grad_norm": 2.516775131225586, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7459, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 95.41062801932367, |
|
"grad_norm": 2.3330860137939453, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.769, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 95.56159420289855, |
|
"grad_norm": 2.966632843017578, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7718, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 95.71256038647343, |
|
"grad_norm": 2.455122709274292, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7692, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 95.86352657004831, |
|
"grad_norm": 3.1468164920806885, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7561, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.7563137032842582, |
|
"eval_f1_macro": 0.40922720296834614, |
|
"eval_f1_micro": 0.7563137032842582, |
|
"eval_loss": 0.6894093155860901, |
|
"eval_runtime": 229.4488, |
|
"eval_samples_per_second": 153.934, |
|
"eval_steps_per_second": 4.812, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 317952 |
|
}, |
|
{ |
|
"epoch": 96.01449275362319, |
|
"grad_norm": 2.4865055084228516, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7446, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 96.16545893719807, |
|
"grad_norm": 1.840039849281311, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7434, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 96.31642512077295, |
|
"grad_norm": 2.1709177494049072, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7645, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 96.46739130434783, |
|
"grad_norm": 3.2466864585876465, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7593, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 96.61835748792271, |
|
"grad_norm": 2.8996708393096924, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7538, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 96.76932367149759, |
|
"grad_norm": 2.626403331756592, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7483, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 96.92028985507247, |
|
"grad_norm": 2.0331873893737793, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7702, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.7565402038505096, |
|
"eval_f1_macro": 0.41307852034337844, |
|
"eval_f1_micro": 0.7565402038505096, |
|
"eval_loss": 0.6900209188461304, |
|
"eval_runtime": 220.7443, |
|
"eval_samples_per_second": 160.004, |
|
"eval_steps_per_second": 5.001, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 321264 |
|
}, |
|
{ |
|
"epoch": 97.07125603864735, |
|
"grad_norm": 2.5619101524353027, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7596, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 97.22222222222223, |
|
"grad_norm": 3.2552084922790527, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7705, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 97.3731884057971, |
|
"grad_norm": 1.9496020078659058, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7557, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 97.52415458937197, |
|
"grad_norm": 2.925794839859009, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7529, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 97.67512077294685, |
|
"grad_norm": 2.24346923828125, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7639, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 97.82608695652173, |
|
"grad_norm": 2.7352709770202637, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7536, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 97.97705314009661, |
|
"grad_norm": 3.0642807483673096, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7506, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.7566251415628539, |
|
"eval_f1_macro": 0.41356661114123644, |
|
"eval_f1_micro": 0.7566251415628539, |
|
"eval_loss": 0.6900044083595276, |
|
"eval_runtime": 222.6489, |
|
"eval_samples_per_second": 158.635, |
|
"eval_steps_per_second": 4.958, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 324576 |
|
}, |
|
{ |
|
"epoch": 98.1280193236715, |
|
"grad_norm": 3.5656516551971436, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7526, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 98.27898550724638, |
|
"grad_norm": 2.4275550842285156, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7676, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 98.42995169082126, |
|
"grad_norm": 3.6914291381835938, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7595, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 98.58091787439614, |
|
"grad_norm": 2.8449277877807617, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7539, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 98.73188405797102, |
|
"grad_norm": 2.5142879486083984, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7521, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 98.8828502415459, |
|
"grad_norm": 2.5960116386413574, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7512, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.7563703284258211, |
|
"eval_f1_macro": 0.4168375246168502, |
|
"eval_f1_micro": 0.7563703284258211, |
|
"eval_loss": 0.690902054309845, |
|
"eval_runtime": 227.3589, |
|
"eval_samples_per_second": 155.349, |
|
"eval_steps_per_second": 4.856, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 327888 |
|
}, |
|
{ |
|
"epoch": 99.03381642512078, |
|
"grad_norm": 2.297607183456421, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7571, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 99.18478260869566, |
|
"grad_norm": 2.8327066898345947, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7482, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 99.33574879227054, |
|
"grad_norm": 2.6676270961761475, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7582, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 99.48671497584542, |
|
"grad_norm": 3.0485763549804688, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7451, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 99.6376811594203, |
|
"grad_norm": 4.068350315093994, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.76, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 99.78864734299516, |
|
"grad_norm": 2.083367109298706, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7615, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 99.93961352657004, |
|
"grad_norm": 2.314098834991455, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7694, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.7561721404303511, |
|
"eval_f1_macro": 0.41553616915095626, |
|
"eval_f1_micro": 0.7561721404303511, |
|
"eval_loss": 0.6912137866020203, |
|
"eval_runtime": 232.1679, |
|
"eval_samples_per_second": 152.131, |
|
"eval_steps_per_second": 4.755, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 331200 |
|
}, |
|
{ |
|
"epoch": 100.09057971014492, |
|
"grad_norm": 2.8212807178497314, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7507, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 100.2415458937198, |
|
"grad_norm": 3.2656657695770264, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7764, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 100.39251207729468, |
|
"grad_norm": 3.15940523147583, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7538, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 100.54347826086956, |
|
"grad_norm": 2.2110767364501953, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7664, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 100.69444444444444, |
|
"grad_norm": 2.5000598430633545, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7593, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 100.84541062801932, |
|
"grad_norm": 2.8894083499908447, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7428, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 100.9963768115942, |
|
"grad_norm": 2.6859490871429443, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7487, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.755039637599094, |
|
"eval_f1_macro": 0.4158180260828054, |
|
"eval_f1_micro": 0.755039637599094, |
|
"eval_loss": 0.6903811097145081, |
|
"eval_runtime": 232.4252, |
|
"eval_samples_per_second": 151.963, |
|
"eval_steps_per_second": 4.75, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 334512 |
|
}, |
|
{ |
|
"epoch": 101.14734299516908, |
|
"grad_norm": 1.655178427696228, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7546, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 101.29830917874396, |
|
"grad_norm": 1.9348598718643188, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7558, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 101.44927536231884, |
|
"grad_norm": 2.7666141986846924, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7568, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 101.60024154589372, |
|
"grad_norm": 3.338181972503662, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7692, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 101.7512077294686, |
|
"grad_norm": 2.86478328704834, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7567, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 101.90217391304348, |
|
"grad_norm": 2.3418822288513184, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7543, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.7569648924122311, |
|
"eval_f1_macro": 0.41747069582135204, |
|
"eval_f1_micro": 0.7569648924122311, |
|
"eval_loss": 0.6890261173248291, |
|
"eval_runtime": 227.6405, |
|
"eval_samples_per_second": 155.157, |
|
"eval_steps_per_second": 4.85, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 337824 |
|
}, |
|
{ |
|
"epoch": 102.05314009661836, |
|
"grad_norm": 2.9169716835021973, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7571, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 102.20410628019323, |
|
"grad_norm": 1.8482954502105713, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7542, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 102.35507246376811, |
|
"grad_norm": 2.9988529682159424, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7634, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 102.50603864734299, |
|
"grad_norm": 2.1122984886169434, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7483, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 102.65700483091787, |
|
"grad_norm": 3.089597225189209, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7557, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 102.80797101449275, |
|
"grad_norm": 2.3444504737854004, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7717, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 102.95893719806763, |
|
"grad_norm": 2.9209277629852295, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7743, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.7545583238958098, |
|
"eval_f1_macro": 0.413729218203792, |
|
"eval_f1_micro": 0.7545583238958098, |
|
"eval_loss": 0.6923326253890991, |
|
"eval_runtime": 232.6734, |
|
"eval_samples_per_second": 151.801, |
|
"eval_steps_per_second": 4.745, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 341136 |
|
}, |
|
{ |
|
"epoch": 103.10990338164251, |
|
"grad_norm": 2.1903257369995117, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7563, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 103.26086956521739, |
|
"grad_norm": 2.3659310340881348, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7559, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 103.41183574879227, |
|
"grad_norm": 2.9739909172058105, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7584, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 103.56280193236715, |
|
"grad_norm": 3.081482172012329, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7514, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 103.71376811594203, |
|
"grad_norm": 2.4748103618621826, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.761, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 103.86473429951691, |
|
"grad_norm": 2.350468397140503, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.757, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.7560022650056625, |
|
"eval_f1_macro": 0.418257195239873, |
|
"eval_f1_micro": 0.7560022650056625, |
|
"eval_loss": 0.6911641359329224, |
|
"eval_runtime": 231.0753, |
|
"eval_samples_per_second": 152.851, |
|
"eval_steps_per_second": 4.778, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 344448 |
|
}, |
|
{ |
|
"epoch": 104.01570048309179, |
|
"grad_norm": 2.1691606044769287, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7642, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 104.16666666666667, |
|
"grad_norm": 2.2919552326202393, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7595, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 104.31763285024155, |
|
"grad_norm": 2.3565289974212646, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.758, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 104.46859903381643, |
|
"grad_norm": 2.844158887863159, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7571, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 104.6195652173913, |
|
"grad_norm": 2.3312838077545166, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7555, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 104.77053140096618, |
|
"grad_norm": 2.8080615997314453, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7479, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 104.92149758454106, |
|
"grad_norm": 2.749624252319336, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7631, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.7560588901472254, |
|
"eval_f1_macro": 0.40884936189500287, |
|
"eval_f1_micro": 0.7560588901472254, |
|
"eval_loss": 0.6899089217185974, |
|
"eval_runtime": 231.8214, |
|
"eval_samples_per_second": 152.359, |
|
"eval_steps_per_second": 4.762, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 347760 |
|
}, |
|
{ |
|
"epoch": 105.07246376811594, |
|
"grad_norm": 2.3580851554870605, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7609, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 105.22342995169082, |
|
"grad_norm": 2.7042460441589355, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7514, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 105.3743961352657, |
|
"grad_norm": 3.214773654937744, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7522, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 105.52536231884058, |
|
"grad_norm": 2.758049964904785, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7544, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 105.67632850241546, |
|
"grad_norm": 3.082301139831543, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7647, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 105.82729468599034, |
|
"grad_norm": 2.2984790802001953, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7708, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 105.97826086956522, |
|
"grad_norm": 2.3610072135925293, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.755, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.7555775764439411, |
|
"eval_f1_macro": 0.41022696199143, |
|
"eval_f1_micro": 0.7555775764439411, |
|
"eval_loss": 0.6911600828170776, |
|
"eval_runtime": 228.6277, |
|
"eval_samples_per_second": 154.487, |
|
"eval_steps_per_second": 4.829, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 351072 |
|
}, |
|
{ |
|
"epoch": 106.1292270531401, |
|
"grad_norm": 2.5288326740264893, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.757, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 106.28019323671498, |
|
"grad_norm": 2.042731285095215, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7561, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 106.43115942028986, |
|
"grad_norm": 2.8303983211517334, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7719, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 106.58212560386474, |
|
"grad_norm": 2.7303476333618164, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7533, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 106.73309178743962, |
|
"grad_norm": 2.3027992248535156, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7529, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 106.8840579710145, |
|
"grad_norm": 2.981818437576294, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7545, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.7573329558323896, |
|
"eval_f1_macro": 0.4107271190379426, |
|
"eval_f1_micro": 0.7573329558323896, |
|
"eval_loss": 0.6897767782211304, |
|
"eval_runtime": 233.0239, |
|
"eval_samples_per_second": 151.572, |
|
"eval_steps_per_second": 4.738, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 354384 |
|
}, |
|
{ |
|
"epoch": 107.03502415458937, |
|
"grad_norm": 1.8198853731155396, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7718, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 107.18599033816425, |
|
"grad_norm": 2.596832752227783, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7458, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 107.33695652173913, |
|
"grad_norm": 1.9544519186019897, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7692, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 107.487922705314, |
|
"grad_norm": 2.4098520278930664, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7672, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 107.63888888888889, |
|
"grad_norm": 1.8802995681762695, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7468, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 107.78985507246377, |
|
"grad_norm": 2.2621710300445557, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7574, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 107.94082125603865, |
|
"grad_norm": 2.034428358078003, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.7533, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.7538221970554927, |
|
"eval_f1_macro": 0.4113790924843219, |
|
"eval_f1_micro": 0.7538221970554927, |
|
"eval_loss": 0.6910150647163391, |
|
"eval_runtime": 233.9308, |
|
"eval_samples_per_second": 150.985, |
|
"eval_steps_per_second": 4.719, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 357696 |
|
}, |
|
{ |
|
"epoch": 108.09178743961353, |
|
"grad_norm": 2.0702438354492188, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7466, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 108.2427536231884, |
|
"grad_norm": 2.602733612060547, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7459, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 108.39371980676329, |
|
"grad_norm": 2.700967311859131, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7456, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 108.54468599033817, |
|
"grad_norm": 2.0624594688415527, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7654, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 108.69565217391305, |
|
"grad_norm": 2.51285719871521, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7489, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 108.84661835748793, |
|
"grad_norm": 2.6085429191589355, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7659, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 108.9975845410628, |
|
"grad_norm": 2.4694600105285645, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7725, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.7564552661381654, |
|
"eval_f1_macro": 0.41338387108756985, |
|
"eval_f1_micro": 0.7564552661381654, |
|
"eval_loss": 0.6899031400680542, |
|
"eval_runtime": 232.9956, |
|
"eval_samples_per_second": 151.591, |
|
"eval_steps_per_second": 4.738, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 361008 |
|
}, |
|
{ |
|
"epoch": 109.14855072463769, |
|
"grad_norm": 2.1322195529937744, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7608, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 109.29951690821257, |
|
"grad_norm": 2.610919713973999, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7643, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 109.45048309178743, |
|
"grad_norm": 2.5677592754364014, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7588, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 109.60144927536231, |
|
"grad_norm": 2.2580180168151855, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7459, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 109.7524154589372, |
|
"grad_norm": 2.9064154624938965, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7549, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 109.90338164251207, |
|
"grad_norm": 2.8899617195129395, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7544, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.7554643261608154, |
|
"eval_f1_macro": 0.41101621102420904, |
|
"eval_f1_micro": 0.7554643261608154, |
|
"eval_loss": 0.692249059677124, |
|
"eval_runtime": 228.5976, |
|
"eval_samples_per_second": 154.507, |
|
"eval_steps_per_second": 4.829, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 364320 |
|
}, |
|
{ |
|
"epoch": 110.05434782608695, |
|
"grad_norm": 3.057371139526367, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7625, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 110.20531400966183, |
|
"grad_norm": 2.688296318054199, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7573, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 110.35628019323671, |
|
"grad_norm": 3.2767984867095947, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.758, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 110.5072463768116, |
|
"grad_norm": 1.783074975013733, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7629, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 110.65821256038647, |
|
"grad_norm": 2.1098732948303223, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7553, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 110.80917874396135, |
|
"grad_norm": 2.3171918392181396, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7445, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 110.96014492753623, |
|
"grad_norm": 2.462982654571533, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.758, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.7559456398640997, |
|
"eval_f1_macro": 0.41414305686867453, |
|
"eval_f1_micro": 0.7559456398640997, |
|
"eval_loss": 0.690136194229126, |
|
"eval_runtime": 234.7142, |
|
"eval_samples_per_second": 150.481, |
|
"eval_steps_per_second": 4.704, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 367632 |
|
}, |
|
{ |
|
"epoch": 111.11111111111111, |
|
"grad_norm": 2.9526002407073975, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7563, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 111.262077294686, |
|
"grad_norm": 2.2364773750305176, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.757, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 111.41304347826087, |
|
"grad_norm": 2.5117523670196533, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7673, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 111.56400966183575, |
|
"grad_norm": 2.751983880996704, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7553, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 111.71497584541063, |
|
"grad_norm": 2.5421624183654785, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7561, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 111.8659420289855, |
|
"grad_norm": 2.4700706005096436, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.7674, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.756030577576444, |
|
"eval_f1_macro": 0.4127357640164436, |
|
"eval_f1_micro": 0.756030577576444, |
|
"eval_loss": 0.6902924180030823, |
|
"eval_runtime": 234.0028, |
|
"eval_samples_per_second": 150.938, |
|
"eval_steps_per_second": 4.718, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 370944 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 370944, |
|
"total_flos": 6.926864611971372e+20, |
|
"train_loss": 0.8085850866355923, |
|
"train_runtime": 128547.5283, |
|
"train_samples_per_second": 123.643, |
|
"train_steps_per_second": 3.865 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 496800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.926864611971372e+20, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|