{ "best_metric": 0.1206900030374527, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/DinoVdeau-giant-2024_08_28-batch-size32_epochs150_freeze/checkpoint-25935", "epoch": 105.0, "eval_steps": 500, "global_step": 28665, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.21205821205821207, "eval_f1_macro": 0.5175126673232894, "eval_f1_micro": 0.7424333879451582, "eval_loss": 0.17437300086021423, "eval_roc_auc": 0.8285535192873753, "eval_runtime": 747.1492, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 273 }, { "epoch": 1.8315018315018317, "grad_norm": 0.29891085624694824, "learning_rate": 0.001, "loss": 0.2593, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.24774774774774774, "eval_f1_macro": 0.5912510936495889, "eval_f1_micro": 0.7776526996039191, "eval_loss": 0.1514047533273697, "eval_roc_auc": 0.856455760350861, "eval_runtime": 745.2688, "eval_samples_per_second": 3.872, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 546 }, { "epoch": 3.0, "eval_accuracy": 0.23873873873873874, "eval_f1_macro": 0.6203462640123141, "eval_f1_micro": 0.7752795082305376, "eval_loss": 0.1557399332523346, "eval_roc_auc": 0.8580342914691714, "eval_runtime": 748.2805, "eval_samples_per_second": 3.857, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 819 }, { "epoch": 3.663003663003663, "grad_norm": 0.24181818962097168, "learning_rate": 0.001, "loss": 0.1694, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.2494802494802495, "eval_f1_macro": 0.6112936548561337, "eval_f1_micro": 0.7691087713115115, "eval_loss": 0.1499096304178238, "eval_roc_auc": 0.8372664798756062, "eval_runtime": 747.4138, "eval_samples_per_second": 3.861, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 1092 }, { "epoch": 5.0, "eval_accuracy": 0.24497574497574498, "eval_f1_macro": 0.6316545255681125, "eval_f1_micro": 0.7744962975718961, "eval_loss": 0.15773828327655792, "eval_roc_auc": 0.8461026726645842, "eval_runtime": 747.0386, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 1365 }, { "epoch": 5.4945054945054945, "grad_norm": 0.17729038000106812, "learning_rate": 0.001, "loss": 0.1637, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.25744975744975745, "eval_f1_macro": 0.6220908262048482, "eval_f1_micro": 0.7803354441211706, "eval_loss": 0.1529887616634369, "eval_roc_auc": 0.8508892919574323, "eval_runtime": 747.6468, "eval_samples_per_second": 3.86, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 1638 }, { "epoch": 7.0, "eval_accuracy": 0.2616077616077616, "eval_f1_macro": 0.6318272608971183, "eval_f1_micro": 0.7837652308220353, "eval_loss": 0.14232446253299713, "eval_roc_auc": 0.8519980061789139, "eval_runtime": 743.8547, "eval_samples_per_second": 3.88, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 1911 }, { "epoch": 7.326007326007326, "grad_norm": 0.21456240117549896, "learning_rate": 0.001, "loss": 0.1598, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.2591822591822592, "eval_f1_macro": 0.6268140575796306, "eval_f1_micro": 0.7824785045129828, "eval_loss": 0.14342056214809418, "eval_roc_auc": 0.8521029956678926, "eval_runtime": 745.5826, "eval_samples_per_second": 3.871, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 2184 }, { "epoch": 9.0, "eval_accuracy": 0.25848925848925847, "eval_f1_macro": 0.6406683603322132, "eval_f1_micro": 0.7840562521179261, "eval_loss": 0.14322087168693542, "eval_roc_auc": 0.8556312702614824, "eval_runtime": 746.6555, "eval_samples_per_second": 3.865, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 2457 }, { "epoch": 9.157509157509157, "grad_norm": 0.17193137109279633, "learning_rate": 0.001, "loss": 0.157, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.2591822591822592, "eval_f1_macro": 0.6350156993693012, "eval_f1_micro": 0.7779440239394473, "eval_loss": 0.15065954625606537, "eval_roc_auc": 0.8421810798397646, "eval_runtime": 749.2424, "eval_samples_per_second": 3.852, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 2730 }, { "epoch": 10.989010989010989, "grad_norm": 0.17156100273132324, "learning_rate": 0.001, "loss": 0.1564, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.26853776853776856, "eval_f1_macro": 0.6442254017268965, "eval_f1_micro": 0.7905542412977358, "eval_loss": 0.14012028276920319, "eval_roc_auc": 0.8599228950325096, "eval_runtime": 743.9581, "eval_samples_per_second": 3.879, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 3003 }, { "epoch": 12.0, "eval_accuracy": 0.26056826056826055, "eval_f1_macro": 0.6412994039301575, "eval_f1_micro": 0.7896027049873203, "eval_loss": 0.14037516713142395, "eval_roc_auc": 0.8592624552114599, "eval_runtime": 747.0487, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 3276 }, { "epoch": 12.820512820512821, "grad_norm": 0.14995847642421722, "learning_rate": 0.001, "loss": 0.1556, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.2695772695772696, "eval_f1_macro": 0.6359393136512833, "eval_f1_micro": 0.7822141560798549, "eval_loss": 0.1420680731534958, "eval_roc_auc": 0.8492469381754499, "eval_runtime": 742.4635, "eval_samples_per_second": 3.887, "eval_steps_per_second": 0.123, "learning_rate": 0.001, "step": 3549 }, { "epoch": 14.0, "eval_accuracy": 0.2636867636867637, "eval_f1_macro": 0.6459907944955716, "eval_f1_micro": 0.7887275978034142, "eval_loss": 0.13944004476070404, "eval_roc_auc": 0.8568078446879906, "eval_runtime": 744.9297, "eval_samples_per_second": 3.874, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 3822 }, { "epoch": 14.652014652014651, "grad_norm": 0.1688154637813568, "learning_rate": 0.001, "loss": 0.1547, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.2553707553707554, "eval_f1_macro": 0.6554204386045119, "eval_f1_micro": 0.7915315007683115, "eval_loss": 0.13796783983707428, "eval_roc_auc": 0.8575869560318454, "eval_runtime": 749.7594, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 4095 }, { "epoch": 16.0, "eval_accuracy": 0.255024255024255, "eval_f1_macro": 0.6452554527968026, "eval_f1_micro": 0.7857792404624779, "eval_loss": 0.1441228836774826, "eval_roc_auc": 0.8505811645074093, "eval_runtime": 751.9487, "eval_samples_per_second": 3.838, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 4368 }, { "epoch": 16.483516483516482, "grad_norm": 0.15101341903209686, "learning_rate": 0.001, "loss": 0.1539, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.26784476784476785, "eval_f1_macro": 0.6485416937632181, "eval_f1_micro": 0.7904489177124567, "eval_loss": 0.14113685488700867, "eval_roc_auc": 0.8607338640531657, "eval_runtime": 751.954, "eval_samples_per_second": 3.838, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 4641 }, { "epoch": 18.0, "eval_accuracy": 0.26056826056826055, "eval_f1_macro": 0.654854199500387, "eval_f1_micro": 0.7940517933336151, "eval_loss": 0.1381485015153885, "eval_roc_auc": 0.8618218271900107, "eval_runtime": 756.1006, "eval_samples_per_second": 3.817, "eval_steps_per_second": 0.12, "learning_rate": 0.001, "step": 4914 }, { "epoch": 18.315018315018314, "grad_norm": 0.17647762596607208, "learning_rate": 0.001, "loss": 0.1552, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.2654192654192654, "eval_f1_macro": 0.6522812524843972, "eval_f1_micro": 0.793669650812508, "eval_loss": 0.13720253109931946, "eval_roc_auc": 0.8604083523719281, "eval_runtime": 753.1197, "eval_samples_per_second": 3.832, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 5187 }, { "epoch": 20.0, "eval_accuracy": 0.253984753984754, "eval_f1_macro": 0.6515497507659908, "eval_f1_micro": 0.791502353390154, "eval_loss": 0.13964051008224487, "eval_roc_auc": 0.8593941380801585, "eval_runtime": 760.0428, "eval_samples_per_second": 3.797, "eval_steps_per_second": 0.12, "learning_rate": 0.001, "step": 5460 }, { "epoch": 20.146520146520146, "grad_norm": 0.15846939384937286, "learning_rate": 0.001, "loss": 0.1531, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.2577962577962578, "eval_f1_macro": 0.6542904488686327, "eval_f1_micro": 0.7925025501530093, "eval_loss": 0.13785456120967865, "eval_roc_auc": 0.8592903826569759, "eval_runtime": 757.5213, "eval_samples_per_second": 3.81, "eval_steps_per_second": 0.12, "learning_rate": 0.001, "step": 5733 }, { "epoch": 21.978021978021978, "grad_norm": 0.16983690857887268, "learning_rate": 0.001, "loss": 0.1536, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.2661122661122661, "eval_f1_macro": 0.6524154901292529, "eval_f1_micro": 0.7952276188864443, "eval_loss": 0.13633865118026733, "eval_roc_auc": 0.8620495257431491, "eval_runtime": 758.4735, "eval_samples_per_second": 3.805, "eval_steps_per_second": 0.12, "learning_rate": 0.001, "step": 6006 }, { "epoch": 23.0, "eval_accuracy": 0.27096327096327094, "eval_f1_macro": 0.656651787807274, "eval_f1_micro": 0.7961679924728424, "eval_loss": 0.13627886772155762, "eval_roc_auc": 0.8595478597543244, "eval_runtime": 753.7633, "eval_samples_per_second": 3.829, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 6279 }, { "epoch": 23.80952380952381, "grad_norm": 0.1691550612449646, "learning_rate": 0.001, "loss": 0.1535, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.2661122661122661, "eval_f1_macro": 0.6438900918479138, "eval_f1_micro": 0.7871861324722778, "eval_loss": 0.14012865722179413, "eval_roc_auc": 0.8565085837324373, "eval_runtime": 758.2383, "eval_samples_per_second": 3.806, "eval_steps_per_second": 0.12, "learning_rate": 0.001, "step": 6552 }, { "epoch": 25.0, "eval_accuracy": 0.27546777546777546, "eval_f1_macro": 0.6538094573584412, "eval_f1_micro": 0.7960565795113589, "eval_loss": 0.1359640210866928, "eval_roc_auc": 0.8588707063899927, "eval_runtime": 765.0178, "eval_samples_per_second": 3.772, "eval_steps_per_second": 0.119, "learning_rate": 0.001, "step": 6825 }, { "epoch": 25.641025641025642, "grad_norm": 0.14603881537914276, "learning_rate": 0.001, "loss": 0.153, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.2692307692307692, "eval_f1_macro": 0.6407905722004358, "eval_f1_micro": 0.7942222975262623, "eval_loss": 0.1370791494846344, "eval_roc_auc": 0.8611700794845683, "eval_runtime": 750.2435, "eval_samples_per_second": 3.847, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 7098 }, { "epoch": 27.0, "eval_accuracy": 0.2654192654192654, "eval_f1_macro": 0.6469565906332285, "eval_f1_micro": 0.7902460077686664, "eval_loss": 0.13669614493846893, "eval_roc_auc": 0.8538650806596136, "eval_runtime": 744.2164, "eval_samples_per_second": 3.878, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 7371 }, { "epoch": 27.47252747252747, "grad_norm": 0.1542704999446869, "learning_rate": 0.001, "loss": 0.1532, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.26888426888426886, "eval_f1_macro": 0.642689033704319, "eval_f1_micro": 0.7912144926283021, "eval_loss": 0.1371130496263504, "eval_roc_auc": 0.8539010295328042, "eval_runtime": 744.0106, "eval_samples_per_second": 3.879, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 7644 }, { "epoch": 29.0, "eval_accuracy": 0.2692307692307692, "eval_f1_macro": 0.6484600603294314, "eval_f1_micro": 0.7944120277694962, "eval_loss": 0.13781629502773285, "eval_roc_auc": 0.8597476308619466, "eval_runtime": 751.4281, "eval_samples_per_second": 3.841, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 7917 }, { "epoch": 29.304029304029303, "grad_norm": 0.15774671733379364, "learning_rate": 0.001, "loss": 0.1539, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.26507276507276506, "eval_f1_macro": 0.6472439075890195, "eval_f1_micro": 0.7938241064573914, "eval_loss": 0.13641151785850525, "eval_roc_auc": 0.8590391831986771, "eval_runtime": 743.8204, "eval_samples_per_second": 3.88, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 8190 }, { "epoch": 31.0, "eval_accuracy": 0.2747747747747748, "eval_f1_macro": 0.6533472550118105, "eval_f1_micro": 0.7999161777032691, "eval_loss": 0.13565559685230255, "eval_roc_auc": 0.8672849828142924, "eval_runtime": 745.046, "eval_samples_per_second": 3.874, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 8463 }, { "epoch": 31.135531135531135, "grad_norm": 0.15824691951274872, "learning_rate": 0.001, "loss": 0.1527, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.2664587664587665, "eval_f1_macro": 0.662032330499469, "eval_f1_micro": 0.7928646379853095, "eval_loss": 0.137930765748024, "eval_roc_auc": 0.8629893205019107, "eval_runtime": 747.6199, "eval_samples_per_second": 3.86, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 8736 }, { "epoch": 32.967032967032964, "grad_norm": 0.17653779685497284, "learning_rate": 0.001, "loss": 0.1524, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.273042273042273, "eval_f1_macro": 0.6722007856831675, "eval_f1_micro": 0.7989514185446704, "eval_loss": 0.13557712733745575, "eval_roc_auc": 0.8642597778252326, "eval_runtime": 743.3529, "eval_samples_per_second": 3.882, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 9009 }, { "epoch": 34.0, "eval_accuracy": 0.273042273042273, "eval_f1_macro": 0.670590685863264, "eval_f1_micro": 0.7966670917825107, "eval_loss": 0.1347290426492691, "eval_roc_auc": 0.8614922779674185, "eval_runtime": 743.2445, "eval_samples_per_second": 3.883, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 9282 }, { "epoch": 34.798534798534796, "grad_norm": 0.15610426664352417, "learning_rate": 0.001, "loss": 0.1516, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.2772002772002772, "eval_f1_macro": 0.6482708127714739, "eval_f1_micro": 0.7946646145953571, "eval_loss": 0.13544337451457977, "eval_roc_auc": 0.8588431142884431, "eval_runtime": 750.5786, "eval_samples_per_second": 3.845, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 9555 }, { "epoch": 36.0, "eval_accuracy": 0.25848925848925847, "eval_f1_macro": 0.6552995006011981, "eval_f1_micro": 0.7927604900328681, "eval_loss": 0.13763058185577393, "eval_roc_auc": 0.8582396561141522, "eval_runtime": 746.9319, "eval_samples_per_second": 3.864, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 9828 }, { "epoch": 36.63003663003663, "grad_norm": 0.176735520362854, "learning_rate": 0.001, "loss": 0.1527, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.2747747747747748, "eval_f1_macro": 0.6680976075122991, "eval_f1_micro": 0.7992204380799051, "eval_loss": 0.13456694781780243, "eval_roc_auc": 0.8638335422302681, "eval_runtime": 744.024, "eval_samples_per_second": 3.879, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 10101 }, { "epoch": 38.0, "eval_accuracy": 0.27165627165627165, "eval_f1_macro": 0.6543467314054483, "eval_f1_micro": 0.7889066758966815, "eval_loss": 0.13784632086753845, "eval_roc_auc": 0.8524819477636044, "eval_runtime": 745.3518, "eval_samples_per_second": 3.872, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 10374 }, { "epoch": 38.46153846153846, "grad_norm": 0.16059936583042145, "learning_rate": 0.001, "loss": 0.1503, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.2664587664587665, "eval_f1_macro": 0.6627442989440849, "eval_f1_micro": 0.7965357098029371, "eval_loss": 0.13671767711639404, "eval_roc_auc": 0.865910488378856, "eval_runtime": 745.9061, "eval_samples_per_second": 3.869, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 10647 }, { "epoch": 40.0, "eval_accuracy": 0.27373527373527373, "eval_f1_macro": 0.670153584497431, "eval_f1_micro": 0.8004978220286246, "eval_loss": 0.13730555772781372, "eval_roc_auc": 0.8705375510125241, "eval_runtime": 744.6796, "eval_samples_per_second": 3.875, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 10920 }, { "epoch": 40.29304029304029, "grad_norm": 0.16920654475688934, "learning_rate": 0.001, "loss": 0.152, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.26576576576576577, "eval_f1_macro": 0.6610276871242879, "eval_f1_micro": 0.7942296990711015, "eval_loss": 0.13770104944705963, "eval_roc_auc": 0.8582536198369102, "eval_runtime": 744.9969, "eval_samples_per_second": 3.874, "eval_steps_per_second": 0.122, "learning_rate": 0.001, "step": 11193 }, { "epoch": 42.0, "eval_accuracy": 0.28101178101178104, "eval_f1_macro": 0.6705886094654014, "eval_f1_micro": 0.8001525876319246, "eval_loss": 0.13536451756954193, "eval_roc_auc": 0.8642216961644161, "eval_runtime": 751.3727, "eval_samples_per_second": 3.841, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 11466 }, { "epoch": 42.124542124542124, "grad_norm": 0.1676277071237564, "learning_rate": 0.001, "loss": 0.1515, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.26507276507276506, "eval_f1_macro": 0.6619628883017729, "eval_f1_micro": 0.8000498525196295, "eval_loss": 0.13665379583835602, "eval_roc_auc": 0.8698817657657271, "eval_runtime": 749.8198, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.121, "learning_rate": 0.001, "step": 11739 }, { "epoch": 43.956043956043956, "grad_norm": 0.15791508555412292, "learning_rate": 0.0001, "loss": 0.147, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.2869022869022869, "eval_f1_macro": 0.6825865030851337, "eval_f1_micro": 0.808658516161447, "eval_loss": 0.12908011674880981, "eval_roc_auc": 0.8723907154255005, "eval_runtime": 750.2309, "eval_samples_per_second": 3.847, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 12012 }, { "epoch": 45.0, "eval_accuracy": 0.29972279972279975, "eval_f1_macro": 0.6938587241702103, "eval_f1_micro": 0.811512367788968, "eval_loss": 0.12761357426643372, "eval_roc_auc": 0.8720936945676423, "eval_runtime": 758.8984, "eval_samples_per_second": 3.803, "eval_steps_per_second": 0.12, "learning_rate": 0.0001, "step": 12285 }, { "epoch": 45.78754578754579, "grad_norm": 0.16074201464653015, "learning_rate": 0.0001, "loss": 0.139, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.2959112959112959, "eval_f1_macro": 0.6856377454961721, "eval_f1_micro": 0.8103163511624953, "eval_loss": 0.12698666751384735, "eval_roc_auc": 0.8699996458767716, "eval_runtime": 752.5715, "eval_samples_per_second": 3.835, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 12558 }, { "epoch": 47.0, "eval_accuracy": 0.2972972972972973, "eval_f1_macro": 0.6942647446672258, "eval_f1_micro": 0.8124920976103174, "eval_loss": 0.12690682709217072, "eval_roc_auc": 0.8725812846946867, "eval_runtime": 759.108, "eval_samples_per_second": 3.802, "eval_steps_per_second": 0.12, "learning_rate": 0.0001, "step": 12831 }, { "epoch": 47.61904761904762, "grad_norm": 0.17895784974098206, "learning_rate": 0.0001, "loss": 0.1375, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.29799029799029797, "eval_f1_macro": 0.694151320978192, "eval_f1_micro": 0.8131711409395973, "eval_loss": 0.12617328763008118, "eval_roc_auc": 0.8743386078020858, "eval_runtime": 767.7622, "eval_samples_per_second": 3.759, "eval_steps_per_second": 0.119, "learning_rate": 0.0001, "step": 13104 }, { "epoch": 49.0, "eval_accuracy": 0.2966042966042966, "eval_f1_macro": 0.6956458198072734, "eval_f1_micro": 0.8147346514047868, "eval_loss": 0.1263018250465393, "eval_roc_auc": 0.8774737921983433, "eval_runtime": 752.7691, "eval_samples_per_second": 3.834, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 13377 }, { "epoch": 49.45054945054945, "grad_norm": 0.22477330267429352, "learning_rate": 0.0001, "loss": 0.1353, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.2927927927927928, "eval_f1_macro": 0.7006577033751422, "eval_f1_micro": 0.8153475224476222, "eval_loss": 0.1258096992969513, "eval_roc_auc": 0.8781952512075065, "eval_runtime": 751.7275, "eval_samples_per_second": 3.839, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 13650 }, { "epoch": 51.0, "eval_accuracy": 0.2972972972972973, "eval_f1_macro": 0.6994505755010588, "eval_f1_micro": 0.8151571934207786, "eval_loss": 0.12573884427547455, "eval_roc_auc": 0.8775850056713371, "eval_runtime": 754.5773, "eval_samples_per_second": 3.825, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 13923 }, { "epoch": 51.282051282051285, "grad_norm": 0.1825592815876007, "learning_rate": 0.0001, "loss": 0.1337, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.2972972972972973, "eval_f1_macro": 0.6974514657531053, "eval_f1_micro": 0.8134649455833967, "eval_loss": 0.12501972913742065, "eval_roc_auc": 0.8728563740571469, "eval_runtime": 748.8299, "eval_samples_per_second": 3.854, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 14196 }, { "epoch": 53.0, "eval_accuracy": 0.2948717948717949, "eval_f1_macro": 0.6962280886309719, "eval_f1_micro": 0.8132960287301124, "eval_loss": 0.12481856346130371, "eval_roc_auc": 0.8757195542554345, "eval_runtime": 754.8846, "eval_samples_per_second": 3.823, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 14469 }, { "epoch": 53.11355311355312, "grad_norm": 0.16182786226272583, "learning_rate": 0.0001, "loss": 0.1338, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.30180180180180183, "eval_f1_macro": 0.6980743235485474, "eval_f1_micro": 0.8143470573377115, "eval_loss": 0.12473563104867935, "eval_roc_auc": 0.8739288040614714, "eval_runtime": 764.2531, "eval_samples_per_second": 3.776, "eval_steps_per_second": 0.119, "learning_rate": 0.0001, "step": 14742 }, { "epoch": 54.94505494505494, "grad_norm": 0.22775864601135254, "learning_rate": 0.0001, "loss": 0.1322, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.30076230076230076, "eval_f1_macro": 0.7020497284253308, "eval_f1_micro": 0.8165587111775452, "eval_loss": 0.12453257292509079, "eval_roc_auc": 0.8792131676966645, "eval_runtime": 758.6078, "eval_samples_per_second": 3.804, "eval_steps_per_second": 0.12, "learning_rate": 0.0001, "step": 15015 }, { "epoch": 56.0, "eval_accuracy": 0.3011088011088011, "eval_f1_macro": 0.7041152638460181, "eval_f1_micro": 0.8185497191939213, "eval_loss": 0.12440259009599686, "eval_roc_auc": 0.8819546448626913, "eval_runtime": 755.48, "eval_samples_per_second": 3.82, "eval_steps_per_second": 0.12, "learning_rate": 0.0001, "step": 15288 }, { "epoch": 56.776556776556774, "grad_norm": 0.26265445351600647, "learning_rate": 0.0001, "loss": 0.1313, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.3004158004158004, "eval_f1_macro": 0.6984123654445143, "eval_f1_micro": 0.8162207357859533, "eval_loss": 0.12393573671579361, "eval_roc_auc": 0.8770029692696153, "eval_runtime": 749.3127, "eval_samples_per_second": 3.852, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 15561 }, { "epoch": 58.0, "eval_accuracy": 0.30006930006930005, "eval_f1_macro": 0.7041206694443728, "eval_f1_micro": 0.8171478565179352, "eval_loss": 0.12355069816112518, "eval_roc_auc": 0.8785400518736873, "eval_runtime": 751.5939, "eval_samples_per_second": 3.84, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 15834 }, { "epoch": 58.608058608058606, "grad_norm": 0.19159354269504547, "learning_rate": 0.0001, "loss": 0.1309, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.701908769020469, "eval_f1_micro": 0.8158932617269447, "eval_loss": 0.1237163171172142, "eval_roc_auc": 0.8757623441455382, "eval_runtime": 749.4527, "eval_samples_per_second": 3.851, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 16107 }, { "epoch": 60.0, "eval_accuracy": 0.29902979902979904, "eval_f1_macro": 0.7008492179245241, "eval_f1_micro": 0.8152564590468943, "eval_loss": 0.12339853495359421, "eval_roc_auc": 0.8731348839280636, "eval_runtime": 748.7843, "eval_samples_per_second": 3.854, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 16380 }, { "epoch": 60.43956043956044, "grad_norm": 0.19487616419792175, "learning_rate": 0.0001, "loss": 0.13, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.3024948024948025, "eval_f1_macro": 0.7083200505706103, "eval_f1_micro": 0.8188720173535793, "eval_loss": 0.12294851988554001, "eval_roc_auc": 0.8791109816832443, "eval_runtime": 752.7718, "eval_samples_per_second": 3.834, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 16653 }, { "epoch": 62.0, "eval_accuracy": 0.30284130284130284, "eval_f1_macro": 0.7054890147149661, "eval_f1_micro": 0.8166017506386899, "eval_loss": 0.12270853668451309, "eval_roc_auc": 0.876682675540494, "eval_runtime": 746.0294, "eval_samples_per_second": 3.868, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 16926 }, { "epoch": 62.27106227106227, "grad_norm": 0.20640559494495392, "learning_rate": 0.0001, "loss": 0.1288, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.3038808038808039, "eval_f1_macro": 0.7105833307429198, "eval_f1_micro": 0.8176490288010717, "eval_loss": 0.12301415950059891, "eval_roc_auc": 0.8773957777780161, "eval_runtime": 748.1364, "eval_samples_per_second": 3.858, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 17199 }, { "epoch": 64.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.7085844813380441, "eval_f1_micro": 0.8191759178412541, "eval_loss": 0.12328237295150757, "eval_roc_auc": 0.880258676287372, "eval_runtime": 749.8061, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.121, "learning_rate": 0.0001, "step": 17472 }, { "epoch": 64.1025641025641, "grad_norm": 0.2363331913948059, "learning_rate": 0.0001, "loss": 0.1291, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.3049203049203049, "eval_f1_macro": 0.7103887558295827, "eval_f1_micro": 0.8187567612548888, "eval_loss": 0.12309526652097702, "eval_roc_auc": 0.8798153918051592, "eval_runtime": 745.0937, "eval_samples_per_second": 3.873, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 17745 }, { "epoch": 65.93406593406593, "grad_norm": 0.26966458559036255, "learning_rate": 0.0001, "loss": 0.1283, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.30284130284130284, "eval_f1_macro": 0.7061406642055487, "eval_f1_micro": 0.8186407442947141, "eval_loss": 0.12194398790597916, "eval_roc_auc": 0.8789458717279818, "eval_runtime": 744.2128, "eval_samples_per_second": 3.878, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 18018 }, { "epoch": 67.0, "eval_accuracy": 0.3042273042273042, "eval_f1_macro": 0.7154558287425048, "eval_f1_micro": 0.8196775527077305, "eval_loss": 0.12292120605707169, "eval_roc_auc": 0.8822622625898855, "eval_runtime": 743.6955, "eval_samples_per_second": 3.881, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 18291 }, { "epoch": 67.76556776556777, "grad_norm": 0.2636018991470337, "learning_rate": 0.0001, "loss": 0.1273, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.30803880803880807, "eval_f1_macro": 0.7153434473934246, "eval_f1_micro": 0.8209686046990085, "eval_loss": 0.12254418432712555, "eval_roc_auc": 0.8843888396454903, "eval_runtime": 743.6093, "eval_samples_per_second": 3.881, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 18564 }, { "epoch": 69.0, "eval_accuracy": 0.3031878031878032, "eval_f1_macro": 0.7101570111652898, "eval_f1_micro": 0.8195983668027664, "eval_loss": 0.12215162813663483, "eval_roc_auc": 0.87988510310888, "eval_runtime": 744.98, "eval_samples_per_second": 3.874, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 18837 }, { "epoch": 69.59706959706959, "grad_norm": 0.19965404272079468, "learning_rate": 0.0001, "loss": 0.1265, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.30838530838530837, "eval_f1_macro": 0.7109091736321397, "eval_f1_micro": 0.8184682603033231, "eval_loss": 0.12227334082126617, "eval_roc_auc": 0.8767948413903521, "eval_runtime": 744.4872, "eval_samples_per_second": 3.876, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 19110 }, { "epoch": 71.0, "eval_accuracy": 0.3076923076923077, "eval_f1_macro": 0.7120407268503043, "eval_f1_micro": 0.8170385739086251, "eval_loss": 0.12237659096717834, "eval_roc_auc": 0.8737123194105673, "eval_runtime": 747.0787, "eval_samples_per_second": 3.863, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 19383 }, { "epoch": 71.42857142857143, "grad_norm": 0.2734057903289795, "learning_rate": 0.0001, "loss": 0.1264, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.3063063063063063, "eval_f1_macro": 0.7203981522602361, "eval_f1_micro": 0.8203632727878687, "eval_loss": 0.1220996230840683, "eval_roc_auc": 0.8803336591982435, "eval_runtime": 742.9487, "eval_samples_per_second": 3.885, "eval_steps_per_second": 0.122, "learning_rate": 0.0001, "step": 19656 }, { "epoch": 73.0, "eval_accuracy": 0.3087318087318087, "eval_f1_macro": 0.7144193511981376, "eval_f1_micro": 0.8198457369189076, "eval_loss": 0.12169401347637177, "eval_roc_auc": 0.8798110725748728, "eval_runtime": 752.9878, "eval_samples_per_second": 3.833, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 19929 }, { "epoch": 73.26007326007326, "grad_norm": 0.20597431063652039, "learning_rate": 1e-05, "loss": 0.1249, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.30665280665280664, "eval_f1_macro": 0.7124121424308173, "eval_f1_micro": 0.8190452070406484, "eval_loss": 0.12149834632873535, "eval_roc_auc": 0.8757233637628921, "eval_runtime": 756.5322, "eval_samples_per_second": 3.815, "eval_steps_per_second": 0.12, "learning_rate": 1e-05, "step": 20202 }, { "epoch": 75.0, "eval_accuracy": 0.30561330561330563, "eval_f1_macro": 0.7145366354361308, "eval_f1_micro": 0.8208643316893754, "eval_loss": 0.12120900303125381, "eval_roc_auc": 0.879641026356426, "eval_runtime": 752.1644, "eval_samples_per_second": 3.837, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 20475 }, { "epoch": 75.0915750915751, "grad_norm": 0.25457698106765747, "learning_rate": 1e-05, "loss": 0.1236, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.30803880803880807, "eval_f1_macro": 0.7191205487713891, "eval_f1_micro": 0.8218541121766927, "eval_loss": 0.1215985044836998, "eval_roc_auc": 0.8821938390069956, "eval_runtime": 752.3495, "eval_samples_per_second": 3.836, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 20748 }, { "epoch": 76.92307692307692, "grad_norm": 0.2589890658855438, "learning_rate": 1e-05, "loss": 0.1233, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.31323631323631324, "eval_f1_macro": 0.7202749659896155, "eval_f1_micro": 0.8236983547367989, "eval_loss": 0.1214083805680275, "eval_roc_auc": 0.8867951606378082, "eval_runtime": 755.0282, "eval_samples_per_second": 3.822, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 21021 }, { "epoch": 78.0, "eval_accuracy": 0.3097713097713098, "eval_f1_macro": 0.7168480610158249, "eval_f1_micro": 0.8222591362126246, "eval_loss": 0.12110316008329391, "eval_roc_auc": 0.8823316922046746, "eval_runtime": 752.7354, "eval_samples_per_second": 3.834, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 21294 }, { "epoch": 78.75457875457876, "grad_norm": 0.26676803827285767, "learning_rate": 1e-05, "loss": 0.123, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.30665280665280664, "eval_f1_macro": 0.7160500850094047, "eval_f1_micro": 0.8202977563430488, "eval_loss": 0.12149946391582489, "eval_roc_auc": 0.878321716124089, "eval_runtime": 752.3192, "eval_samples_per_second": 3.836, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 21567 }, { "epoch": 80.0, "eval_accuracy": 0.30734580734580735, "eval_f1_macro": 0.7150848378423871, "eval_f1_micro": 0.8219257062844905, "eval_loss": 0.121590256690979, "eval_roc_auc": 0.8846639290079505, "eval_runtime": 747.5776, "eval_samples_per_second": 3.86, "eval_steps_per_second": 0.122, "learning_rate": 1e-05, "step": 21840 }, { "epoch": 80.58608058608058, "grad_norm": 0.2525629699230194, "learning_rate": 1e-05, "loss": 0.123, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.3115038115038115, "eval_f1_macro": 0.7187103786018064, "eval_f1_micro": 0.8216162121591194, "eval_loss": 0.12097962200641632, "eval_roc_auc": 0.8807537244642276, "eval_runtime": 755.3491, "eval_samples_per_second": 3.821, "eval_steps_per_second": 0.12, "learning_rate": 1e-05, "step": 22113 }, { "epoch": 82.0, "eval_accuracy": 0.30942480942480943, "eval_f1_macro": 0.7156786549052798, "eval_f1_micro": 0.821175978238125, "eval_loss": 0.12082336097955704, "eval_roc_auc": 0.8794272915260457, "eval_runtime": 753.7414, "eval_samples_per_second": 3.829, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 22386 }, { "epoch": 82.41758241758242, "grad_norm": 0.23939679563045502, "learning_rate": 1e-05, "loss": 0.1214, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.30006930006930005, "eval_f1_macro": 0.7102312532643303, "eval_f1_micro": 0.8180206046275968, "eval_loss": 0.12147542089223862, "eval_roc_auc": 0.8750765523206339, "eval_runtime": 745.2706, "eval_samples_per_second": 3.872, "eval_steps_per_second": 0.122, "learning_rate": 1e-05, "step": 22659 }, { "epoch": 84.0, "eval_accuracy": 0.31185031185031187, "eval_f1_macro": 0.7195842513107142, "eval_f1_micro": 0.8215978053038491, "eval_loss": 0.12100570648908615, "eval_roc_auc": 0.8816901523695672, "eval_runtime": 742.6349, "eval_samples_per_second": 3.886, "eval_steps_per_second": 0.123, "learning_rate": 1e-05, "step": 22932 }, { "epoch": 84.24908424908425, "grad_norm": 0.30801209807395935, "learning_rate": 1e-05, "loss": 0.1234, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.31011781011781014, "eval_f1_macro": 0.7201395616901511, "eval_f1_micro": 0.8233587533156498, "eval_loss": 0.1208326444029808, "eval_roc_auc": 0.8835425924395763, "eval_runtime": 742.1618, "eval_samples_per_second": 3.889, "eval_steps_per_second": 0.123, "learning_rate": 1e-05, "step": 23205 }, { "epoch": 86.0, "eval_accuracy": 0.30942480942480943, "eval_f1_macro": 0.7215167678270465, "eval_f1_micro": 0.8218151540383014, "eval_loss": 0.1210438683629036, "eval_roc_auc": 0.8813373302757117, "eval_runtime": 754.9986, "eval_samples_per_second": 3.823, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 23478 }, { "epoch": 86.08058608058609, "grad_norm": 0.23295313119888306, "learning_rate": 1e-05, "loss": 0.1216, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.3087318087318087, "eval_f1_macro": 0.7141558876633265, "eval_f1_micro": 0.8207271207689094, "eval_loss": 0.1212099939584732, "eval_roc_auc": 0.8796150036646389, "eval_runtime": 753.045, "eval_samples_per_second": 3.832, "eval_steps_per_second": 0.121, "learning_rate": 1e-05, "step": 23751 }, { "epoch": 87.91208791208791, "grad_norm": 0.21838252246379852, "learning_rate": 1e-05, "loss": 0.1219, "step": 24000 }, { "epoch": 88.0, "eval_accuracy": 0.31011781011781014, "eval_f1_macro": 0.7124615854591595, "eval_f1_micro": 0.8223957468017943, "eval_loss": 0.12096676975488663, "eval_roc_auc": 0.8823577148964619, "eval_runtime": 758.2188, "eval_samples_per_second": 3.806, "eval_steps_per_second": 0.12, "learning_rate": 1e-05, "step": 24024 }, { "epoch": 89.0, "eval_accuracy": 0.3121968121968122, "eval_f1_macro": 0.7249978662662346, "eval_f1_micro": 0.8240642149234173, "eval_loss": 0.12144902348518372, "eval_roc_auc": 0.8875640104562932, "eval_runtime": 760.5663, "eval_samples_per_second": 3.795, "eval_steps_per_second": 0.12, "learning_rate": 1.0000000000000002e-06, "step": 24297 }, { "epoch": 89.74358974358974, "grad_norm": 0.21705362200737, "learning_rate": 1.0000000000000002e-06, "loss": 0.1219, "step": 24500 }, { "epoch": 90.0, "eval_accuracy": 0.31046431046431044, "eval_f1_macro": 0.7198781344667567, "eval_f1_micro": 0.8233893154847453, "eval_loss": 0.12115956842899323, "eval_roc_auc": 0.8863713931744356, "eval_runtime": 763.5088, "eval_samples_per_second": 3.78, "eval_steps_per_second": 0.119, "learning_rate": 1.0000000000000002e-06, "step": 24570 }, { "epoch": 91.0, "eval_accuracy": 0.3097713097713098, "eval_f1_macro": 0.7159843095789674, "eval_f1_micro": 0.8212459126351974, "eval_loss": 0.1208055168390274, "eval_roc_auc": 0.8789555162204534, "eval_runtime": 757.8368, "eval_samples_per_second": 3.808, "eval_steps_per_second": 0.12, "learning_rate": 1.0000000000000002e-06, "step": 24843 }, { "epoch": 91.57509157509158, "grad_norm": 0.23301896452903748, "learning_rate": 1.0000000000000002e-06, "loss": 0.1213, "step": 25000 }, { "epoch": 92.0, "eval_accuracy": 0.30734580734580735, "eval_f1_macro": 0.7144036362020703, "eval_f1_micro": 0.8223893065998329, "eval_loss": 0.12069901078939438, "eval_roc_auc": 0.8806577087797879, "eval_runtime": 763.0077, "eval_samples_per_second": 3.782, "eval_steps_per_second": 0.119, "learning_rate": 1.0000000000000002e-06, "step": 25116 }, { "epoch": 93.0, "eval_accuracy": 0.30803880803880807, "eval_f1_macro": 0.7189178649032102, "eval_f1_micro": 0.8226574468966088, "eval_loss": 0.12093978375196457, "eval_roc_auc": 0.8834391187053254, "eval_runtime": 763.7654, "eval_samples_per_second": 3.779, "eval_steps_per_second": 0.119, "learning_rate": 1.0000000000000002e-06, "step": 25389 }, { "epoch": 93.4065934065934, "grad_norm": 0.2630571126937866, "learning_rate": 1.0000000000000002e-06, "loss": 0.122, "step": 25500 }, { "epoch": 94.0, "eval_accuracy": 0.3097713097713098, "eval_f1_macro": 0.7187657914933285, "eval_f1_micro": 0.8223438666334908, "eval_loss": 0.12092197686433792, "eval_roc_auc": 0.8828028504773688, "eval_runtime": 758.2573, "eval_samples_per_second": 3.806, "eval_steps_per_second": 0.12, "learning_rate": 1.0000000000000002e-06, "step": 25662 }, { "epoch": 95.0, "eval_accuracy": 0.30942480942480943, "eval_f1_macro": 0.7127077698746517, "eval_f1_micro": 0.8221934621968021, "eval_loss": 0.1206900030374527, "eval_roc_auc": 0.8807116052620565, "eval_runtime": 755.4845, "eval_samples_per_second": 3.82, "eval_steps_per_second": 0.12, "learning_rate": 1.0000000000000002e-06, "step": 25935 }, { "epoch": 95.23809523809524, "grad_norm": 0.32719686627388, "learning_rate": 1.0000000000000002e-06, "loss": 0.1209, "step": 26000 }, { "epoch": 96.0, "eval_accuracy": 0.30665280665280664, "eval_f1_macro": 0.7160309422692305, "eval_f1_micro": 0.8218438538205979, "eval_loss": 0.12142115086317062, "eval_roc_auc": 0.882100908487046, "eval_runtime": 764.2068, "eval_samples_per_second": 3.776, "eval_steps_per_second": 0.119, "learning_rate": 1.0000000000000002e-06, "step": 26208 }, { "epoch": 97.0, "eval_accuracy": 0.30942480942480943, "eval_f1_macro": 0.71586766610014, "eval_f1_micro": 0.8208711661575798, "eval_loss": 0.12264719605445862, "eval_roc_auc": 0.879308955347207, "eval_runtime": 783.117, "eval_samples_per_second": 3.685, "eval_steps_per_second": 0.116, "learning_rate": 1.0000000000000002e-06, "step": 26481 }, { "epoch": 97.06959706959707, "grad_norm": 0.27319103479385376, "learning_rate": 1.0000000000000002e-06, "loss": 0.122, "step": 26500 }, { "epoch": 98.0, "eval_accuracy": 0.31185031185031187, "eval_f1_macro": 0.7190138873820752, "eval_f1_micro": 0.8224561403508771, "eval_loss": 0.12095578759908676, "eval_roc_auc": 0.8842500877259815, "eval_runtime": 761.8672, "eval_samples_per_second": 3.788, "eval_steps_per_second": 0.119, "learning_rate": 1.0000000000000002e-06, "step": 26754 }, { "epoch": 98.9010989010989, "grad_norm": 0.314969539642334, "learning_rate": 1.0000000000000002e-07, "loss": 0.1218, "step": 27000 }, { "epoch": 99.0, "eval_accuracy": 0.3097713097713098, "eval_f1_macro": 0.7177436878101541, "eval_f1_micro": 0.821403230518803, "eval_loss": 0.12075632065534592, "eval_roc_auc": 0.8803494740196957, "eval_runtime": 749.7836, "eval_samples_per_second": 3.849, "eval_steps_per_second": 0.121, "learning_rate": 1.0000000000000002e-07, "step": 27027 }, { "epoch": 100.0, "eval_accuracy": 0.3108108108108108, "eval_f1_macro": 0.7191112023643382, "eval_f1_micro": 0.8218776194467728, "eval_loss": 0.12078335881233215, "eval_roc_auc": 0.8793780496180298, "eval_runtime": 751.4627, "eval_samples_per_second": 3.841, "eval_steps_per_second": 0.121, "learning_rate": 1.0000000000000002e-07, "step": 27300 }, { "epoch": 100.73260073260073, "grad_norm": 0.3180501163005829, "learning_rate": 1.0000000000000002e-07, "loss": 0.1222, "step": 27500 }, { "epoch": 101.0, "eval_accuracy": 0.3097713097713098, "eval_f1_macro": 0.7199208624613478, "eval_f1_micro": 0.8230599775551769, "eval_loss": 0.12071150541305542, "eval_roc_auc": 0.8825144680800833, "eval_runtime": 753.7405, "eval_samples_per_second": 3.829, "eval_steps_per_second": 0.121, "learning_rate": 1.0000000000000002e-07, "step": 27573 }, { "epoch": 102.0, "eval_accuracy": 0.31011781011781014, "eval_f1_macro": 0.7181176324357539, "eval_f1_micro": 0.821560093739538, "eval_loss": 0.12102664262056351, "eval_roc_auc": 0.8796515695707274, "eval_runtime": 750.0067, "eval_samples_per_second": 3.848, "eval_steps_per_second": 0.121, "learning_rate": 1.0000000000000002e-07, "step": 27846 }, { "epoch": 102.56410256410257, "grad_norm": 0.257368803024292, "learning_rate": 1.0000000000000002e-07, "loss": 0.1212, "step": 28000 }, { "epoch": 103.0, "eval_accuracy": 0.31115731115731116, "eval_f1_macro": 0.7156251632807489, "eval_f1_micro": 0.8218559116391932, "eval_loss": 0.12072332948446274, "eval_roc_auc": 0.879889475994201, "eval_runtime": 747.7283, "eval_samples_per_second": 3.86, "eval_steps_per_second": 0.122, "learning_rate": 1.0000000000000002e-07, "step": 28119 }, { "epoch": 104.0, "eval_accuracy": 0.3090783090783091, "eval_f1_macro": 0.7151217785983346, "eval_f1_micro": 0.8214226220223222, "eval_loss": 0.12122868001461029, "eval_roc_auc": 0.8810201217110805, "eval_runtime": 751.9776, "eval_samples_per_second": 3.838, "eval_steps_per_second": 0.121, "learning_rate": 1.0000000000000002e-07, "step": 28392 }, { "epoch": 104.3956043956044, "grad_norm": 0.2758227586746216, "learning_rate": 1.0000000000000004e-08, "loss": 0.1204, "step": 28500 }, { "epoch": 105.0, "eval_accuracy": 0.30838530838530837, "eval_f1_macro": 0.7175066761763569, "eval_f1_micro": 0.8216449497883642, "eval_loss": 0.12081456929445267, "eval_roc_auc": 0.882214590091632, "eval_runtime": 750.7114, "eval_samples_per_second": 3.844, "eval_steps_per_second": 0.121, "learning_rate": 1.0000000000000004e-08, "step": 28665 }, { "epoch": 105.0, "learning_rate": 1.0000000000000004e-08, "step": 28665, "total_flos": 5.049640374682393e+21, "train_loss": 0.023157235795491324, "train_runtime": 62002.1626, "train_samples_per_second": 21.086, "train_steps_per_second": 0.66 } ], "logging_steps": 500, "max_steps": 40950, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.049640374682393e+21, "train_batch_size": 32, "trial_name": null, "trial_params": null }