{ "best_metric": 0.9713785046728972, "best_model_checkpoint": "swin-large-patch4-window7-224-in22k-finetuned-lora-medmnistv2/checkpoint-1870", "epoch": 10.0, "eval_steps": 500, "global_step": 1870, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 2.163198709487915, "learning_rate": 0.004973262032085562, "loss": 1.5101, "step": 10 }, { "epoch": 0.11, "grad_norm": 1.4286870956420898, "learning_rate": 0.004946524064171123, "loss": 0.8667, "step": 20 }, { "epoch": 0.16, "grad_norm": 1.847931981086731, "learning_rate": 0.004919786096256685, "loss": 0.7414, "step": 30 }, { "epoch": 0.21, "grad_norm": 1.5748757123947144, "learning_rate": 0.004893048128342246, "loss": 0.755, "step": 40 }, { "epoch": 0.27, "grad_norm": 2.017432928085327, "learning_rate": 0.004866310160427808, "loss": 0.6683, "step": 50 }, { "epoch": 0.32, "grad_norm": 1.5988194942474365, "learning_rate": 0.004839572192513369, "loss": 0.7084, "step": 60 }, { "epoch": 0.37, "grad_norm": 1.7127466201782227, "learning_rate": 0.004812834224598931, "loss": 0.6459, "step": 70 }, { "epoch": 0.43, "grad_norm": 2.1388797760009766, "learning_rate": 0.004786096256684492, "loss": 0.7116, "step": 80 }, { "epoch": 0.48, "grad_norm": 2.5939793586730957, "learning_rate": 0.004759358288770054, "loss": 0.5753, "step": 90 }, { "epoch": 0.53, "grad_norm": 1.463460087776184, "learning_rate": 0.004732620320855615, "loss": 0.5938, "step": 100 }, { "epoch": 0.59, "grad_norm": 1.9902774095535278, "learning_rate": 0.004705882352941177, "loss": 0.5525, "step": 110 }, { "epoch": 0.64, "grad_norm": 1.881441593170166, "learning_rate": 0.004679144385026738, "loss": 0.5788, "step": 120 }, { "epoch": 0.7, "grad_norm": 2.161348581314087, "learning_rate": 0.0046524064171123, "loss": 0.5378, "step": 130 }, { "epoch": 0.75, "grad_norm": 1.5160846710205078, "learning_rate": 0.0046256684491978615, "loss": 0.479, "step": 140 }, { "epoch": 0.8, "grad_norm": 1.4215080738067627, "learning_rate": 0.004598930481283423, "loss": 0.5123, "step": 150 }, { "epoch": 0.86, "grad_norm": 1.2568920850753784, "learning_rate": 0.004572192513368984, "loss": 0.5499, "step": 160 }, { "epoch": 0.91, "grad_norm": 0.9570059180259705, "learning_rate": 0.004545454545454545, "loss": 0.4845, "step": 170 }, { "epoch": 0.96, "grad_norm": 2.3021810054779053, "learning_rate": 0.004518716577540107, "loss": 0.5141, "step": 180 }, { "epoch": 1.0, "eval_accuracy": 0.9065420560747663, "eval_f1": 0.8872552707825272, "eval_loss": 0.2832600176334381, "eval_precision": 0.8954019032094056, "eval_recall": 0.8949326095168356, "eval_runtime": 19.6658, "eval_samples_per_second": 87.054, "eval_steps_per_second": 5.441, "step": 187 }, { "epoch": 1.02, "grad_norm": 1.8552567958831787, "learning_rate": 0.004491978609625669, "loss": 0.4441, "step": 190 }, { "epoch": 1.07, "grad_norm": 2.53872013092041, "learning_rate": 0.00446524064171123, "loss": 0.4436, "step": 200 }, { "epoch": 1.12, "grad_norm": 1.3826777935028076, "learning_rate": 0.004438502673796791, "loss": 0.4632, "step": 210 }, { "epoch": 1.18, "grad_norm": 2.2216227054595947, "learning_rate": 0.004411764705882353, "loss": 0.4429, "step": 220 }, { "epoch": 1.23, "grad_norm": 1.8521422147750854, "learning_rate": 0.004385026737967914, "loss": 0.4472, "step": 230 }, { "epoch": 1.28, "grad_norm": 2.058058977127075, "learning_rate": 0.00436096256684492, "loss": 0.4757, "step": 240 }, { "epoch": 1.34, "grad_norm": 1.1437183618545532, "learning_rate": 0.004334224598930481, "loss": 0.3436, "step": 250 }, { "epoch": 1.39, "grad_norm": 1.761400580406189, "learning_rate": 0.0043074866310160425, "loss": 0.4958, "step": 260 }, { "epoch": 1.44, "grad_norm": 1.4134129285812378, "learning_rate": 0.004280748663101605, "loss": 0.4519, "step": 270 }, { "epoch": 1.5, "grad_norm": 1.7341545820236206, "learning_rate": 0.004254010695187166, "loss": 0.528, "step": 280 }, { "epoch": 1.55, "grad_norm": 2.980020761489868, "learning_rate": 0.004227272727272727, "loss": 0.5021, "step": 290 }, { "epoch": 1.6, "grad_norm": 0.6755030751228333, "learning_rate": 0.004200534759358289, "loss": 0.4601, "step": 300 }, { "epoch": 1.66, "grad_norm": 1.8686202764511108, "learning_rate": 0.00417379679144385, "loss": 0.4433, "step": 310 }, { "epoch": 1.71, "grad_norm": 1.371077299118042, "learning_rate": 0.004147058823529412, "loss": 0.4323, "step": 320 }, { "epoch": 1.76, "grad_norm": 1.0771093368530273, "learning_rate": 0.004120320855614973, "loss": 0.4251, "step": 330 }, { "epoch": 1.82, "grad_norm": 1.185023546218872, "learning_rate": 0.004093582887700535, "loss": 0.4881, "step": 340 }, { "epoch": 1.87, "grad_norm": 0.9843281507492065, "learning_rate": 0.004066844919786096, "loss": 0.4483, "step": 350 }, { "epoch": 1.93, "grad_norm": 1.6477869749069214, "learning_rate": 0.004040106951871658, "loss": 0.4956, "step": 360 }, { "epoch": 1.98, "grad_norm": 1.7044633626937866, "learning_rate": 0.004013368983957219, "loss": 0.4176, "step": 370 }, { "epoch": 2.0, "eval_accuracy": 0.9310747663551402, "eval_f1": 0.9182291375322846, "eval_loss": 0.198581263422966, "eval_precision": 0.924344363515161, "eval_recall": 0.9209393532374213, "eval_runtime": 19.6732, "eval_samples_per_second": 87.022, "eval_steps_per_second": 5.439, "step": 374 }, { "epoch": 2.03, "grad_norm": 1.662027359008789, "learning_rate": 0.003986631016042781, "loss": 0.4022, "step": 380 }, { "epoch": 2.09, "grad_norm": 1.188351035118103, "learning_rate": 0.003959893048128342, "loss": 0.3758, "step": 390 }, { "epoch": 2.14, "grad_norm": 2.2225048542022705, "learning_rate": 0.003933155080213904, "loss": 0.4491, "step": 400 }, { "epoch": 2.19, "grad_norm": 1.683356761932373, "learning_rate": 0.0039064171122994654, "loss": 0.3647, "step": 410 }, { "epoch": 2.25, "grad_norm": 1.7646687030792236, "learning_rate": 0.0038796791443850265, "loss": 0.4666, "step": 420 }, { "epoch": 2.3, "grad_norm": 2.173644781112671, "learning_rate": 0.0038529411764705885, "loss": 0.4314, "step": 430 }, { "epoch": 2.35, "grad_norm": 0.8064551949501038, "learning_rate": 0.00382620320855615, "loss": 0.3944, "step": 440 }, { "epoch": 2.41, "grad_norm": 0.9698677062988281, "learning_rate": 0.003799465240641711, "loss": 0.4314, "step": 450 }, { "epoch": 2.46, "grad_norm": 0.9321346879005432, "learning_rate": 0.0037727272727272726, "loss": 0.467, "step": 460 }, { "epoch": 2.51, "grad_norm": 2.6592769622802734, "learning_rate": 0.003745989304812834, "loss": 0.4024, "step": 470 }, { "epoch": 2.57, "grad_norm": 1.7124016284942627, "learning_rate": 0.003719251336898396, "loss": 0.3283, "step": 480 }, { "epoch": 2.62, "grad_norm": 3.178034543991089, "learning_rate": 0.0036925133689839572, "loss": 0.4377, "step": 490 }, { "epoch": 2.67, "grad_norm": 1.2681751251220703, "learning_rate": 0.0036657754010695188, "loss": 0.3866, "step": 500 }, { "epoch": 2.73, "grad_norm": 1.1923668384552002, "learning_rate": 0.0036390374331550803, "loss": 0.3366, "step": 510 }, { "epoch": 2.78, "grad_norm": 1.499803066253662, "learning_rate": 0.0036122994652406414, "loss": 0.4578, "step": 520 }, { "epoch": 2.83, "grad_norm": 1.887222409248352, "learning_rate": 0.0035855614973262034, "loss": 0.4189, "step": 530 }, { "epoch": 2.89, "grad_norm": 1.3592134714126587, "learning_rate": 0.003558823529411765, "loss": 0.4008, "step": 540 }, { "epoch": 2.94, "grad_norm": 3.0257527828216553, "learning_rate": 0.0035320855614973264, "loss": 0.3774, "step": 550 }, { "epoch": 2.99, "grad_norm": 1.093493103981018, "learning_rate": 0.0035053475935828875, "loss": 0.3454, "step": 560 }, { "epoch": 3.0, "eval_accuracy": 0.9503504672897196, "eval_f1": 0.9402807880914787, "eval_loss": 0.15674300491809845, "eval_precision": 0.9426615409260363, "eval_recall": 0.9397047025483766, "eval_runtime": 19.5644, "eval_samples_per_second": 87.506, "eval_steps_per_second": 5.469, "step": 561 }, { "epoch": 3.05, "grad_norm": 1.7053543329238892, "learning_rate": 0.003478609625668449, "loss": 0.3776, "step": 570 }, { "epoch": 3.1, "grad_norm": 1.5041882991790771, "learning_rate": 0.003451871657754011, "loss": 0.4058, "step": 580 }, { "epoch": 3.16, "grad_norm": 1.3619967699050903, "learning_rate": 0.0034251336898395725, "loss": 0.3646, "step": 590 }, { "epoch": 3.21, "grad_norm": 1.1415998935699463, "learning_rate": 0.0033983957219251336, "loss": 0.4906, "step": 600 }, { "epoch": 3.26, "grad_norm": 1.6870795488357544, "learning_rate": 0.003371657754010695, "loss": 0.3828, "step": 610 }, { "epoch": 3.32, "grad_norm": 1.0538561344146729, "learning_rate": 0.0033449197860962567, "loss": 0.3728, "step": 620 }, { "epoch": 3.37, "grad_norm": 2.340454339981079, "learning_rate": 0.0033181818181818186, "loss": 0.3809, "step": 630 }, { "epoch": 3.42, "grad_norm": 2.317230224609375, "learning_rate": 0.0032914438502673797, "loss": 0.3391, "step": 640 }, { "epoch": 3.48, "grad_norm": 1.242281436920166, "learning_rate": 0.0032647058823529413, "loss": 0.4091, "step": 650 }, { "epoch": 3.53, "grad_norm": 1.23116934299469, "learning_rate": 0.003237967914438503, "loss": 0.3592, "step": 660 }, { "epoch": 3.58, "grad_norm": 1.117090106010437, "learning_rate": 0.003211229946524064, "loss": 0.3867, "step": 670 }, { "epoch": 3.64, "grad_norm": 1.0917716026306152, "learning_rate": 0.0031844919786096254, "loss": 0.4386, "step": 680 }, { "epoch": 3.69, "grad_norm": 1.2080508470535278, "learning_rate": 0.0031577540106951874, "loss": 0.3466, "step": 690 }, { "epoch": 3.74, "grad_norm": 1.695580244064331, "learning_rate": 0.003131016042780749, "loss": 0.3147, "step": 700 }, { "epoch": 3.8, "grad_norm": 1.1604491472244263, "learning_rate": 0.00310427807486631, "loss": 0.3585, "step": 710 }, { "epoch": 3.85, "grad_norm": 1.8931636810302734, "learning_rate": 0.0030775401069518715, "loss": 0.3578, "step": 720 }, { "epoch": 3.9, "grad_norm": 1.4620869159698486, "learning_rate": 0.003050802139037433, "loss": 0.3522, "step": 730 }, { "epoch": 3.96, "grad_norm": 1.3944414854049683, "learning_rate": 0.003024064171122995, "loss": 0.3228, "step": 740 }, { "epoch": 4.0, "eval_accuracy": 0.9357476635514018, "eval_f1": 0.9283408808159395, "eval_loss": 0.1848856657743454, "eval_precision": 0.9231661406901872, "eval_recall": 0.9426484043891363, "eval_runtime": 19.683, "eval_samples_per_second": 86.979, "eval_steps_per_second": 5.436, "step": 748 }, { "epoch": 4.01, "grad_norm": 1.7614619731903076, "learning_rate": 0.002997326203208556, "loss": 0.3463, "step": 750 }, { "epoch": 4.06, "grad_norm": 2.866691827774048, "learning_rate": 0.0029705882352941177, "loss": 0.3431, "step": 760 }, { "epoch": 4.12, "grad_norm": 3.0871615409851074, "learning_rate": 0.002943850267379679, "loss": 0.4329, "step": 770 }, { "epoch": 4.17, "grad_norm": 1.3399722576141357, "learning_rate": 0.0029171122994652403, "loss": 0.3992, "step": 780 }, { "epoch": 4.22, "grad_norm": 1.440559983253479, "learning_rate": 0.0028903743315508022, "loss": 0.3333, "step": 790 }, { "epoch": 4.28, "grad_norm": 1.4606270790100098, "learning_rate": 0.0028636363636363638, "loss": 0.3108, "step": 800 }, { "epoch": 4.33, "grad_norm": 2.4641544818878174, "learning_rate": 0.0028368983957219253, "loss": 0.3436, "step": 810 }, { "epoch": 4.39, "grad_norm": 1.9653208255767822, "learning_rate": 0.0028101604278074864, "loss": 0.2766, "step": 820 }, { "epoch": 4.44, "grad_norm": 1.0840091705322266, "learning_rate": 0.002783422459893048, "loss": 0.2568, "step": 830 }, { "epoch": 4.49, "grad_norm": 1.0625332593917847, "learning_rate": 0.00275668449197861, "loss": 0.3366, "step": 840 }, { "epoch": 4.55, "grad_norm": 0.9171143174171448, "learning_rate": 0.0027299465240641714, "loss": 0.339, "step": 850 }, { "epoch": 4.6, "grad_norm": 1.6296868324279785, "learning_rate": 0.0027032085561497325, "loss": 0.359, "step": 860 }, { "epoch": 4.65, "grad_norm": 1.949312448501587, "learning_rate": 0.002676470588235294, "loss": 0.3529, "step": 870 }, { "epoch": 4.71, "grad_norm": 1.6241270303726196, "learning_rate": 0.0026497326203208556, "loss": 0.3364, "step": 880 }, { "epoch": 4.76, "grad_norm": 2.172145366668701, "learning_rate": 0.0026229946524064175, "loss": 0.3374, "step": 890 }, { "epoch": 4.81, "grad_norm": 3.377912998199463, "learning_rate": 0.0025962566844919786, "loss": 0.3555, "step": 900 }, { "epoch": 4.87, "grad_norm": 1.194082260131836, "learning_rate": 0.00256951871657754, "loss": 0.3354, "step": 910 }, { "epoch": 4.92, "grad_norm": 1.774932861328125, "learning_rate": 0.0025427807486631017, "loss": 0.3728, "step": 920 }, { "epoch": 4.97, "grad_norm": 0.9065486192703247, "learning_rate": 0.002516042780748663, "loss": 0.3382, "step": 930 }, { "epoch": 5.0, "eval_accuracy": 0.9398364485981309, "eval_f1": 0.9320964504504674, "eval_loss": 0.16266803443431854, "eval_precision": 0.9301560138584124, "eval_recall": 0.9396981551324834, "eval_runtime": 19.6531, "eval_samples_per_second": 87.111, "eval_steps_per_second": 5.444, "step": 935 }, { "epoch": 5.03, "grad_norm": 0.8373203873634338, "learning_rate": 0.0024893048128342248, "loss": 0.3115, "step": 940 }, { "epoch": 5.08, "grad_norm": 1.6470876932144165, "learning_rate": 0.002462566844919786, "loss": 0.3746, "step": 950 }, { "epoch": 5.13, "grad_norm": 2.556999444961548, "learning_rate": 0.002435828877005348, "loss": 0.3411, "step": 960 }, { "epoch": 5.19, "grad_norm": 1.753217101097107, "learning_rate": 0.002409090909090909, "loss": 0.3095, "step": 970 }, { "epoch": 5.24, "grad_norm": 2.667759895324707, "learning_rate": 0.0023823529411764704, "loss": 0.3358, "step": 980 }, { "epoch": 5.29, "grad_norm": 1.6711212396621704, "learning_rate": 0.002355614973262032, "loss": 0.3263, "step": 990 }, { "epoch": 5.35, "grad_norm": 1.8793816566467285, "learning_rate": 0.0023288770053475935, "loss": 0.3245, "step": 1000 }, { "epoch": 5.4, "grad_norm": 1.3059521913528442, "learning_rate": 0.002302139037433155, "loss": 0.2904, "step": 1010 }, { "epoch": 5.45, "grad_norm": 1.765958309173584, "learning_rate": 0.0022754010695187166, "loss": 0.3424, "step": 1020 }, { "epoch": 5.51, "grad_norm": 0.9322473406791687, "learning_rate": 0.002248663101604278, "loss": 0.3716, "step": 1030 }, { "epoch": 5.56, "grad_norm": 2.082515239715576, "learning_rate": 0.0022219251336898396, "loss": 0.2967, "step": 1040 }, { "epoch": 5.61, "grad_norm": 1.6903836727142334, "learning_rate": 0.002195187165775401, "loss": 0.3244, "step": 1050 }, { "epoch": 5.67, "grad_norm": 1.1631466150283813, "learning_rate": 0.0021684491978609627, "loss": 0.3141, "step": 1060 }, { "epoch": 5.72, "grad_norm": 2.086376428604126, "learning_rate": 0.002141711229946524, "loss": 0.3211, "step": 1070 }, { "epoch": 5.78, "grad_norm": 1.709187626838684, "learning_rate": 0.0021149732620320857, "loss": 0.3039, "step": 1080 }, { "epoch": 5.83, "grad_norm": 1.7365305423736572, "learning_rate": 0.0020882352941176473, "loss": 0.2937, "step": 1090 }, { "epoch": 5.88, "grad_norm": 1.2648741006851196, "learning_rate": 0.0020614973262032084, "loss": 0.2951, "step": 1100 }, { "epoch": 5.94, "grad_norm": 1.2121895551681519, "learning_rate": 0.00203475935828877, "loss": 0.242, "step": 1110 }, { "epoch": 5.99, "grad_norm": 1.6397563219070435, "learning_rate": 0.0020080213903743314, "loss": 0.3363, "step": 1120 }, { "epoch": 6.0, "eval_accuracy": 0.9509345794392523, "eval_f1": 0.9456282248184136, "eval_loss": 0.14138737320899963, "eval_precision": 0.9497944760971885, "eval_recall": 0.9441674024122191, "eval_runtime": 19.937, "eval_samples_per_second": 85.87, "eval_steps_per_second": 5.367, "step": 1122 }, { "epoch": 6.04, "grad_norm": 1.3460767269134521, "learning_rate": 0.001981283422459893, "loss": 0.3134, "step": 1130 }, { "epoch": 6.1, "grad_norm": 1.2124683856964111, "learning_rate": 0.0019545454545454545, "loss": 0.3028, "step": 1140 }, { "epoch": 6.15, "grad_norm": 0.8806934952735901, "learning_rate": 0.001927807486631016, "loss": 0.2589, "step": 1150 }, { "epoch": 6.2, "grad_norm": 1.059187889099121, "learning_rate": 0.0019010695187165775, "loss": 0.2888, "step": 1160 }, { "epoch": 6.26, "grad_norm": 2.5121827125549316, "learning_rate": 0.001874331550802139, "loss": 0.2741, "step": 1170 }, { "epoch": 6.31, "grad_norm": 1.0052329301834106, "learning_rate": 0.0018475935828877006, "loss": 0.3519, "step": 1180 }, { "epoch": 6.36, "grad_norm": 1.4301072359085083, "learning_rate": 0.0018208556149732621, "loss": 0.2937, "step": 1190 }, { "epoch": 6.42, "grad_norm": 1.09031343460083, "learning_rate": 0.0017941176470588236, "loss": 0.2252, "step": 1200 }, { "epoch": 6.47, "grad_norm": 1.9657083749771118, "learning_rate": 0.001767379679144385, "loss": 0.267, "step": 1210 }, { "epoch": 6.52, "grad_norm": 3.7427196502685547, "learning_rate": 0.0017406417112299467, "loss": 0.2493, "step": 1220 }, { "epoch": 6.58, "grad_norm": 1.7291096448898315, "learning_rate": 0.001713903743315508, "loss": 0.2558, "step": 1230 }, { "epoch": 6.63, "grad_norm": 2.8834567070007324, "learning_rate": 0.0016871657754010698, "loss": 0.3167, "step": 1240 }, { "epoch": 6.68, "grad_norm": 1.6702009439468384, "learning_rate": 0.001660427807486631, "loss": 0.274, "step": 1250 }, { "epoch": 6.74, "grad_norm": 1.7623697519302368, "learning_rate": 0.0016336898395721924, "loss": 0.2481, "step": 1260 }, { "epoch": 6.79, "grad_norm": 1.8855972290039062, "learning_rate": 0.0016069518716577541, "loss": 0.2424, "step": 1270 }, { "epoch": 6.84, "grad_norm": 1.7909148931503296, "learning_rate": 0.0015802139037433154, "loss": 0.2361, "step": 1280 }, { "epoch": 6.9, "grad_norm": 1.424047589302063, "learning_rate": 0.001553475935828877, "loss": 0.2834, "step": 1290 }, { "epoch": 6.95, "grad_norm": 1.3470966815948486, "learning_rate": 0.0015267379679144385, "loss": 0.2981, "step": 1300 }, { "epoch": 7.0, "eval_accuracy": 0.9544392523364486, "eval_f1": 0.9480272544883982, "eval_loss": 0.11172817647457123, "eval_precision": 0.9458066711610336, "eval_recall": 0.9541586489707353, "eval_runtime": 19.6986, "eval_samples_per_second": 86.91, "eval_steps_per_second": 5.432, "step": 1309 }, { "epoch": 7.01, "grad_norm": 1.9716545343399048, "learning_rate": 0.0015, "loss": 0.2591, "step": 1310 }, { "epoch": 7.06, "grad_norm": 2.347787618637085, "learning_rate": 0.0014732620320855616, "loss": 0.2324, "step": 1320 }, { "epoch": 7.11, "grad_norm": 1.5514649152755737, "learning_rate": 0.001446524064171123, "loss": 0.2163, "step": 1330 }, { "epoch": 7.17, "grad_norm": 3.073544979095459, "learning_rate": 0.0014197860962566844, "loss": 0.2889, "step": 1340 }, { "epoch": 7.22, "grad_norm": 1.5972115993499756, "learning_rate": 0.0013930481283422461, "loss": 0.2589, "step": 1350 }, { "epoch": 7.27, "grad_norm": 1.8408401012420654, "learning_rate": 0.0013663101604278075, "loss": 0.2333, "step": 1360 }, { "epoch": 7.33, "grad_norm": 1.3704335689544678, "learning_rate": 0.0013395721925133692, "loss": 0.2103, "step": 1370 }, { "epoch": 7.38, "grad_norm": 3.6621859073638916, "learning_rate": 0.0013128342245989305, "loss": 0.2413, "step": 1380 }, { "epoch": 7.43, "grad_norm": 1.345258355140686, "learning_rate": 0.0012860962566844918, "loss": 0.2444, "step": 1390 }, { "epoch": 7.49, "grad_norm": 1.354202389717102, "learning_rate": 0.0012593582887700536, "loss": 0.2288, "step": 1400 }, { "epoch": 7.54, "grad_norm": 0.983450174331665, "learning_rate": 0.0012326203208556149, "loss": 0.2995, "step": 1410 }, { "epoch": 7.59, "grad_norm": 1.7251689434051514, "learning_rate": 0.0012058823529411764, "loss": 0.2898, "step": 1420 }, { "epoch": 7.65, "grad_norm": 1.4366217851638794, "learning_rate": 0.001179144385026738, "loss": 0.2509, "step": 1430 }, { "epoch": 7.7, "grad_norm": 1.6491020917892456, "learning_rate": 0.0011524064171122995, "loss": 0.2191, "step": 1440 }, { "epoch": 7.75, "grad_norm": 1.4462454319000244, "learning_rate": 0.001125668449197861, "loss": 0.2307, "step": 1450 }, { "epoch": 7.81, "grad_norm": 1.5503740310668945, "learning_rate": 0.0010989304812834225, "loss": 0.2167, "step": 1460 }, { "epoch": 7.86, "grad_norm": 1.5065810680389404, "learning_rate": 0.001072192513368984, "loss": 0.3377, "step": 1470 }, { "epoch": 7.91, "grad_norm": 1.3696374893188477, "learning_rate": 0.0010454545454545454, "loss": 0.24, "step": 1480 }, { "epoch": 7.97, "grad_norm": 0.9576804041862488, "learning_rate": 0.001018716577540107, "loss": 0.2214, "step": 1490 }, { "epoch": 8.0, "eval_accuracy": 0.9649532710280374, "eval_f1": 0.9609815836403053, "eval_loss": 0.11309263855218887, "eval_precision": 0.9642473014777337, "eval_recall": 0.9584474051621633, "eval_runtime": 19.6442, "eval_samples_per_second": 87.15, "eval_steps_per_second": 5.447, "step": 1496 }, { "epoch": 8.02, "grad_norm": 1.6305640935897827, "learning_rate": 0.0009919786096256684, "loss": 0.2645, "step": 1500 }, { "epoch": 8.07, "grad_norm": 1.0711798667907715, "learning_rate": 0.00096524064171123, "loss": 0.2063, "step": 1510 }, { "epoch": 8.13, "grad_norm": 1.2606171369552612, "learning_rate": 0.0009385026737967915, "loss": 0.1904, "step": 1520 }, { "epoch": 8.18, "grad_norm": 0.8554580807685852, "learning_rate": 0.0009117647058823529, "loss": 0.2078, "step": 1530 }, { "epoch": 8.24, "grad_norm": 1.0638494491577148, "learning_rate": 0.0008850267379679144, "loss": 0.2129, "step": 1540 }, { "epoch": 8.29, "grad_norm": 1.4322021007537842, "learning_rate": 0.000858288770053476, "loss": 0.2761, "step": 1550 }, { "epoch": 8.34, "grad_norm": 1.2639697790145874, "learning_rate": 0.0008315508021390375, "loss": 0.1979, "step": 1560 }, { "epoch": 8.4, "grad_norm": 1.108430027961731, "learning_rate": 0.0008048128342245989, "loss": 0.2051, "step": 1570 }, { "epoch": 8.45, "grad_norm": 2.08953857421875, "learning_rate": 0.0007780748663101605, "loss": 0.2306, "step": 1580 }, { "epoch": 8.5, "grad_norm": 1.464694857597351, "learning_rate": 0.000751336898395722, "loss": 0.1992, "step": 1590 }, { "epoch": 8.56, "grad_norm": 1.4773173332214355, "learning_rate": 0.0007245989304812835, "loss": 0.1764, "step": 1600 }, { "epoch": 8.61, "grad_norm": 2.048029661178589, "learning_rate": 0.000697860962566845, "loss": 0.237, "step": 1610 }, { "epoch": 8.66, "grad_norm": 1.0951212644577026, "learning_rate": 0.0006711229946524064, "loss": 0.1821, "step": 1620 }, { "epoch": 8.72, "grad_norm": 1.084712028503418, "learning_rate": 0.0006443850267379679, "loss": 0.1947, "step": 1630 }, { "epoch": 8.77, "grad_norm": 1.007285714149475, "learning_rate": 0.0006176470588235294, "loss": 0.2014, "step": 1640 }, { "epoch": 8.82, "grad_norm": 1.0643844604492188, "learning_rate": 0.0005909090909090909, "loss": 0.2411, "step": 1650 }, { "epoch": 8.88, "grad_norm": 2.0171964168548584, "learning_rate": 0.0005641711229946525, "loss": 0.2297, "step": 1660 }, { "epoch": 8.93, "grad_norm": 0.8814995884895325, "learning_rate": 0.0005374331550802139, "loss": 0.2052, "step": 1670 }, { "epoch": 8.98, "grad_norm": 1.338088035583496, "learning_rate": 0.0005106951871657754, "loss": 0.1928, "step": 1680 }, { "epoch": 9.0, "eval_accuracy": 0.9649532710280374, "eval_f1": 0.9624133353031232, "eval_loss": 0.09664417803287506, "eval_precision": 0.9632215980141733, "eval_recall": 0.9628324486352646, "eval_runtime": 19.7505, "eval_samples_per_second": 86.681, "eval_steps_per_second": 5.418, "step": 1683 }, { "epoch": 9.04, "grad_norm": 1.1753814220428467, "learning_rate": 0.0004839572192513369, "loss": 0.1862, "step": 1690 }, { "epoch": 9.09, "grad_norm": 0.9707505702972412, "learning_rate": 0.0004572192513368984, "loss": 0.2182, "step": 1700 }, { "epoch": 9.14, "grad_norm": 0.9967671632766724, "learning_rate": 0.0004304812834224599, "loss": 0.1923, "step": 1710 }, { "epoch": 9.2, "grad_norm": 1.496031641960144, "learning_rate": 0.00040374331550802143, "loss": 0.2105, "step": 1720 }, { "epoch": 9.25, "grad_norm": 0.8774816393852234, "learning_rate": 0.00037700534759358285, "loss": 0.1969, "step": 1730 }, { "epoch": 9.3, "grad_norm": 0.6063610315322876, "learning_rate": 0.0003502673796791444, "loss": 0.1577, "step": 1740 }, { "epoch": 9.36, "grad_norm": 0.8216743469238281, "learning_rate": 0.0003235294117647059, "loss": 0.2064, "step": 1750 }, { "epoch": 9.41, "grad_norm": 0.7338688373565674, "learning_rate": 0.0002967914438502674, "loss": 0.1793, "step": 1760 }, { "epoch": 9.47, "grad_norm": 0.910650372505188, "learning_rate": 0.00027005347593582886, "loss": 0.194, "step": 1770 }, { "epoch": 9.52, "grad_norm": 0.7778304219245911, "learning_rate": 0.00024331550802139036, "loss": 0.2203, "step": 1780 }, { "epoch": 9.57, "grad_norm": 1.0693227052688599, "learning_rate": 0.00021657754010695186, "loss": 0.1718, "step": 1790 }, { "epoch": 9.63, "grad_norm": 1.4808011054992676, "learning_rate": 0.0001898395721925134, "loss": 0.1696, "step": 1800 }, { "epoch": 9.68, "grad_norm": 0.8625634908676147, "learning_rate": 0.0001631016042780749, "loss": 0.1875, "step": 1810 }, { "epoch": 9.73, "grad_norm": 1.1236218214035034, "learning_rate": 0.00013636363636363637, "loss": 0.1772, "step": 1820 }, { "epoch": 9.79, "grad_norm": 1.027061939239502, "learning_rate": 0.00010962566844919787, "loss": 0.2274, "step": 1830 }, { "epoch": 9.84, "grad_norm": 0.977976381778717, "learning_rate": 8.288770053475936e-05, "loss": 0.1672, "step": 1840 }, { "epoch": 9.89, "grad_norm": 0.957969069480896, "learning_rate": 5.614973262032086e-05, "loss": 0.1966, "step": 1850 }, { "epoch": 9.95, "grad_norm": 0.6182002425193787, "learning_rate": 2.9411764705882354e-05, "loss": 0.1546, "step": 1860 }, { "epoch": 10.0, "grad_norm": 1.8023217916488647, "learning_rate": 2.6737967914438504e-06, "loss": 0.1901, "step": 1870 }, { "epoch": 10.0, "eval_accuracy": 0.9713785046728972, "eval_f1": 0.9692014832223894, "eval_loss": 0.07747028768062592, "eval_precision": 0.968992240300534, "eval_recall": 0.9698888041231651, "eval_runtime": 19.6225, "eval_samples_per_second": 87.247, "eval_steps_per_second": 5.453, "step": 1870 }, { "epoch": 10.0, "step": 1870, "total_flos": 2.1188849626596557e+19, "train_loss": 0.34750947773775315, "train_runtime": 3122.2212, "train_samples_per_second": 38.303, "train_steps_per_second": 0.599 } ], "logging_steps": 10, "max_steps": 1870, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.1188849626596557e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }