|
{
  "best_metric": 0.9713785046728972,
  "best_model_checkpoint": "swin-large-patch4-window7-224-in22k-finetuned-lora-medmnistv2/checkpoint-1870",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 1870,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
|
    { "epoch": 0.05, "grad_norm": 2.163198709487915, "learning_rate": 0.004973262032085562, "loss": 1.5101, "step": 10 },
    { "epoch": 0.11, "grad_norm": 1.4286870956420898, "learning_rate": 0.004946524064171123, "loss": 0.8667, "step": 20 },
    { "epoch": 0.16, "grad_norm": 1.847931981086731, "learning_rate": 0.004919786096256685, "loss": 0.7414, "step": 30 },
    { "epoch": 0.21, "grad_norm": 1.5748757123947144, "learning_rate": 0.004893048128342246, "loss": 0.755, "step": 40 },
    { "epoch": 0.27, "grad_norm": 2.017432928085327, "learning_rate": 0.004866310160427808, "loss": 0.6683, "step": 50 },
    { "epoch": 0.32, "grad_norm": 1.5988194942474365, "learning_rate": 0.004839572192513369, "loss": 0.7084, "step": 60 },
    { "epoch": 0.37, "grad_norm": 1.7127466201782227, "learning_rate": 0.004812834224598931, "loss": 0.6459, "step": 70 },
    { "epoch": 0.43, "grad_norm": 2.1388797760009766, "learning_rate": 0.004786096256684492, "loss": 0.7116, "step": 80 },
    { "epoch": 0.48, "grad_norm": 2.5939793586730957, "learning_rate": 0.004759358288770054, "loss": 0.5753, "step": 90 },
    { "epoch": 0.53, "grad_norm": 1.463460087776184, "learning_rate": 0.004732620320855615, "loss": 0.5938, "step": 100 },
    { "epoch": 0.59, "grad_norm": 1.9902774095535278, "learning_rate": 0.004705882352941177, "loss": 0.5525, "step": 110 },
    { "epoch": 0.64, "grad_norm": 1.881441593170166, "learning_rate": 0.004679144385026738, "loss": 0.5788, "step": 120 },
    { "epoch": 0.7, "grad_norm": 2.161348581314087, "learning_rate": 0.0046524064171123, "loss": 0.5378, "step": 130 },
    { "epoch": 0.75, "grad_norm": 1.5160846710205078, "learning_rate": 0.0046256684491978615, "loss": 0.479, "step": 140 },
    { "epoch": 0.8, "grad_norm": 1.4215080738067627, "learning_rate": 0.004598930481283423, "loss": 0.5123, "step": 150 },
    { "epoch": 0.86, "grad_norm": 1.2568920850753784, "learning_rate": 0.004572192513368984, "loss": 0.5499, "step": 160 },
    { "epoch": 0.91, "grad_norm": 0.9570059180259705, "learning_rate": 0.004545454545454545, "loss": 0.4845, "step": 170 },
    { "epoch": 0.96, "grad_norm": 2.3021810054779053, "learning_rate": 0.004518716577540107, "loss": 0.5141, "step": 180 },
    { "epoch": 1.0, "eval_accuracy": 0.9065420560747663, "eval_f1": 0.8872552707825272, "eval_loss": 0.2832600176334381, "eval_precision": 0.8954019032094056, "eval_recall": 0.8949326095168356, "eval_runtime": 19.6658, "eval_samples_per_second": 87.054, "eval_steps_per_second": 5.441, "step": 187 },
    { "epoch": 1.02, "grad_norm": 1.8552567958831787, "learning_rate": 0.004491978609625669, "loss": 0.4441, "step": 190 },
    { "epoch": 1.07, "grad_norm": 2.53872013092041, "learning_rate": 0.00446524064171123, "loss": 0.4436, "step": 200 },
    { "epoch": 1.12, "grad_norm": 1.3826777935028076, "learning_rate": 0.004438502673796791, "loss": 0.4632, "step": 210 },
    { "epoch": 1.18, "grad_norm": 2.2216227054595947, "learning_rate": 0.004411764705882353, "loss": 0.4429, "step": 220 },
    { "epoch": 1.23, "grad_norm": 1.8521422147750854, "learning_rate": 0.004385026737967914, "loss": 0.4472, "step": 230 },
    { "epoch": 1.28, "grad_norm": 2.058058977127075, "learning_rate": 0.00436096256684492, "loss": 0.4757, "step": 240 },
    { "epoch": 1.34, "grad_norm": 1.1437183618545532, "learning_rate": 0.004334224598930481, "loss": 0.3436, "step": 250 },
    { "epoch": 1.39, "grad_norm": 1.761400580406189, "learning_rate": 0.0043074866310160425, "loss": 0.4958, "step": 260 },
    { "epoch": 1.44, "grad_norm": 1.4134129285812378, "learning_rate": 0.004280748663101605, "loss": 0.4519, "step": 270 },
    { "epoch": 1.5, "grad_norm": 1.7341545820236206, "learning_rate": 0.004254010695187166, "loss": 0.528, "step": 280 },
    { "epoch": 1.55, "grad_norm": 2.980020761489868, "learning_rate": 0.004227272727272727, "loss": 0.5021, "step": 290 },
    { "epoch": 1.6, "grad_norm": 0.6755030751228333, "learning_rate": 0.004200534759358289, "loss": 0.4601, "step": 300 },
    { "epoch": 1.66, "grad_norm": 1.8686202764511108, "learning_rate": 0.00417379679144385, "loss": 0.4433, "step": 310 },
    { "epoch": 1.71, "grad_norm": 1.371077299118042, "learning_rate": 0.004147058823529412, "loss": 0.4323, "step": 320 },
    { "epoch": 1.76, "grad_norm": 1.0771093368530273, "learning_rate": 0.004120320855614973, "loss": 0.4251, "step": 330 },
    { "epoch": 1.82, "grad_norm": 1.185023546218872, "learning_rate": 0.004093582887700535, "loss": 0.4881, "step": 340 },
    { "epoch": 1.87, "grad_norm": 0.9843281507492065, "learning_rate": 0.004066844919786096, "loss": 0.4483, "step": 350 },
    { "epoch": 1.93, "grad_norm": 1.6477869749069214, "learning_rate": 0.004040106951871658, "loss": 0.4956, "step": 360 },
    { "epoch": 1.98, "grad_norm": 1.7044633626937866, "learning_rate": 0.004013368983957219, "loss": 0.4176, "step": 370 },
    { "epoch": 2.0, "eval_accuracy": 0.9310747663551402, "eval_f1": 0.9182291375322846, "eval_loss": 0.198581263422966, "eval_precision": 0.924344363515161, "eval_recall": 0.9209393532374213, "eval_runtime": 19.6732, "eval_samples_per_second": 87.022, "eval_steps_per_second": 5.439, "step": 374 },
    { "epoch": 2.03, "grad_norm": 1.662027359008789, "learning_rate": 0.003986631016042781, "loss": 0.4022, "step": 380 },
    { "epoch": 2.09, "grad_norm": 1.188351035118103, "learning_rate": 0.003959893048128342, "loss": 0.3758, "step": 390 },
    { "epoch": 2.14, "grad_norm": 2.2225048542022705, "learning_rate": 0.003933155080213904, "loss": 0.4491, "step": 400 },
    { "epoch": 2.19, "grad_norm": 1.683356761932373, "learning_rate": 0.0039064171122994654, "loss": 0.3647, "step": 410 },
    { "epoch": 2.25, "grad_norm": 1.7646687030792236, "learning_rate": 0.0038796791443850265, "loss": 0.4666, "step": 420 },
    { "epoch": 2.3, "grad_norm": 2.173644781112671, "learning_rate": 0.0038529411764705885, "loss": 0.4314, "step": 430 },
    { "epoch": 2.35, "grad_norm": 0.8064551949501038, "learning_rate": 0.00382620320855615, "loss": 0.3944, "step": 440 },
    { "epoch": 2.41, "grad_norm": 0.9698677062988281, "learning_rate": 0.003799465240641711, "loss": 0.4314, "step": 450 },
    { "epoch": 2.46, "grad_norm": 0.9321346879005432, "learning_rate": 0.0037727272727272726, "loss": 0.467, "step": 460 },
    { "epoch": 2.51, "grad_norm": 2.6592769622802734, "learning_rate": 0.003745989304812834, "loss": 0.4024, "step": 470 },
    { "epoch": 2.57, "grad_norm": 1.7124016284942627, "learning_rate": 0.003719251336898396, "loss": 0.3283, "step": 480 },
    { "epoch": 2.62, "grad_norm": 3.178034543991089, "learning_rate": 0.0036925133689839572, "loss": 0.4377, "step": 490 },
    { "epoch": 2.67, "grad_norm": 1.2681751251220703, "learning_rate": 0.0036657754010695188, "loss": 0.3866, "step": 500 },
    { "epoch": 2.73, "grad_norm": 1.1923668384552002, "learning_rate": 0.0036390374331550803, "loss": 0.3366, "step": 510 },
    { "epoch": 2.78, "grad_norm": 1.499803066253662, "learning_rate": 0.0036122994652406414, "loss": 0.4578, "step": 520 },
    { "epoch": 2.83, "grad_norm": 1.887222409248352, "learning_rate": 0.0035855614973262034, "loss": 0.4189, "step": 530 },
    { "epoch": 2.89, "grad_norm": 1.3592134714126587, "learning_rate": 0.003558823529411765, "loss": 0.4008, "step": 540 },
    { "epoch": 2.94, "grad_norm": 3.0257527828216553, "learning_rate": 0.0035320855614973264, "loss": 0.3774, "step": 550 },
    { "epoch": 2.99, "grad_norm": 1.093493103981018, "learning_rate": 0.0035053475935828875, "loss": 0.3454, "step": 560 },
    { "epoch": 3.0, "eval_accuracy": 0.9503504672897196, "eval_f1": 0.9402807880914787, "eval_loss": 0.15674300491809845, "eval_precision": 0.9426615409260363, "eval_recall": 0.9397047025483766, "eval_runtime": 19.5644, "eval_samples_per_second": 87.506, "eval_steps_per_second": 5.469, "step": 561 },
    { "epoch": 3.05, "grad_norm": 1.7053543329238892, "learning_rate": 0.003478609625668449, "loss": 0.3776, "step": 570 },
    { "epoch": 3.1, "grad_norm": 1.5041882991790771, "learning_rate": 0.003451871657754011, "loss": 0.4058, "step": 580 },
    { "epoch": 3.16, "grad_norm": 1.3619967699050903, "learning_rate": 0.0034251336898395725, "loss": 0.3646, "step": 590 },
    { "epoch": 3.21, "grad_norm": 1.1415998935699463, "learning_rate": 0.0033983957219251336, "loss": 0.4906, "step": 600 },
    { "epoch": 3.26, "grad_norm": 1.6870795488357544, "learning_rate": 0.003371657754010695, "loss": 0.3828, "step": 610 },
    { "epoch": 3.32, "grad_norm": 1.0538561344146729, "learning_rate": 0.0033449197860962567, "loss": 0.3728, "step": 620 },
    { "epoch": 3.37, "grad_norm": 2.340454339981079, "learning_rate": 0.0033181818181818186, "loss": 0.3809, "step": 630 },
    { "epoch": 3.42, "grad_norm": 2.317230224609375, "learning_rate": 0.0032914438502673797, "loss": 0.3391, "step": 640 },
    { "epoch": 3.48, "grad_norm": 1.242281436920166, "learning_rate": 0.0032647058823529413, "loss": 0.4091, "step": 650 },
    { "epoch": 3.53, "grad_norm": 1.23116934299469, "learning_rate": 0.003237967914438503, "loss": 0.3592, "step": 660 },
    { "epoch": 3.58, "grad_norm": 1.117090106010437, "learning_rate": 0.003211229946524064, "loss": 0.3867, "step": 670 },
    { "epoch": 3.64, "grad_norm": 1.0917716026306152, "learning_rate": 0.0031844919786096254, "loss": 0.4386, "step": 680 },
    { "epoch": 3.69, "grad_norm": 1.2080508470535278, "learning_rate": 0.0031577540106951874, "loss": 0.3466, "step": 690 },
    { "epoch": 3.74, "grad_norm": 1.695580244064331, "learning_rate": 0.003131016042780749, "loss": 0.3147, "step": 700 },
    { "epoch": 3.8, "grad_norm": 1.1604491472244263, "learning_rate": 0.00310427807486631, "loss": 0.3585, "step": 710 },
    { "epoch": 3.85, "grad_norm": 1.8931636810302734, "learning_rate": 0.0030775401069518715, "loss": 0.3578, "step": 720 },
    { "epoch": 3.9, "grad_norm": 1.4620869159698486, "learning_rate": 0.003050802139037433, "loss": 0.3522, "step": 730 },
    { "epoch": 3.96, "grad_norm": 1.3944414854049683, "learning_rate": 0.003024064171122995, "loss": 0.3228, "step": 740 },
    { "epoch": 4.0, "eval_accuracy": 0.9357476635514018, "eval_f1": 0.9283408808159395, "eval_loss": 0.1848856657743454, "eval_precision": 0.9231661406901872, "eval_recall": 0.9426484043891363, "eval_runtime": 19.683, "eval_samples_per_second": 86.979, "eval_steps_per_second": 5.436, "step": 748 },
    { "epoch": 4.01, "grad_norm": 1.7614619731903076, "learning_rate": 0.002997326203208556, "loss": 0.3463, "step": 750 },
    { "epoch": 4.06, "grad_norm": 2.866691827774048, "learning_rate": 0.0029705882352941177, "loss": 0.3431, "step": 760 },
    { "epoch": 4.12, "grad_norm": 3.0871615409851074, "learning_rate": 0.002943850267379679, "loss": 0.4329, "step": 770 },
    { "epoch": 4.17, "grad_norm": 1.3399722576141357, "learning_rate": 0.0029171122994652403, "loss": 0.3992, "step": 780 },
    { "epoch": 4.22, "grad_norm": 1.440559983253479, "learning_rate": 0.0028903743315508022, "loss": 0.3333, "step": 790 },
    { "epoch": 4.28, "grad_norm": 1.4606270790100098, "learning_rate": 0.0028636363636363638, "loss": 0.3108, "step": 800 },
    { "epoch": 4.33, "grad_norm": 2.4641544818878174, "learning_rate": 0.0028368983957219253, "loss": 0.3436, "step": 810 },
    { "epoch": 4.39, "grad_norm": 1.9653208255767822, "learning_rate": 0.0028101604278074864, "loss": 0.2766, "step": 820 },
    { "epoch": 4.44, "grad_norm": 1.0840091705322266, "learning_rate": 0.002783422459893048, "loss": 0.2568, "step": 830 },
    { "epoch": 4.49, "grad_norm": 1.0625332593917847, "learning_rate": 0.00275668449197861, "loss": 0.3366, "step": 840 },
    { "epoch": 4.55, "grad_norm": 0.9171143174171448, "learning_rate": 0.0027299465240641714, "loss": 0.339, "step": 850 },
    { "epoch": 4.6, "grad_norm": 1.6296868324279785, "learning_rate": 0.0027032085561497325, "loss": 0.359, "step": 860 },
    { "epoch": 4.65, "grad_norm": 1.949312448501587, "learning_rate": 0.002676470588235294, "loss": 0.3529, "step": 870 },
    { "epoch": 4.71, "grad_norm": 1.6241270303726196, "learning_rate": 0.0026497326203208556, "loss": 0.3364, "step": 880 },
    { "epoch": 4.76, "grad_norm": 2.172145366668701, "learning_rate": 0.0026229946524064175, "loss": 0.3374, "step": 890 },
    { "epoch": 4.81, "grad_norm": 3.377912998199463, "learning_rate": 0.0025962566844919786, "loss": 0.3555, "step": 900 },
    { "epoch": 4.87, "grad_norm": 1.194082260131836, "learning_rate": 0.00256951871657754, "loss": 0.3354, "step": 910 },
    { "epoch": 4.92, "grad_norm": 1.774932861328125, "learning_rate": 0.0025427807486631017, "loss": 0.3728, "step": 920 },
    { "epoch": 4.97, "grad_norm": 0.9065486192703247, "learning_rate": 0.002516042780748663, "loss": 0.3382, "step": 930 },
    { "epoch": 5.0, "eval_accuracy": 0.9398364485981309, "eval_f1": 0.9320964504504674, "eval_loss": 0.16266803443431854, "eval_precision": 0.9301560138584124, "eval_recall": 0.9396981551324834, "eval_runtime": 19.6531, "eval_samples_per_second": 87.111, "eval_steps_per_second": 5.444, "step": 935 },
    { "epoch": 5.03, "grad_norm": 0.8373203873634338, "learning_rate": 0.0024893048128342248, "loss": 0.3115, "step": 940 },
    { "epoch": 5.08, "grad_norm": 1.6470876932144165, "learning_rate": 0.002462566844919786, "loss": 0.3746, "step": 950 },
    { "epoch": 5.13, "grad_norm": 2.556999444961548, "learning_rate": 0.002435828877005348, "loss": 0.3411, "step": 960 },
    { "epoch": 5.19, "grad_norm": 1.753217101097107, "learning_rate": 0.002409090909090909, "loss": 0.3095, "step": 970 },
    { "epoch": 5.24, "grad_norm": 2.667759895324707, "learning_rate": 0.0023823529411764704, "loss": 0.3358, "step": 980 },
    { "epoch": 5.29, "grad_norm": 1.6711212396621704, "learning_rate": 0.002355614973262032, "loss": 0.3263, "step": 990 },
    { "epoch": 5.35, "grad_norm": 1.8793816566467285, "learning_rate": 0.0023288770053475935, "loss": 0.3245, "step": 1000 },
    { "epoch": 5.4, "grad_norm": 1.3059521913528442, "learning_rate": 0.002302139037433155, "loss": 0.2904, "step": 1010 },
    { "epoch": 5.45, "grad_norm": 1.765958309173584, "learning_rate": 0.0022754010695187166, "loss": 0.3424, "step": 1020 },
    { "epoch": 5.51, "grad_norm": 0.9322473406791687, "learning_rate": 0.002248663101604278, "loss": 0.3716, "step": 1030 },
    { "epoch": 5.56, "grad_norm": 2.082515239715576, "learning_rate": 0.0022219251336898396, "loss": 0.2967, "step": 1040 },
    { "epoch": 5.61, "grad_norm": 1.6903836727142334, "learning_rate": 0.002195187165775401, "loss": 0.3244, "step": 1050 },
    { "epoch": 5.67, "grad_norm": 1.1631466150283813, "learning_rate": 0.0021684491978609627, "loss": 0.3141, "step": 1060 },
    { "epoch": 5.72, "grad_norm": 2.086376428604126, "learning_rate": 0.002141711229946524, "loss": 0.3211, "step": 1070 },
    { "epoch": 5.78, "grad_norm": 1.709187626838684, "learning_rate": 0.0021149732620320857, "loss": 0.3039, "step": 1080 },
    { "epoch": 5.83, "grad_norm": 1.7365305423736572, "learning_rate": 0.0020882352941176473, "loss": 0.2937, "step": 1090 },
    { "epoch": 5.88, "grad_norm": 1.2648741006851196, "learning_rate": 0.0020614973262032084, "loss": 0.2951, "step": 1100 },
    { "epoch": 5.94, "grad_norm": 1.2121895551681519, "learning_rate": 0.00203475935828877, "loss": 0.242, "step": 1110 },
    { "epoch": 5.99, "grad_norm": 1.6397563219070435, "learning_rate": 0.0020080213903743314, "loss": 0.3363, "step": 1120 },
    { "epoch": 6.0, "eval_accuracy": 0.9509345794392523, "eval_f1": 0.9456282248184136, "eval_loss": 0.14138737320899963, "eval_precision": 0.9497944760971885, "eval_recall": 0.9441674024122191, "eval_runtime": 19.937, "eval_samples_per_second": 85.87, "eval_steps_per_second": 5.367, "step": 1122 },
    { "epoch": 6.04, "grad_norm": 1.3460767269134521, "learning_rate": 0.001981283422459893, "loss": 0.3134, "step": 1130 },
    { "epoch": 6.1, "grad_norm": 1.2124683856964111, "learning_rate": 0.0019545454545454545, "loss": 0.3028, "step": 1140 },
    { "epoch": 6.15, "grad_norm": 0.8806934952735901, "learning_rate": 0.001927807486631016, "loss": 0.2589, "step": 1150 },
    { "epoch": 6.2, "grad_norm": 1.059187889099121, "learning_rate": 0.0019010695187165775, "loss": 0.2888, "step": 1160 },
    { "epoch": 6.26, "grad_norm": 2.5121827125549316, "learning_rate": 0.001874331550802139, "loss": 0.2741, "step": 1170 },
    { "epoch": 6.31, "grad_norm": 1.0052329301834106, "learning_rate": 0.0018475935828877006, "loss": 0.3519, "step": 1180 },
    { "epoch": 6.36, "grad_norm": 1.4301072359085083, "learning_rate": 0.0018208556149732621, "loss": 0.2937, "step": 1190 },
    { "epoch": 6.42, "grad_norm": 1.09031343460083, "learning_rate": 0.0017941176470588236, "loss": 0.2252, "step": 1200 },
    { "epoch": 6.47, "grad_norm": 1.9657083749771118, "learning_rate": 0.001767379679144385, "loss": 0.267, "step": 1210 },
    { "epoch": 6.52, "grad_norm": 3.7427196502685547, "learning_rate": 0.0017406417112299467, "loss": 0.2493, "step": 1220 },
    { "epoch": 6.58, "grad_norm": 1.7291096448898315, "learning_rate": 0.001713903743315508, "loss": 0.2558, "step": 1230 },
    { "epoch": 6.63, "grad_norm": 2.8834567070007324, "learning_rate": 0.0016871657754010698, "loss": 0.3167, "step": 1240 },
    { "epoch": 6.68, "grad_norm": 1.6702009439468384, "learning_rate": 0.001660427807486631, "loss": 0.274, "step": 1250 },
    { "epoch": 6.74, "grad_norm": 1.7623697519302368, "learning_rate": 0.0016336898395721924, "loss": 0.2481, "step": 1260 },
    { "epoch": 6.79, "grad_norm": 1.8855972290039062, "learning_rate": 0.0016069518716577541, "loss": 0.2424, "step": 1270 },
    { "epoch": 6.84, "grad_norm": 1.7909148931503296, "learning_rate": 0.0015802139037433154, "loss": 0.2361, "step": 1280 },
    { "epoch": 6.9, "grad_norm": 1.424047589302063, "learning_rate": 0.001553475935828877, "loss": 0.2834, "step": 1290 },
    { "epoch": 6.95, "grad_norm": 1.3470966815948486, "learning_rate": 0.0015267379679144385, "loss": 0.2981, "step": 1300 },
    { "epoch": 7.0, "eval_accuracy": 0.9544392523364486, "eval_f1": 0.9480272544883982, "eval_loss": 0.11172817647457123, "eval_precision": 0.9458066711610336, "eval_recall": 0.9541586489707353, "eval_runtime": 19.6986, "eval_samples_per_second": 86.91, "eval_steps_per_second": 5.432, "step": 1309 },
    { "epoch": 7.01, "grad_norm": 1.9716545343399048, "learning_rate": 0.0015, "loss": 0.2591, "step": 1310 },
    { "epoch": 7.06, "grad_norm": 2.347787618637085, "learning_rate": 0.0014732620320855616, "loss": 0.2324, "step": 1320 },
    { "epoch": 7.11, "grad_norm": 1.5514649152755737, "learning_rate": 0.001446524064171123, "loss": 0.2163, "step": 1330 },
    { "epoch": 7.17, "grad_norm": 3.073544979095459, "learning_rate": 0.0014197860962566844, "loss": 0.2889, "step": 1340 },
    { "epoch": 7.22, "grad_norm": 1.5972115993499756, "learning_rate": 0.0013930481283422461, "loss": 0.2589, "step": 1350 },
    { "epoch": 7.27, "grad_norm": 1.8408401012420654, "learning_rate": 0.0013663101604278075, "loss": 0.2333, "step": 1360 },
    { "epoch": 7.33, "grad_norm": 1.3704335689544678, "learning_rate": 0.0013395721925133692, "loss": 0.2103, "step": 1370 },
    { "epoch": 7.38, "grad_norm": 3.6621859073638916, "learning_rate": 0.0013128342245989305, "loss": 0.2413, "step": 1380 },
    { "epoch": 7.43, "grad_norm": 1.345258355140686, "learning_rate": 0.0012860962566844918, "loss": 0.2444, "step": 1390 },
    { "epoch": 7.49, "grad_norm": 1.354202389717102, "learning_rate": 0.0012593582887700536, "loss": 0.2288, "step": 1400 },
    { "epoch": 7.54, "grad_norm": 0.983450174331665, "learning_rate": 0.0012326203208556149, "loss": 0.2995, "step": 1410 },
    { "epoch": 7.59, "grad_norm": 1.7251689434051514, "learning_rate": 0.0012058823529411764, "loss": 0.2898, "step": 1420 },
    { "epoch": 7.65, "grad_norm": 1.4366217851638794, "learning_rate": 0.001179144385026738, "loss": 0.2509, "step": 1430 },
    { "epoch": 7.7, "grad_norm": 1.6491020917892456, "learning_rate": 0.0011524064171122995, "loss": 0.2191, "step": 1440 },
    { "epoch": 7.75, "grad_norm": 1.4462454319000244, "learning_rate": 0.001125668449197861, "loss": 0.2307, "step": 1450 },
    { "epoch": 7.81, "grad_norm": 1.5503740310668945, "learning_rate": 0.0010989304812834225, "loss": 0.2167, "step": 1460 },
    { "epoch": 7.86, "grad_norm": 1.5065810680389404, "learning_rate": 0.001072192513368984, "loss": 0.3377, "step": 1470 },
    { "epoch": 7.91, "grad_norm": 1.3696374893188477, "learning_rate": 0.0010454545454545454, "loss": 0.24, "step": 1480 },
    { "epoch": 7.97, "grad_norm": 0.9576804041862488, "learning_rate": 0.001018716577540107, "loss": 0.2214, "step": 1490 },
    { "epoch": 8.0, "eval_accuracy": 0.9649532710280374, "eval_f1": 0.9609815836403053, "eval_loss": 0.11309263855218887, "eval_precision": 0.9642473014777337, "eval_recall": 0.9584474051621633, "eval_runtime": 19.6442, "eval_samples_per_second": 87.15, "eval_steps_per_second": 5.447, "step": 1496 },
    { "epoch": 8.02, "grad_norm": 1.6305640935897827, "learning_rate": 0.0009919786096256684, "loss": 0.2645, "step": 1500 },
    { "epoch": 8.07, "grad_norm": 1.0711798667907715, "learning_rate": 0.00096524064171123, "loss": 0.2063, "step": 1510 },
    { "epoch": 8.13, "grad_norm": 1.2606171369552612, "learning_rate": 0.0009385026737967915, "loss": 0.1904, "step": 1520 },
    { "epoch": 8.18, "grad_norm": 0.8554580807685852, "learning_rate": 0.0009117647058823529, "loss": 0.2078, "step": 1530 },
    { "epoch": 8.24, "grad_norm": 1.0638494491577148, "learning_rate": 0.0008850267379679144, "loss": 0.2129, "step": 1540 },
    { "epoch": 8.29, "grad_norm": 1.4322021007537842, "learning_rate": 0.000858288770053476, "loss": 0.2761, "step": 1550 },
    { "epoch": 8.34, "grad_norm": 1.2639697790145874, "learning_rate": 0.0008315508021390375, "loss": 0.1979, "step": 1560 },
    { "epoch": 8.4, "grad_norm": 1.108430027961731, "learning_rate": 0.0008048128342245989, "loss": 0.2051, "step": 1570 },
    { "epoch": 8.45, "grad_norm": 2.08953857421875, "learning_rate": 0.0007780748663101605, "loss": 0.2306, "step": 1580 },
    { "epoch": 8.5, "grad_norm": 1.464694857597351, "learning_rate": 0.000751336898395722, "loss": 0.1992, "step": 1590 },
    { "epoch": 8.56, "grad_norm": 1.4773173332214355, "learning_rate": 0.0007245989304812835, "loss": 0.1764, "step": 1600 },
    { "epoch": 8.61, "grad_norm": 2.048029661178589, "learning_rate": 0.000697860962566845, "loss": 0.237, "step": 1610 },
    { "epoch": 8.66, "grad_norm": 1.0951212644577026, "learning_rate": 0.0006711229946524064, "loss": 0.1821, "step": 1620 },
    { "epoch": 8.72, "grad_norm": 1.084712028503418, "learning_rate": 0.0006443850267379679, "loss": 0.1947, "step": 1630 },
    { "epoch": 8.77, "grad_norm": 1.007285714149475, "learning_rate": 0.0006176470588235294, "loss": 0.2014, "step": 1640 },
    { "epoch": 8.82, "grad_norm": 1.0643844604492188, "learning_rate": 0.0005909090909090909, "loss": 0.2411, "step": 1650 },
    { "epoch": 8.88, "grad_norm": 2.0171964168548584, "learning_rate": 0.0005641711229946525, "loss": 0.2297, "step": 1660 },
    { "epoch": 8.93, "grad_norm": 0.8814995884895325, "learning_rate": 0.0005374331550802139, "loss": 0.2052, "step": 1670 },
    { "epoch": 8.98, "grad_norm": 1.338088035583496, "learning_rate": 0.0005106951871657754, "loss": 0.1928, "step": 1680 },
    { "epoch": 9.0, "eval_accuracy": 0.9649532710280374, "eval_f1": 0.9624133353031232, "eval_loss": 0.09664417803287506, "eval_precision": 0.9632215980141733, "eval_recall": 0.9628324486352646, "eval_runtime": 19.7505, "eval_samples_per_second": 86.681, "eval_steps_per_second": 5.418, "step": 1683 },
    { "epoch": 9.04, "grad_norm": 1.1753814220428467, "learning_rate": 0.0004839572192513369, "loss": 0.1862, "step": 1690 },
    { "epoch": 9.09, "grad_norm": 0.9707505702972412, "learning_rate": 0.0004572192513368984, "loss": 0.2182, "step": 1700 },
    { "epoch": 9.14, "grad_norm": 0.9967671632766724, "learning_rate": 0.0004304812834224599, "loss": 0.1923, "step": 1710 },
    { "epoch": 9.2, "grad_norm": 1.496031641960144, "learning_rate": 0.00040374331550802143, "loss": 0.2105, "step": 1720 },
    { "epoch": 9.25, "grad_norm": 0.8774816393852234, "learning_rate": 0.00037700534759358285, "loss": 0.1969, "step": 1730 },
    { "epoch": 9.3, "grad_norm": 0.6063610315322876, "learning_rate": 0.0003502673796791444, "loss": 0.1577, "step": 1740 },
    { "epoch": 9.36, "grad_norm": 0.8216743469238281, "learning_rate": 0.0003235294117647059, "loss": 0.2064, "step": 1750 },
    { "epoch": 9.41, "grad_norm": 0.7338688373565674, "learning_rate": 0.0002967914438502674, "loss": 0.1793, "step": 1760 },
    { "epoch": 9.47, "grad_norm": 0.910650372505188, "learning_rate": 0.00027005347593582886, "loss": 0.194, "step": 1770 },
    { "epoch": 9.52, "grad_norm": 0.7778304219245911, "learning_rate": 0.00024331550802139036, "loss": 0.2203, "step": 1780 },
    { "epoch": 9.57, "grad_norm": 1.0693227052688599, "learning_rate": 0.00021657754010695186, "loss": 0.1718, "step": 1790 },
    { "epoch": 9.63, "grad_norm": 1.4808011054992676, "learning_rate": 0.0001898395721925134, "loss": 0.1696, "step": 1800 },
    { "epoch": 9.68, "grad_norm": 0.8625634908676147, "learning_rate": 0.0001631016042780749, "loss": 0.1875, "step": 1810 },
    { "epoch": 9.73, "grad_norm": 1.1236218214035034, "learning_rate": 0.00013636363636363637, "loss": 0.1772, "step": 1820 },
    { "epoch": 9.79, "grad_norm": 1.027061939239502, "learning_rate": 0.00010962566844919787, "loss": 0.2274, "step": 1830 },
    { "epoch": 9.84, "grad_norm": 0.977976381778717, "learning_rate": 8.288770053475936e-05, "loss": 0.1672, "step": 1840 },
    { "epoch": 9.89, "grad_norm": 0.957969069480896, "learning_rate": 5.614973262032086e-05, "loss": 0.1966, "step": 1850 },
    { "epoch": 9.95, "grad_norm": 0.6182002425193787, "learning_rate": 2.9411764705882354e-05, "loss": 0.1546, "step": 1860 },
    { "epoch": 10.0, "grad_norm": 1.8023217916488647, "learning_rate": 2.6737967914438504e-06, "loss": 0.1901, "step": 1870 },
    { "epoch": 10.0, "eval_accuracy": 0.9713785046728972, "eval_f1": 0.9692014832223894, "eval_loss": 0.07747028768062592, "eval_precision": 0.968992240300534, "eval_recall": 0.9698888041231651, "eval_runtime": 19.6225, "eval_samples_per_second": 87.247, "eval_steps_per_second": 5.453, "step": 1870 },
    { "epoch": 10.0, "step": 1870, "total_flos": 2.1188849626596557e+19, "train_loss": 0.34750947773775315, "train_runtime": 3122.2212, "train_samples_per_second": 38.303, "train_steps_per_second": 0.599 }
|
  ],
  "logging_steps": 10,
  "max_steps": 1870,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 2.1188849626596557e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|