|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-face-recognition/checkpoint-1488", |
|
"epoch": 8.0, |
|
"global_step": 2976, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.026845637583892e-06, |
|
"loss": 2.214, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.053691275167785e-06, |
|
"loss": 2.1532, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.2080536912751678e-05, |
|
"loss": 2.0195, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.610738255033557e-05, |
|
"loss": 1.7918, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.0134228187919465e-05, |
|
"loss": 1.4794, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.4161073825503356e-05, |
|
"loss": 1.1516, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8187919463087248e-05, |
|
"loss": 0.8457, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.221476510067114e-05, |
|
"loss": 0.6145, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.6241610738255034e-05, |
|
"loss": 0.446, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.026845637583893e-05, |
|
"loss": 0.3573, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4295302013422824e-05, |
|
"loss": 0.289, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.832214765100671e-05, |
|
"loss": 0.2559, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 5.23489932885906e-05, |
|
"loss": 0.2186, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.6375838926174495e-05, |
|
"loss": 0.1936, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.04026845637584e-05, |
|
"loss": 0.181, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.442953020134228e-05, |
|
"loss": 0.1621, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 6.845637583892617e-05, |
|
"loss": 0.1518, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.248322147651007e-05, |
|
"loss": 0.1361, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.651006711409396e-05, |
|
"loss": 0.1268, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.053691275167786e-05, |
|
"loss": 0.1159, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.456375838926175e-05, |
|
"loss": 0.1088, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.859060402684565e-05, |
|
"loss": 0.1015, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.261744966442954e-05, |
|
"loss": 0.0947, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.664429530201342e-05, |
|
"loss": 0.087, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010067114093959731, |
|
"loss": 0.0823, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001046979865771812, |
|
"loss": 0.0772, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001087248322147651, |
|
"loss": 0.0694, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00011275167785234899, |
|
"loss": 0.0642, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00011677852348993289, |
|
"loss": 0.0592, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00011991038088125467, |
|
"loss": 0.0556, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.000119462285287528, |
|
"loss": 0.0516, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011901418969380135, |
|
"loss": 0.0679, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011856609410007469, |
|
"loss": 0.0582, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00011811799850634802, |
|
"loss": 0.0487, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00011766990291262137, |
|
"loss": 0.0462, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001172218073188947, |
|
"loss": 0.0411, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00011677371172516803, |
|
"loss": 0.0368, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.999957997311828, |
|
"eval_loss": 0.034596964716911316, |
|
"eval_runtime": 326.4848, |
|
"eval_samples_per_second": 72.922, |
|
"eval_steps_per_second": 1.139, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00011632561613144138, |
|
"loss": 0.0335, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00011587752053771471, |
|
"loss": 0.0316, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00011542942494398806, |
|
"loss": 0.0303, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00011498132935026138, |
|
"loss": 0.0285, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00011453323375653473, |
|
"loss": 0.0273, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00011408513816280807, |
|
"loss": 0.0258, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00011363704256908141, |
|
"loss": 0.0248, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00011318894697535474, |
|
"loss": 0.0257, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00011274085138162807, |
|
"loss": 0.0251, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00011229275578790142, |
|
"loss": 0.0225, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00011184466019417477, |
|
"loss": 0.0212, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00011139656460044809, |
|
"loss": 0.0214, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00011094846900672144, |
|
"loss": 0.0197, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00011050037341299477, |
|
"loss": 0.0203, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00011005227781926812, |
|
"loss": 0.0183, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00010960418222554145, |
|
"loss": 0.0202, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00010915608663181478, |
|
"loss": 0.0258, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00010870799103808813, |
|
"loss": 0.0198, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00010825989544436148, |
|
"loss": 0.0194, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00010781179985063481, |
|
"loss": 0.0171, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00010736370425690814, |
|
"loss": 0.0152, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00010691560866318148, |
|
"loss": 0.0162, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00010646751306945482, |
|
"loss": 0.0143, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00010601941747572817, |
|
"loss": 0.014, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00010557132188200149, |
|
"loss": 0.0132, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00010512322628827484, |
|
"loss": 0.0127, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00010467513069454817, |
|
"loss": 0.0123, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00010422703510082152, |
|
"loss": 0.0124, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00010377893950709485, |
|
"loss": 0.0118, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00010333084391336818, |
|
"loss": 0.0128, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00010288274831964153, |
|
"loss": 0.0139, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00010243465272591486, |
|
"loss": 0.0158, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0001019865571321882, |
|
"loss": 0.0112, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010153846153846155, |
|
"loss": 0.0104, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010109036594473488, |
|
"loss": 0.0099, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010064227035100823, |
|
"loss": 0.0095, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010019417475728155, |
|
"loss": 0.0094, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.999957997311828, |
|
"eval_loss": 0.009242160245776176, |
|
"eval_runtime": 315.5591, |
|
"eval_samples_per_second": 75.447, |
|
"eval_steps_per_second": 1.179, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.974607916355489e-05, |
|
"loss": 0.009, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.929798356982824e-05, |
|
"loss": 0.0089, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.884988797610157e-05, |
|
"loss": 0.0086, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.84017923823749e-05, |
|
"loss": 0.0086, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.795369678864824e-05, |
|
"loss": 0.0082, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.750560119492159e-05, |
|
"loss": 0.0081, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.705750560119493e-05, |
|
"loss": 0.0081, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.660941000746825e-05, |
|
"loss": 0.0078, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 9.61613144137416e-05, |
|
"loss": 0.0077, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 9.571321882001495e-05, |
|
"loss": 0.0074, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 9.526512322628828e-05, |
|
"loss": 0.0073, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 9.481702763256161e-05, |
|
"loss": 0.0096, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 9.436893203883495e-05, |
|
"loss": 0.0078, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 9.39208364451083e-05, |
|
"loss": 0.0073, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 9.347274085138164e-05, |
|
"loss": 0.0069, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.302464525765496e-05, |
|
"loss": 0.0067, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.257654966392831e-05, |
|
"loss": 0.0066, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.212845407020164e-05, |
|
"loss": 0.0064, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.168035847647499e-05, |
|
"loss": 0.0063, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 9.123226288274832e-05, |
|
"loss": 0.0061, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.078416728902166e-05, |
|
"loss": 0.006, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 9.0336071695295e-05, |
|
"loss": 0.0059, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.988797610156834e-05, |
|
"loss": 0.0058, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.943988050784167e-05, |
|
"loss": 0.0057, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 8.899178491411502e-05, |
|
"loss": 0.0055, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 8.854368932038835e-05, |
|
"loss": 0.0056, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 8.80955937266617e-05, |
|
"loss": 0.0055, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.764749813293502e-05, |
|
"loss": 0.0053, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 8.719940253920836e-05, |
|
"loss": 0.0052, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 8.675130694548171e-05, |
|
"loss": 0.0052, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 8.630321135175504e-05, |
|
"loss": 0.0051, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 8.585511575802838e-05, |
|
"loss": 0.0051, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 8.540702016430171e-05, |
|
"loss": 0.005, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.495892457057506e-05, |
|
"loss": 0.0052, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.45108289768484e-05, |
|
"loss": 0.0049, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 8.406273338312172e-05, |
|
"loss": 0.0048, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 8.361463778939507e-05, |
|
"loss": 0.0046, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.999957997311828, |
|
"eval_loss": 0.004723448771983385, |
|
"eval_runtime": 316.511, |
|
"eval_samples_per_second": 75.22, |
|
"eval_steps_per_second": 1.175, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.316654219566842e-05, |
|
"loss": 0.0046, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.271844660194175e-05, |
|
"loss": 0.0046, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 8.22703510082151e-05, |
|
"loss": 0.0045, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 8.182225541448842e-05, |
|
"loss": 0.0044, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 8.137415982076177e-05, |
|
"loss": 0.0044, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.092606422703511e-05, |
|
"loss": 0.0043, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 8.047796863330845e-05, |
|
"loss": 0.0042, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 8.002987303958178e-05, |
|
"loss": 0.0041, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 7.958177744585511e-05, |
|
"loss": 0.0041, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 7.913368185212846e-05, |
|
"loss": 0.004, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 7.86855862584018e-05, |
|
"loss": 0.004, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 7.823749066467513e-05, |
|
"loss": 0.004, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 7.778939507094847e-05, |
|
"loss": 0.0039, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 7.73412994772218e-05, |
|
"loss": 0.0039, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 7.689320388349515e-05, |
|
"loss": 0.0038, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 7.644510828976849e-05, |
|
"loss": 0.0037, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 7.599701269604182e-05, |
|
"loss": 0.0036, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 7.554891710231517e-05, |
|
"loss": 0.0036, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 7.51008215085885e-05, |
|
"loss": 0.0036, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 7.465272591486183e-05, |
|
"loss": 0.0036, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 7.420463032113518e-05, |
|
"loss": 0.0035, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 7.375653472740851e-05, |
|
"loss": 0.0034, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 7.330843913368186e-05, |
|
"loss": 0.0034, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 7.286034353995518e-05, |
|
"loss": 0.0036, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 7.241224794622853e-05, |
|
"loss": 0.0035, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 7.196415235250188e-05, |
|
"loss": 0.0034, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 7.151605675877521e-05, |
|
"loss": 0.0033, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 7.106796116504854e-05, |
|
"loss": 0.0033, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 7.061986557132189e-05, |
|
"loss": 0.0032, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 7.017176997759522e-05, |
|
"loss": 0.0032, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 6.972367438386857e-05, |
|
"loss": 0.0031, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.927557879014189e-05, |
|
"loss": 0.0031, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.882748319641524e-05, |
|
"loss": 0.0031, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 6.837938760268858e-05, |
|
"loss": 0.003, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 6.793129200896192e-05, |
|
"loss": 0.0031, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 6.748319641523525e-05, |
|
"loss": 0.003, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 6.703510082150858e-05, |
|
"loss": 0.0029, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.002898953389376402, |
|
"eval_runtime": 315.5752, |
|
"eval_samples_per_second": 75.443, |
|
"eval_steps_per_second": 1.179, |
|
"step": 1488 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 6.658700522778193e-05, |
|
"loss": 0.003, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 6.613890963405528e-05, |
|
"loss": 0.0029, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 6.56908140403286e-05, |
|
"loss": 0.0029, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 6.524271844660194e-05, |
|
"loss": 0.0028, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 6.479462285287528e-05, |
|
"loss": 0.0028, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 6.434652725914862e-05, |
|
"loss": 0.0028, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 6.389843166542196e-05, |
|
"loss": 0.0028, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 6.345033607169529e-05, |
|
"loss": 0.0027, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 6.300224047796864e-05, |
|
"loss": 0.0027, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 6.255414488424197e-05, |
|
"loss": 0.0027, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 6.21060492905153e-05, |
|
"loss": 0.0027, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 6.165795369678865e-05, |
|
"loss": 0.0026, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 6.120985810306199e-05, |
|
"loss": 0.0026, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 6.076176250933533e-05, |
|
"loss": 0.0026, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 6.031366691560866e-05, |
|
"loss": 0.0026, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 5.9865571321882e-05, |
|
"loss": 0.0025, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 5.941747572815534e-05, |
|
"loss": 0.0025, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 5.896938013442868e-05, |
|
"loss": 0.0025, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 5.852128454070202e-05, |
|
"loss": 0.0025, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 5.807318894697535e-05, |
|
"loss": 0.0024, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 5.762509335324869e-05, |
|
"loss": 0.0024, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 5.717699775952203e-05, |
|
"loss": 0.0032, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 5.672890216579537e-05, |
|
"loss": 0.003, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 5.6280806572068713e-05, |
|
"loss": 0.0028, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 5.583271097834205e-05, |
|
"loss": 0.0055, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.538461538461539e-05, |
|
"loss": 0.0036, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 5.493651979088873e-05, |
|
"loss": 0.0029, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5.448842419716207e-05, |
|
"loss": 0.0059, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 5.40403286034354e-05, |
|
"loss": 0.0042, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 5.359223300970874e-05, |
|
"loss": 0.0031, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 5.3144137415982074e-05, |
|
"loss": 0.0026, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 5.269604182225542e-05, |
|
"loss": 0.0025, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 5.2247946228528755e-05, |
|
"loss": 0.0025, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 5.1799850634802095e-05, |
|
"loss": 0.0023, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 5.135175504107543e-05, |
|
"loss": 0.0024, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 5.090365944734877e-05, |
|
"loss": 0.0023, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 5.045556385362211e-05, |
|
"loss": 0.0022, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 5.000746825989545e-05, |
|
"loss": 0.0022, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9999159946236559, |
|
"eval_loss": 0.0023123060818761587, |
|
"eval_runtime": 314.9106, |
|
"eval_samples_per_second": 75.602, |
|
"eval_steps_per_second": 1.181, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.955937266616878e-05, |
|
"loss": 0.0023, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 4.911127707244212e-05, |
|
"loss": 0.0021, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 4.866318147871546e-05, |
|
"loss": 0.0044, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 4.82150858849888e-05, |
|
"loss": 0.0021, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 4.7766990291262136e-05, |
|
"loss": 0.0022, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.7318894697535476e-05, |
|
"loss": 0.0021, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 4.687079910380881e-05, |
|
"loss": 0.0021, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 4.642270351008216e-05, |
|
"loss": 0.002, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 4.597460791635549e-05, |
|
"loss": 0.0023, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 4.552651232262883e-05, |
|
"loss": 0.002, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 4.5078416728902164e-05, |
|
"loss": 0.002, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 4.4630321135175504e-05, |
|
"loss": 0.0019, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 4.4182225541448844e-05, |
|
"loss": 0.0019, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 4.3734129947722184e-05, |
|
"loss": 0.0019, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.328603435399552e-05, |
|
"loss": 0.0019, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 4.283793876026886e-05, |
|
"loss": 0.0019, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 4.23898431665422e-05, |
|
"loss": 0.0019, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 4.194174757281554e-05, |
|
"loss": 0.0019, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 4.149365197908887e-05, |
|
"loss": 0.0019, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 4.104555638536221e-05, |
|
"loss": 0.0019, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 4.0597460791635545e-05, |
|
"loss": 0.0019, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 4.014936519790889e-05, |
|
"loss": 0.0019, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 3.9701269604182226e-05, |
|
"loss": 0.0018, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 3.9253174010455566e-05, |
|
"loss": 0.0018, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 3.88050784167289e-05, |
|
"loss": 0.0018, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 3.835698282300224e-05, |
|
"loss": 0.0018, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 3.790888722927558e-05, |
|
"loss": 0.0018, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 3.746079163554892e-05, |
|
"loss": 0.0018, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 3.701269604182225e-05, |
|
"loss": 0.0018, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 3.656460044809559e-05, |
|
"loss": 0.0017, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 3.6116504854368933e-05, |
|
"loss": 0.0018, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 3.5668409260642274e-05, |
|
"loss": 0.0017, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 3.522031366691561e-05, |
|
"loss": 0.0017, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 3.477221807318895e-05, |
|
"loss": 0.0017, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 3.432412247946228e-05, |
|
"loss": 0.0017, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 3.387602688573563e-05, |
|
"loss": 0.0017, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 3.342793129200896e-05, |
|
"loss": 0.0017, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.0016880695475265384, |
|
"eval_runtime": 314.9291, |
|
"eval_samples_per_second": 75.598, |
|
"eval_steps_per_second": 1.181, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 3.29798356982823e-05, |
|
"loss": 0.0017, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 3.2531740104555635e-05, |
|
"loss": 0.0017, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 3.2083644510828975e-05, |
|
"loss": 0.0016, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 3.1635548917102315e-05, |
|
"loss": 0.0017, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 3.1187453323375655e-05, |
|
"loss": 0.0016, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 3.0739357729648995e-05, |
|
"loss": 0.0016, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 3.0291262135922332e-05, |
|
"loss": 0.0016, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 2.984316654219567e-05, |
|
"loss": 0.0016, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 2.9395070948469006e-05, |
|
"loss": 0.0016, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 2.8946975354742346e-05, |
|
"loss": 0.0016, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 2.8498879761015683e-05, |
|
"loss": 0.0016, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 2.8050784167289023e-05, |
|
"loss": 0.0016, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 2.760268857356236e-05, |
|
"loss": 0.0016, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 2.71545929798357e-05, |
|
"loss": 0.0016, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 2.6706497386109037e-05, |
|
"loss": 0.0016, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 2.6258401792382373e-05, |
|
"loss": 0.0016, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 2.5810306198655713e-05, |
|
"loss": 0.0016, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 2.536221060492905e-05, |
|
"loss": 0.0016, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 2.491411501120239e-05, |
|
"loss": 0.0016, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 2.4466019417475727e-05, |
|
"loss": 0.0015, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 2.4017923823749067e-05, |
|
"loss": 0.0015, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 2.3569828230022404e-05, |
|
"loss": 0.0015, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 2.312173263629574e-05, |
|
"loss": 0.0015, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 2.267363704256908e-05, |
|
"loss": 0.0015, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 2.2225541448842418e-05, |
|
"loss": 0.0015, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 2.1777445855115758e-05, |
|
"loss": 0.0015, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 2.1329350261389095e-05, |
|
"loss": 0.0015, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 2.0881254667662435e-05, |
|
"loss": 0.0015, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 2.0433159073935772e-05, |
|
"loss": 0.0015, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.998506348020911e-05, |
|
"loss": 0.0015, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 1.953696788648245e-05, |
|
"loss": 0.0015, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 1.9088872292755786e-05, |
|
"loss": 0.0015, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 1.8640776699029126e-05, |
|
"loss": 0.0015, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 1.8192681105302466e-05, |
|
"loss": 0.0015, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.7744585511575806e-05, |
|
"loss": 0.0015, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 1.7296489917849143e-05, |
|
"loss": 0.0015, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 1.684839432412248e-05, |
|
"loss": 0.0015, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 0.001458011451177299, |
|
"eval_runtime": 317.9295, |
|
"eval_samples_per_second": 74.885, |
|
"eval_steps_per_second": 1.17, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.640029873039582e-05, |
|
"loss": 0.0015, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 1.5952203136669157e-05, |
|
"loss": 0.0015, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 1.5504107542942497e-05, |
|
"loss": 0.0014, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 1.5056011949215834e-05, |
|
"loss": 0.0014, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1.460791635548917e-05, |
|
"loss": 0.0014, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 1.4159820761762509e-05, |
|
"loss": 0.0014, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 1.3711725168035847e-05, |
|
"loss": 0.0014, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 1.3263629574309186e-05, |
|
"loss": 0.0014, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.2815533980582524e-05, |
|
"loss": 0.0014, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 1.2367438386855863e-05, |
|
"loss": 0.0014, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.19193427931292e-05, |
|
"loss": 0.0014, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 1.147124719940254e-05, |
|
"loss": 0.0014, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.1023151605675878e-05, |
|
"loss": 0.0014, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 1.0575056011949217e-05, |
|
"loss": 0.0014, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 1.0126960418222555e-05, |
|
"loss": 0.0014, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 9.678864824495894e-06, |
|
"loss": 0.0014, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 9.230769230769232e-06, |
|
"loss": 0.0014, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 8.782673637042569e-06, |
|
"loss": 0.0014, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 8.334578043315908e-06, |
|
"loss": 0.0014, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 7.886482449589246e-06, |
|
"loss": 0.0014, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 7.4383868558625845e-06, |
|
"loss": 0.0014, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 6.990291262135923e-06, |
|
"loss": 0.0014, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 6.542195668409261e-06, |
|
"loss": 0.0014, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 6.094100074682599e-06, |
|
"loss": 0.0014, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 5.646004480955938e-06, |
|
"loss": 0.0014, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 5.197908887229276e-06, |
|
"loss": 0.0014, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 4.749813293502614e-06, |
|
"loss": 0.0014, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.301717699775952e-06, |
|
"loss": 0.0014, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 3.853622106049291e-06, |
|
"loss": 0.0014, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 3.4055265123226292e-06, |
|
"loss": 0.0014, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 2.9574309185959673e-06, |
|
"loss": 0.0014, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 2.5093353248693058e-06, |
|
"loss": 0.0014, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 2.061239731142644e-06, |
|
"loss": 0.0014, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 1.613144137415982e-06, |
|
"loss": 0.0014, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 1.1650485436893204e-06, |
|
"loss": 0.0014, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 7.169529499626587e-07, |
|
"loss": 0.0014, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 2.68857356235997e-07, |
|
"loss": 0.0014, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.999957997311828, |
|
"eval_loss": 0.0014714967692270875, |
|
"eval_runtime": 317.4979, |
|
"eval_samples_per_second": 74.986, |
|
"eval_steps_per_second": 1.172, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 2976, |
|
"total_flos": 5.9038961296526475e+19, |
|
"train_loss": 0.05853422665912207, |
|
"train_runtime": 16155.4674, |
|
"train_samples_per_second": 47.156, |
|
"train_steps_per_second": 0.184 |
|
} |
|
], |
|
"max_steps": 2976, |
|
"num_train_epochs": 8, |
|
"total_flos": 5.9038961296526475e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|