|
{ |
|
"best_metric": 0.020064357668161392, |
|
"best_model_checkpoint": "./vit-base-mask-finetuned/checkpoint-3500", |
|
"epoch": 4.0, |
|
"global_step": 3608, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019944567627494457, |
|
"loss": 0.3036, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019889135254988916, |
|
"loss": 0.0608, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019833702882483372, |
|
"loss": 0.279, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019778270509977829, |
|
"loss": 0.0541, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019722838137472285, |
|
"loss": 0.0914, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001966740576496674, |
|
"loss": 0.054, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019611973392461197, |
|
"loss": 0.0358, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019556541019955653, |
|
"loss": 0.1087, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019501108647450112, |
|
"loss": 0.0825, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019445676274944569, |
|
"loss": 0.1983, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.8654060066740823, |
|
"eval_loss": 0.4739435613155365, |
|
"eval_runtime": 55.7772, |
|
"eval_samples_per_second": 64.471, |
|
"eval_steps_per_second": 8.068, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019390243902439025, |
|
"loss": 0.0989, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019334811529933484, |
|
"loss": 0.0927, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001927937915742794, |
|
"loss": 0.0285, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019223946784922396, |
|
"loss": 0.0902, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019168514412416852, |
|
"loss": 0.1752, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019113082039911309, |
|
"loss": 0.1193, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019057649667405765, |
|
"loss": 0.0224, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001900221729490022, |
|
"loss": 0.0087, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001894678492239468, |
|
"loss": 0.1051, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00018891352549889136, |
|
"loss": 0.067, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9883203559510567, |
|
"eval_loss": 0.052814140915870667, |
|
"eval_runtime": 60.4371, |
|
"eval_samples_per_second": 59.5, |
|
"eval_steps_per_second": 7.446, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00018835920177383592, |
|
"loss": 0.0395, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001878048780487805, |
|
"loss": 0.0902, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00018725055432372508, |
|
"loss": 0.0561, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00018669623059866964, |
|
"loss": 0.0737, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001861419068736142, |
|
"loss": 0.1694, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00018558758314855876, |
|
"loss": 0.0941, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00018503325942350332, |
|
"loss": 0.0564, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001844789356984479, |
|
"loss": 0.1086, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00018392461197339248, |
|
"loss": 0.0907, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00018337028824833704, |
|
"loss": 0.0313, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.9872080088987765, |
|
"eval_loss": 0.04620526358485222, |
|
"eval_runtime": 58.7722, |
|
"eval_samples_per_second": 61.185, |
|
"eval_steps_per_second": 7.657, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001828159645232816, |
|
"loss": 0.0853, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00018226164079822616, |
|
"loss": 0.0177, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018170731707317075, |
|
"loss": 0.0603, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018115299334811531, |
|
"loss": 0.122, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018059866962305988, |
|
"loss": 0.0216, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018004434589800444, |
|
"loss": 0.0723, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000179490022172949, |
|
"loss": 0.1421, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017893569844789356, |
|
"loss": 0.0695, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017838137472283815, |
|
"loss": 0.0655, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00017782705099778271, |
|
"loss": 0.0628, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.9763626251390434, |
|
"eval_loss": 0.08295725286006927, |
|
"eval_runtime": 58.2224, |
|
"eval_samples_per_second": 61.763, |
|
"eval_steps_per_second": 7.729, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00017727272727272728, |
|
"loss": 0.0909, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017671840354767184, |
|
"loss": 0.0186, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00017616407982261643, |
|
"loss": 0.0316, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000175609756097561, |
|
"loss": 0.0313, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00017505543237250555, |
|
"loss": 0.0066, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00017450110864745014, |
|
"loss": 0.0602, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00017394678492239468, |
|
"loss": 0.0727, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00017339246119733924, |
|
"loss": 0.0624, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001728381374722838, |
|
"loss": 0.0064, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001722838137472284, |
|
"loss": 0.0042, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.9924916573971079, |
|
"eval_loss": 0.03670423477888107, |
|
"eval_runtime": 59.4878, |
|
"eval_samples_per_second": 60.449, |
|
"eval_steps_per_second": 7.565, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00017172949002217295, |
|
"loss": 0.0343, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00017117516629711752, |
|
"loss": 0.1144, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001706208425720621, |
|
"loss": 0.0439, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00017006651884700667, |
|
"loss": 0.0852, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00016951219512195123, |
|
"loss": 0.0988, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00016895787139689582, |
|
"loss": 0.0594, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00016840354767184035, |
|
"loss": 0.0483, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00016784922394678492, |
|
"loss": 0.011, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00016729490022172948, |
|
"loss": 0.0241, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00016674057649667407, |
|
"loss": 0.0881, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9866518353726362, |
|
"eval_loss": 0.05000607296824455, |
|
"eval_runtime": 58.7478, |
|
"eval_samples_per_second": 61.211, |
|
"eval_steps_per_second": 7.66, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00016618625277161863, |
|
"loss": 0.0463, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001656319290465632, |
|
"loss": 0.0324, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00016507760532150778, |
|
"loss": 0.1116, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00016452328159645234, |
|
"loss": 0.0725, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001639689578713969, |
|
"loss": 0.0402, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00016341463414634147, |
|
"loss": 0.0205, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00016286031042128606, |
|
"loss": 0.0759, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001623059866962306, |
|
"loss": 0.0658, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00016175166297117515, |
|
"loss": 0.0316, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00016119733924611974, |
|
"loss": 0.0047, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.9899888765294772, |
|
"eval_loss": 0.04604267328977585, |
|
"eval_runtime": 58.9442, |
|
"eval_samples_per_second": 61.007, |
|
"eval_steps_per_second": 7.634, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001606430155210643, |
|
"loss": 0.0808, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00016008869179600887, |
|
"loss": 0.0872, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00015953436807095346, |
|
"loss": 0.0779, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00015898004434589802, |
|
"loss": 0.0614, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00015842572062084258, |
|
"loss": 0.0226, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00015787139689578714, |
|
"loss": 0.0344, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00015731707317073173, |
|
"loss": 0.0044, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001567627494456763, |
|
"loss": 0.0934, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00015620842572062083, |
|
"loss": 0.0315, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00015565410199556542, |
|
"loss": 0.0755, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9888765294771968, |
|
"eval_loss": 0.04636429622769356, |
|
"eval_runtime": 58.4439, |
|
"eval_samples_per_second": 61.529, |
|
"eval_steps_per_second": 7.7, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00015509977827050998, |
|
"loss": 0.0159, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00015454545454545454, |
|
"loss": 0.1032, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001539911308203991, |
|
"loss": 0.0535, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0001534368070953437, |
|
"loss": 0.1643, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00015288248337028826, |
|
"loss": 0.0909, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00015232815964523282, |
|
"loss": 0.1228, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001517738359201774, |
|
"loss": 0.0528, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015121951219512197, |
|
"loss": 0.0133, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00015066518847006653, |
|
"loss": 0.0948, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001501108647450111, |
|
"loss": 0.0258, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9880422691879867, |
|
"eval_loss": 0.037437278777360916, |
|
"eval_runtime": 58.2201, |
|
"eval_samples_per_second": 61.766, |
|
"eval_steps_per_second": 7.729, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00014955654101995566, |
|
"loss": 0.0117, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00014900221729490022, |
|
"loss": 0.0229, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00014844789356984478, |
|
"loss": 0.0058, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00014789356984478937, |
|
"loss": 0.0059, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00014733924611973393, |
|
"loss": 0.0039, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001467849223946785, |
|
"loss": 0.0459, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00014623059866962309, |
|
"loss": 0.0564, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00014567627494456765, |
|
"loss": 0.0832, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001451219512195122, |
|
"loss": 0.0419, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00014456762749445675, |
|
"loss": 0.0564, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.9874860956618465, |
|
"eval_loss": 0.05776744335889816, |
|
"eval_runtime": 58.8261, |
|
"eval_samples_per_second": 61.129, |
|
"eval_steps_per_second": 7.65, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00014401330376940133, |
|
"loss": 0.0534, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001434589800443459, |
|
"loss": 0.1231, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00014290465631929046, |
|
"loss": 0.087, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00014235033259423505, |
|
"loss": 0.0739, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001417960088691796, |
|
"loss": 0.0574, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00014124168514412417, |
|
"loss": 0.1169, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00014068736141906876, |
|
"loss": 0.017, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00014013303769401332, |
|
"loss": 0.0614, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0001395787139689579, |
|
"loss": 0.0093, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00013902439024390245, |
|
"loss": 0.0453, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.9844271412680756, |
|
"eval_loss": 0.05230843648314476, |
|
"eval_runtime": 59.332, |
|
"eval_samples_per_second": 60.608, |
|
"eval_steps_per_second": 7.584, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.000138470066518847, |
|
"loss": 0.0061, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00013791574279379157, |
|
"loss": 0.0329, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00013736141906873614, |
|
"loss": 0.0105, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00013680709534368072, |
|
"loss": 0.0327, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001362527716186253, |
|
"loss": 0.0884, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00013569844789356985, |
|
"loss": 0.0061, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00013514412416851444, |
|
"loss": 0.0739, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.000134589800443459, |
|
"loss": 0.0185, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00013403547671840356, |
|
"loss": 0.0176, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00013348115299334812, |
|
"loss": 0.0656, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.9805339265850945, |
|
"eval_loss": 0.08926890790462494, |
|
"eval_runtime": 59.14, |
|
"eval_samples_per_second": 60.805, |
|
"eval_steps_per_second": 7.609, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0001329268292682927, |
|
"loss": 0.0047, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00013237250554323725, |
|
"loss": 0.0416, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0001318181818181818, |
|
"loss": 0.0418, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001312638580931264, |
|
"loss": 0.127, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00013070953436807096, |
|
"loss": 0.0412, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00013015521064301553, |
|
"loss": 0.0963, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001296008869179601, |
|
"loss": 0.0722, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00012904656319290468, |
|
"loss": 0.0046, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00012849223946784924, |
|
"loss": 0.0526, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0001279379157427938, |
|
"loss": 0.0103, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_accuracy": 0.9911012235817576, |
|
"eval_loss": 0.03496430441737175, |
|
"eval_runtime": 58.9191, |
|
"eval_samples_per_second": 61.033, |
|
"eval_steps_per_second": 7.638, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00012738359201773836, |
|
"loss": 0.0196, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00012682926829268293, |
|
"loss": 0.0037, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001262749445676275, |
|
"loss": 0.0045, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00012572062084257208, |
|
"loss": 0.0038, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00012516629711751664, |
|
"loss": 0.0048, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001246119733924612, |
|
"loss": 0.0692, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00012405764966740576, |
|
"loss": 0.0357, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00012350332594235035, |
|
"loss": 0.0088, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00012294900221729492, |
|
"loss": 0.0126, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00012239467849223948, |
|
"loss": 0.0057, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.9916573971078977, |
|
"eval_loss": 0.02847210131585598, |
|
"eval_runtime": 59.5202, |
|
"eval_samples_per_second": 60.416, |
|
"eval_steps_per_second": 7.56, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00012184035476718405, |
|
"loss": 0.0051, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00012128603104212862, |
|
"loss": 0.0015, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00012073170731707318, |
|
"loss": 0.0022, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00012017738359201774, |
|
"loss": 0.0014, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00011962305986696232, |
|
"loss": 0.0796, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00011906873614190688, |
|
"loss": 0.0088, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00011851441241685144, |
|
"loss": 0.0083, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00011796008869179602, |
|
"loss": 0.0016, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00011740576496674058, |
|
"loss": 0.0449, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00011685144124168514, |
|
"loss": 0.0543, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.989154616240267, |
|
"eval_loss": 0.033378347754478455, |
|
"eval_runtime": 58.4631, |
|
"eval_samples_per_second": 61.509, |
|
"eval_steps_per_second": 7.697, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00011629711751662973, |
|
"loss": 0.0251, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00011574279379157429, |
|
"loss": 0.008, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00011518847006651885, |
|
"loss": 0.0442, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00011463414634146342, |
|
"loss": 0.0382, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00011407982261640799, |
|
"loss": 0.0136, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00011352549889135255, |
|
"loss": 0.0023, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00011297117516629712, |
|
"loss": 0.0022, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001124168514412417, |
|
"loss": 0.0844, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00011186252771618625, |
|
"loss": 0.01, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00011130820399113082, |
|
"loss": 0.078, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.9916573971078977, |
|
"eval_loss": 0.0301674697548151, |
|
"eval_runtime": 58.0308, |
|
"eval_samples_per_second": 61.967, |
|
"eval_steps_per_second": 7.755, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00011075388026607538, |
|
"loss": 0.0041, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00011019955654101997, |
|
"loss": 0.014, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00010964523281596453, |
|
"loss": 0.0286, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00010909090909090909, |
|
"loss": 0.0142, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00010853658536585367, |
|
"loss": 0.0351, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00010798226164079823, |
|
"loss": 0.0525, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00010742793791574279, |
|
"loss": 0.0086, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00010687361419068738, |
|
"loss": 0.0449, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00010631929046563194, |
|
"loss": 0.0341, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00010576496674057649, |
|
"loss": 0.1053, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.9880422691879867, |
|
"eval_loss": 0.03492213040590286, |
|
"eval_runtime": 57.9028, |
|
"eval_samples_per_second": 62.104, |
|
"eval_steps_per_second": 7.772, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00010521064301552105, |
|
"loss": 0.0068, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010465631929046564, |
|
"loss": 0.0121, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0001041019955654102, |
|
"loss": 0.0037, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010354767184035477, |
|
"loss": 0.0164, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010299334811529934, |
|
"loss": 0.0016, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001024390243902439, |
|
"loss": 0.0593, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010188470066518847, |
|
"loss": 0.0313, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010133037694013303, |
|
"loss": 0.0124, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010077605321507762, |
|
"loss": 0.0029, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010022172949002218, |
|
"loss": 0.071, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9902669632925473, |
|
"eval_loss": 0.034953419119119644, |
|
"eval_runtime": 58.2314, |
|
"eval_samples_per_second": 61.754, |
|
"eval_steps_per_second": 7.728, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.966740576496674e-05, |
|
"loss": 0.0145, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.911308203991131e-05, |
|
"loss": 0.0018, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.855875831485588e-05, |
|
"loss": 0.0021, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.800443458980046e-05, |
|
"loss": 0.0016, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.745011086474502e-05, |
|
"loss": 0.0025, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.689578713968958e-05, |
|
"loss": 0.0013, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.634146341463415e-05, |
|
"loss": 0.0441, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.578713968957872e-05, |
|
"loss": 0.0723, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.523281596452328e-05, |
|
"loss": 0.0099, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.467849223946786e-05, |
|
"loss": 0.0454, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.9899888765294772, |
|
"eval_loss": 0.043182216584682465, |
|
"eval_runtime": 58.2341, |
|
"eval_samples_per_second": 61.751, |
|
"eval_steps_per_second": 7.727, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.412416851441242e-05, |
|
"loss": 0.0451, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.356984478935698e-05, |
|
"loss": 0.0121, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.301552106430156e-05, |
|
"loss": 0.0117, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.246119733924612e-05, |
|
"loss": 0.0178, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.19068736141907e-05, |
|
"loss": 0.002, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.135254988913526e-05, |
|
"loss": 0.0023, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 9.079822616407982e-05, |
|
"loss": 0.0159, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.02439024390244e-05, |
|
"loss": 0.0051, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.968957871396896e-05, |
|
"loss": 0.0199, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.913525498891354e-05, |
|
"loss": 0.0465, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.985539488320356, |
|
"eval_loss": 0.05088553577661514, |
|
"eval_runtime": 58.1787, |
|
"eval_samples_per_second": 61.81, |
|
"eval_steps_per_second": 7.735, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 8.85809312638581e-05, |
|
"loss": 0.0223, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 8.802660753880266e-05, |
|
"loss": 0.012, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 8.747228381374724e-05, |
|
"loss": 0.0025, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 8.69179600886918e-05, |
|
"loss": 0.0413, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 8.636363636363637e-05, |
|
"loss": 0.0428, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.580931263858094e-05, |
|
"loss": 0.0134, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 8.52549889135255e-05, |
|
"loss": 0.063, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 8.470066518847007e-05, |
|
"loss": 0.0095, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 8.414634146341464e-05, |
|
"loss": 0.0495, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 8.359201773835921e-05, |
|
"loss": 0.0024, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_accuracy": 0.9933259176863182, |
|
"eval_loss": 0.022791525349020958, |
|
"eval_runtime": 58.8696, |
|
"eval_samples_per_second": 61.084, |
|
"eval_steps_per_second": 7.644, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 8.303769401330377e-05, |
|
"loss": 0.0253, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 8.248337028824834e-05, |
|
"loss": 0.0022, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 8.192904656319291e-05, |
|
"loss": 0.0478, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 8.137472283813747e-05, |
|
"loss": 0.0149, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 8.082039911308205e-05, |
|
"loss": 0.0143, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 8.026607538802661e-05, |
|
"loss": 0.0273, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.971175166297117e-05, |
|
"loss": 0.0021, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.915742793791575e-05, |
|
"loss": 0.0036, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.860310421286031e-05, |
|
"loss": 0.0025, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 7.804878048780489e-05, |
|
"loss": 0.0039, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.993047830923248, |
|
"eval_loss": 0.0303057748824358, |
|
"eval_runtime": 58.3751, |
|
"eval_samples_per_second": 61.602, |
|
"eval_steps_per_second": 7.709, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 7.749445676274945e-05, |
|
"loss": 0.0417, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.694013303769401e-05, |
|
"loss": 0.0094, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.638580931263859e-05, |
|
"loss": 0.0402, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.583148558758315e-05, |
|
"loss": 0.0081, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 7.527716186252773e-05, |
|
"loss": 0.0083, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 7.472283813747229e-05, |
|
"loss": 0.0277, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.416851441241685e-05, |
|
"loss": 0.0028, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.361419068736141e-05, |
|
"loss": 0.0051, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.305986696230599e-05, |
|
"loss": 0.009, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.250554323725056e-05, |
|
"loss": 0.0111, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_accuracy": 0.9924916573971079, |
|
"eval_loss": 0.031308963894844055, |
|
"eval_runtime": 58.2011, |
|
"eval_samples_per_second": 61.786, |
|
"eval_steps_per_second": 7.732, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.195121951219513e-05, |
|
"loss": 0.0012, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.139689578713969e-05, |
|
"loss": 0.0015, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.084257206208425e-05, |
|
"loss": 0.0401, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.028824833702883e-05, |
|
"loss": 0.0013, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.97339246119734e-05, |
|
"loss": 0.0013, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.917960088691796e-05, |
|
"loss": 0.0025, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.862527716186254e-05, |
|
"loss": 0.0657, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.807095343680709e-05, |
|
"loss": 0.0433, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.751662971175166e-05, |
|
"loss": 0.0048, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.696230598669624e-05, |
|
"loss": 0.0082, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_accuracy": 0.9913793103448276, |
|
"eval_loss": 0.03090614266693592, |
|
"eval_runtime": 58.3701, |
|
"eval_samples_per_second": 61.607, |
|
"eval_steps_per_second": 7.709, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.64079822616408e-05, |
|
"loss": 0.0016, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 6.585365853658538e-05, |
|
"loss": 0.0014, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 6.529933481152993e-05, |
|
"loss": 0.0038, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 6.47450110864745e-05, |
|
"loss": 0.0011, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 6.419068736141908e-05, |
|
"loss": 0.0099, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 6.363636363636364e-05, |
|
"loss": 0.031, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6.308203991130822e-05, |
|
"loss": 0.041, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 6.252771618625277e-05, |
|
"loss": 0.0017, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 6.197339246119734e-05, |
|
"loss": 0.0151, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.14190687361419e-05, |
|
"loss": 0.0296, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.9947163515016685, |
|
"eval_loss": 0.024169722571969032, |
|
"eval_runtime": 57.9558, |
|
"eval_samples_per_second": 62.047, |
|
"eval_steps_per_second": 7.765, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.086474501108648e-05, |
|
"loss": 0.0013, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.031042128603105e-05, |
|
"loss": 0.0013, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.975609756097561e-05, |
|
"loss": 0.0015, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.920177383592018e-05, |
|
"loss": 0.0167, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.864745011086474e-05, |
|
"loss": 0.018, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.809312638580932e-05, |
|
"loss": 0.0134, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.7538802660753886e-05, |
|
"loss": 0.0086, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 5.698447893569845e-05, |
|
"loss": 0.0029, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 5.6430155210643024e-05, |
|
"loss": 0.0014, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5.587583148558758e-05, |
|
"loss": 0.0011, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_accuracy": 0.9924916573971079, |
|
"eval_loss": 0.028317071497440338, |
|
"eval_runtime": 58.1403, |
|
"eval_samples_per_second": 61.85, |
|
"eval_steps_per_second": 7.74, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5.5321507760532155e-05, |
|
"loss": 0.0009, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.4767184035476724e-05, |
|
"loss": 0.001, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.4212860310421286e-05, |
|
"loss": 0.0014, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.365853658536586e-05, |
|
"loss": 0.0014, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.310421286031042e-05, |
|
"loss": 0.0118, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.254988913525499e-05, |
|
"loss": 0.0011, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5.1995565410199555e-05, |
|
"loss": 0.0098, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.1441241685144124e-05, |
|
"loss": 0.0087, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 5.08869179600887e-05, |
|
"loss": 0.0011, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.033259423503326e-05, |
|
"loss": 0.0139, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.9941601779755284, |
|
"eval_loss": 0.022749271243810654, |
|
"eval_runtime": 58.1506, |
|
"eval_samples_per_second": 61.839, |
|
"eval_steps_per_second": 7.739, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.977827050997783e-05, |
|
"loss": 0.0036, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.92239467849224e-05, |
|
"loss": 0.0012, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.866962305986696e-05, |
|
"loss": 0.0123, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.811529933481153e-05, |
|
"loss": 0.0008, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 4.75609756097561e-05, |
|
"loss": 0.0008, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.700665188470067e-05, |
|
"loss": 0.0235, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 4.645232815964524e-05, |
|
"loss": 0.0009, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.58980044345898e-05, |
|
"loss": 0.0119, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.534368070953437e-05, |
|
"loss": 0.0452, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 4.478935698447894e-05, |
|
"loss": 0.0008, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.9927697441601779, |
|
"eval_loss": 0.02841453067958355, |
|
"eval_runtime": 58.4898, |
|
"eval_samples_per_second": 61.481, |
|
"eval_steps_per_second": 7.694, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 4.42350332594235e-05, |
|
"loss": 0.005, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.3680709534368077e-05, |
|
"loss": 0.0389, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 4.312638580931264e-05, |
|
"loss": 0.0019, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.257206208425721e-05, |
|
"loss": 0.0065, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 4.201773835920178e-05, |
|
"loss": 0.0025, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 4.146341463414634e-05, |
|
"loss": 0.0018, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.0909090909090915e-05, |
|
"loss": 0.0026, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.035476718403548e-05, |
|
"loss": 0.0008, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.9800443458980046e-05, |
|
"loss": 0.0009, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.9246119733924615e-05, |
|
"loss": 0.0042, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.9938820912124583, |
|
"eval_loss": 0.025445684790611267, |
|
"eval_runtime": 58.549, |
|
"eval_samples_per_second": 61.419, |
|
"eval_steps_per_second": 7.686, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.869179600886918e-05, |
|
"loss": 0.0066, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.8137472283813746e-05, |
|
"loss": 0.0436, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.758314855875832e-05, |
|
"loss": 0.0023, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.7028824833702884e-05, |
|
"loss": 0.0008, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.647450110864745e-05, |
|
"loss": 0.0008, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.5920177383592015e-05, |
|
"loss": 0.0087, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.5365853658536584e-05, |
|
"loss": 0.044, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.481152993348116e-05, |
|
"loss": 0.018, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.425720620842572e-05, |
|
"loss": 0.001, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.370288248337029e-05, |
|
"loss": 0.0032, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy": 0.9933259176863182, |
|
"eval_loss": 0.023396195843815804, |
|
"eval_runtime": 58.2141, |
|
"eval_samples_per_second": 61.772, |
|
"eval_steps_per_second": 7.73, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.314855875831486e-05, |
|
"loss": 0.0019, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.259423503325942e-05, |
|
"loss": 0.0009, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.203991130820399e-05, |
|
"loss": 0.0009, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.148558758314856e-05, |
|
"loss": 0.0037, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.093126385809313e-05, |
|
"loss": 0.0279, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.0376940133037695e-05, |
|
"loss": 0.0009, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 2.9822616407982264e-05, |
|
"loss": 0.0018, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.926829268292683e-05, |
|
"loss": 0.0009, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 2.8713968957871395e-05, |
|
"loss": 0.0467, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.8159645232815967e-05, |
|
"loss": 0.0142, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_accuracy": 0.9933259176863182, |
|
"eval_loss": 0.024244721978902817, |
|
"eval_runtime": 58.0241, |
|
"eval_samples_per_second": 61.974, |
|
"eval_steps_per_second": 7.755, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.7605321507760533e-05, |
|
"loss": 0.0008, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 2.7050997782705102e-05, |
|
"loss": 0.0037, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.6496674057649668e-05, |
|
"loss": 0.0009, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 2.5942350332594233e-05, |
|
"loss": 0.0008, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 2.5388026607538806e-05, |
|
"loss": 0.0249, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.483370288248337e-05, |
|
"loss": 0.0011, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.427937915742794e-05, |
|
"loss": 0.0007, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.3725055432372506e-05, |
|
"loss": 0.0007, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.3170731707317075e-05, |
|
"loss": 0.0007, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.261640798226164e-05, |
|
"loss": 0.0434, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_accuracy": 0.993047830923248, |
|
"eval_loss": 0.026953959837555885, |
|
"eval_runtime": 58.4633, |
|
"eval_samples_per_second": 61.509, |
|
"eval_steps_per_second": 7.697, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.206208425720621e-05, |
|
"loss": 0.0008, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.150776053215078e-05, |
|
"loss": 0.0009, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.0953436807095344e-05, |
|
"loss": 0.0009, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.0399113082039913e-05, |
|
"loss": 0.001, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.9844789356984482e-05, |
|
"loss": 0.0039, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.9290465631929047e-05, |
|
"loss": 0.0015, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.8736141906873613e-05, |
|
"loss": 0.0013, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.0009, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 1.762749445676275e-05, |
|
"loss": 0.0063, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.707317073170732e-05, |
|
"loss": 0.0196, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_accuracy": 0.9936040044493882, |
|
"eval_loss": 0.022605180740356445, |
|
"eval_runtime": 57.9368, |
|
"eval_samples_per_second": 62.068, |
|
"eval_steps_per_second": 7.767, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.6518847006651886e-05, |
|
"loss": 0.0068, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.596452328159645e-05, |
|
"loss": 0.0104, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.541019955654102e-05, |
|
"loss": 0.0197, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.485587583148559e-05, |
|
"loss": 0.003, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.4301552106430155e-05, |
|
"loss": 0.0011, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.3747228381374724e-05, |
|
"loss": 0.0181, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.3192904656319291e-05, |
|
"loss": 0.002, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.2638580931263858e-05, |
|
"loss": 0.0007, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.2084257206208427e-05, |
|
"loss": 0.0007, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.1529933481152993e-05, |
|
"loss": 0.0007, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_accuracy": 0.9944382647385984, |
|
"eval_loss": 0.02052154392004013, |
|
"eval_runtime": 57.8787, |
|
"eval_samples_per_second": 62.13, |
|
"eval_steps_per_second": 7.775, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.0975609756097562e-05, |
|
"loss": 0.0008, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.042128603104213e-05, |
|
"loss": 0.0007, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 9.866962305986696e-06, |
|
"loss": 0.0083, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 9.312638580931264e-06, |
|
"loss": 0.0012, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 8.758314855875833e-06, |
|
"loss": 0.0018, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 8.2039911308204e-06, |
|
"loss": 0.0419, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 7.649667405764967e-06, |
|
"loss": 0.0008, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 7.095343680709535e-06, |
|
"loss": 0.0009, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.541019955654103e-06, |
|
"loss": 0.0071, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 5.98669623059867e-06, |
|
"loss": 0.0042, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_accuracy": 0.9941601779755284, |
|
"eval_loss": 0.020064357668161392, |
|
"eval_runtime": 55.4082, |
|
"eval_samples_per_second": 64.9, |
|
"eval_steps_per_second": 8.122, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 5.432372505543237e-06, |
|
"loss": 0.0426, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.8780487804878055e-06, |
|
"loss": 0.0028, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.323725055432373e-06, |
|
"loss": 0.002, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 3.7694013303769405e-06, |
|
"loss": 0.0012, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.2150776053215078e-06, |
|
"loss": 0.0008, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.6607538802660755e-06, |
|
"loss": 0.0013, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.106430155210643e-06, |
|
"loss": 0.0025, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.5521064301552107e-06, |
|
"loss": 0.0033, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 9.977827050997782e-07, |
|
"loss": 0.0083, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 4.434589800443459e-07, |
|
"loss": 0.0008, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.9947163515016685, |
|
"eval_loss": 0.02037554606795311, |
|
"eval_runtime": 55.7966, |
|
"eval_samples_per_second": 64.448, |
|
"eval_steps_per_second": 8.065, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 3608, |
|
"total_flos": 4.4722177046373335e+18, |
|
"train_loss": 0.03198626538376714, |
|
"train_runtime": 5538.163, |
|
"train_samples_per_second": 10.421, |
|
"train_steps_per_second": 0.651 |
|
} |
|
], |
|
"max_steps": 3608, |
|
"num_train_epochs": 4, |
|
"total_flos": 4.4722177046373335e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|