|
{ |
|
"best_metric": 1.5761640071868896, |
|
"best_model_checkpoint": "hsb_baichuan/checkpoint-12000", |
|
"epoch": 2.8293545534924847, |
|
"global_step": 12000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.999992378674973e-05, |
|
"loss": 1.8463, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.999969514746361e-05, |
|
"loss": 1.7893, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999931408353566e-05, |
|
"loss": 1.7552, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999878059728925e-05, |
|
"loss": 1.7823, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.999809469197708e-05, |
|
"loss": 1.7552, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9997256371781154e-05, |
|
"loss": 1.7256, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.999626564181277e-05, |
|
"loss": 1.7518, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9995122508112445e-05, |
|
"loss": 1.6769, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9993826977649954e-05, |
|
"loss": 1.6256, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.999237905832422e-05, |
|
"loss": 1.7261, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.999077875896329e-05, |
|
"loss": 1.678, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.998902608932429e-05, |
|
"loss": 1.6767, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.998712106009335e-05, |
|
"loss": 1.6371, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9985063682885534e-05, |
|
"loss": 1.7198, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9982853970244816e-05, |
|
"loss": 1.6614, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.998049193564394e-05, |
|
"loss": 1.6369, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9977977593484373e-05, |
|
"loss": 1.6423, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.99753109590962e-05, |
|
"loss": 1.7224, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.997249204873807e-05, |
|
"loss": 1.633, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9969520879597025e-05, |
|
"loss": 1.6923, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.996639746978848e-05, |
|
"loss": 1.6915, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.996312183835605e-05, |
|
"loss": 1.6234, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.995969400527144e-05, |
|
"loss": 1.6664, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9956113991434375e-05, |
|
"loss": 1.7018, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.995238181867241e-05, |
|
"loss": 1.6782, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.994849750974081e-05, |
|
"loss": 1.6428, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.994446108832246e-05, |
|
"loss": 1.6444, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.994027257902766e-05, |
|
"loss": 1.7016, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9935932007393986e-05, |
|
"loss": 1.6771, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.993143939988618e-05, |
|
"loss": 1.6449, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.992679478389593e-05, |
|
"loss": 1.6916, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.992199818774176e-05, |
|
"loss": 1.6751, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9917049640668776e-05, |
|
"loss": 1.6137, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9911949172848585e-05, |
|
"loss": 1.67, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.990669681537903e-05, |
|
"loss": 1.6393, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9901292600284065e-05, |
|
"loss": 1.5925, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.989573656051351e-05, |
|
"loss": 1.6982, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9890028729942875e-05, |
|
"loss": 1.6566, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9884169143373135e-05, |
|
"loss": 1.6258, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.987815783653055e-05, |
|
"loss": 1.6007, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9871994846066405e-05, |
|
"loss": 1.6167, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.986568020955685e-05, |
|
"loss": 1.6593, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.9859213965502574e-05, |
|
"loss": 1.6396, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.985259615332868e-05, |
|
"loss": 1.6635, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.984582681338435e-05, |
|
"loss": 1.6402, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.983890598694265e-05, |
|
"loss": 1.6745, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9831833716200296e-05, |
|
"loss": 1.7139, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.982461004427733e-05, |
|
"loss": 1.6294, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.981723501521692e-05, |
|
"loss": 1.6174, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.980970867398506e-05, |
|
"loss": 1.5851, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.98020310664703e-05, |
|
"loss": 1.6651, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.979420223948348e-05, |
|
"loss": 1.5993, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.978622224075742e-05, |
|
"loss": 1.6467, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.977809111894667e-05, |
|
"loss": 1.6865, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9769808923627136e-05, |
|
"loss": 1.6948, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9761375705295894e-05, |
|
"loss": 1.6155, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.975279151537076e-05, |
|
"loss": 1.6964, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9744056406190066e-05, |
|
"loss": 1.6332, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.973517043101229e-05, |
|
"loss": 1.6989, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9726133644015753e-05, |
|
"loss": 1.6102, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.97169461002983e-05, |
|
"loss": 1.7337, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.970760785587693e-05, |
|
"loss": 1.6642, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.969811896768748e-05, |
|
"loss": 1.6126, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.968847949358427e-05, |
|
"loss": 1.6332, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.967868949233975e-05, |
|
"loss": 1.644, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9668749023644154e-05, |
|
"loss": 1.6937, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.965865814810511e-05, |
|
"loss": 1.5925, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.964841692724729e-05, |
|
"loss": 1.646, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.963802542351203e-05, |
|
"loss": 1.646, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.962748370025696e-05, |
|
"loss": 1.6175, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.961679182175559e-05, |
|
"loss": 1.6747, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.960594985319696e-05, |
|
"loss": 1.5985, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.959495786068519e-05, |
|
"loss": 1.6171, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.958381591123912e-05, |
|
"loss": 1.6319, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.957252407279189e-05, |
|
"loss": 1.6629, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.956108241419052e-05, |
|
"loss": 1.6572, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.954949100519547e-05, |
|
"loss": 1.6324, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.953774991648027e-05, |
|
"loss": 1.7111, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.952585921963104e-05, |
|
"loss": 1.69, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.951381898714609e-05, |
|
"loss": 1.6322, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.95016292924354e-05, |
|
"loss": 1.6527, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.9489290209820313e-05, |
|
"loss": 1.6626, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.947680181453293e-05, |
|
"loss": 1.675, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.9464164182715755e-05, |
|
"loss": 1.5939, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.945137739142119e-05, |
|
"loss": 1.5833, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.943844151861106e-05, |
|
"loss": 1.6186, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9425356643156165e-05, |
|
"loss": 1.6172, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.941212284483578e-05, |
|
"loss": 1.5903, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.939874020433716e-05, |
|
"loss": 1.6082, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.938520880325507e-05, |
|
"loss": 1.6156, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.9371528724091275e-05, |
|
"loss": 1.6322, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.935770005025403e-05, |
|
"loss": 1.5638, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9343722866057605e-05, |
|
"loss": 1.6491, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.932959725672173e-05, |
|
"loss": 1.6502, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.9315323308371074e-05, |
|
"loss": 1.6091, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.930090110803478e-05, |
|
"loss": 1.6447, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9286330743645845e-05, |
|
"loss": 1.5945, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.9271612304040685e-05, |
|
"loss": 1.6577, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.925674587895848e-05, |
|
"loss": 1.5934, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.924173155904074e-05, |
|
"loss": 1.5242, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6374789476394653, |
|
"eval_runtime": 120.8306, |
|
"eval_samples_per_second": 11.346, |
|
"eval_steps_per_second": 2.839, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.922656943583066e-05, |
|
"loss": 1.6052, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.9211259601772615e-05, |
|
"loss": 1.6244, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.919580215021159e-05, |
|
"loss": 1.5851, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.91801971753926e-05, |
|
"loss": 1.6342, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9164444772460085e-05, |
|
"loss": 1.5859, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9148545037457425e-05, |
|
"loss": 1.6124, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.9132498067326236e-05, |
|
"loss": 1.602, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.911630395990587e-05, |
|
"loss": 1.6564, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9099962813932774e-05, |
|
"loss": 1.5995, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.908347472903989e-05, |
|
"loss": 1.5934, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.906683980575606e-05, |
|
"loss": 1.676, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9050058145505405e-05, |
|
"loss": 1.6273, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9033129850606724e-05, |
|
"loss": 1.6725, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.9016055024272844e-05, |
|
"loss": 1.5949, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.899883377061001e-05, |
|
"loss": 1.6016, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.898146619461723e-05, |
|
"loss": 1.6214, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.8963952402185666e-05, |
|
"loss": 1.6544, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.8946292500097956e-05, |
|
"loss": 1.6928, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.892848659602759e-05, |
|
"loss": 1.617, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.891053479853822e-05, |
|
"loss": 1.5759, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.8892437217083046e-05, |
|
"loss": 1.5391, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.8874193962004105e-05, |
|
"loss": 1.6787, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.885580514453162e-05, |
|
"loss": 1.623, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.883727087678331e-05, |
|
"loss": 1.5873, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.8818591271763714e-05, |
|
"loss": 1.6066, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.879976644336352e-05, |
|
"loss": 1.6801, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.8780796506358825e-05, |
|
"loss": 1.5833, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.876168157641048e-05, |
|
"loss": 1.6128, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.874242177006335e-05, |
|
"loss": 1.6176, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.872301720474564e-05, |
|
"loss": 1.575, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8703467998768134e-05, |
|
"loss": 1.595, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8683774271323544e-05, |
|
"loss": 1.6355, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8663936142485685e-05, |
|
"loss": 1.5456, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.8643953733208824e-05, |
|
"loss": 1.6025, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.862382716532691e-05, |
|
"loss": 1.6777, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.8603556561552835e-05, |
|
"loss": 1.5932, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.8583142045477694e-05, |
|
"loss": 1.6043, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.856258374157e-05, |
|
"loss": 1.594, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.854188177517499e-05, |
|
"loss": 1.605, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.852103627251377e-05, |
|
"loss": 1.6241, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.8500047360682636e-05, |
|
"loss": 1.6771, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.8478915167652244e-05, |
|
"loss": 1.5895, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.8457639822266844e-05, |
|
"loss": 1.6305, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.843622145424348e-05, |
|
"loss": 1.6484, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.8414660194171244e-05, |
|
"loss": 1.5931, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.839295617351042e-05, |
|
"loss": 1.571, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.837110952459173e-05, |
|
"loss": 1.6011, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.834912038061551e-05, |
|
"loss": 1.588, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.832698887565088e-05, |
|
"loss": 1.6041, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.830471514463496e-05, |
|
"loss": 1.5719, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8282299323372027e-05, |
|
"loss": 1.5701, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8259741548532675e-05, |
|
"loss": 1.5389, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.823704195765303e-05, |
|
"loss": 1.5716, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8214200689133846e-05, |
|
"loss": 1.6128, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.819121788223972e-05, |
|
"loss": 1.585, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.81680936770982e-05, |
|
"loss": 1.6296, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.814482821469895e-05, |
|
"loss": 1.6498, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8121421636892896e-05, |
|
"loss": 1.636, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.809787408639133e-05, |
|
"loss": 1.5585, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8074185706765105e-05, |
|
"loss": 1.56, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.805035664244368e-05, |
|
"loss": 1.5903, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8026387038714294e-05, |
|
"loss": 1.6253, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.800227704172106e-05, |
|
"loss": 1.6222, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.797802679846408e-05, |
|
"loss": 1.5612, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.795363645679853e-05, |
|
"loss": 1.6505, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.79291061654338e-05, |
|
"loss": 1.574, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.7904436073932546e-05, |
|
"loss": 1.6852, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.787962633270979e-05, |
|
"loss": 1.6222, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.785467709303203e-05, |
|
"loss": 1.6398, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.782958850701626e-05, |
|
"loss": 1.6125, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.7804360727629094e-05, |
|
"loss": 1.5746, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.777899390868583e-05, |
|
"loss": 1.6119, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7753488204849474e-05, |
|
"loss": 1.6468, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.772784377162984e-05, |
|
"loss": 1.6201, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7702060765382585e-05, |
|
"loss": 1.6671, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7676139343308236e-05, |
|
"loss": 1.6204, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.765007966345125e-05, |
|
"loss": 1.6959, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.762388188469907e-05, |
|
"loss": 1.6466, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.7597546166781125e-05, |
|
"loss": 1.5926, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.757107267026787e-05, |
|
"loss": 1.6089, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.75444615565698e-05, |
|
"loss": 1.5725, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.751771298793647e-05, |
|
"loss": 1.6761, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.7490827127455504e-05, |
|
"loss": 1.5831, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.746380413905162e-05, |
|
"loss": 1.5859, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.743664418748559e-05, |
|
"loss": 1.5971, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.740934743835328e-05, |
|
"loss": 1.5726, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7381914058084586e-05, |
|
"loss": 1.5331, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7354344213942506e-05, |
|
"loss": 1.5822, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7326638074022e-05, |
|
"loss": 1.5794, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7298795807249085e-05, |
|
"loss": 1.5759, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.727081758337974e-05, |
|
"loss": 1.6224, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.724270357299886e-05, |
|
"loss": 1.5642, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7214453947519256e-05, |
|
"loss": 1.556, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.71860688791806e-05, |
|
"loss": 1.585, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.715754854104835e-05, |
|
"loss": 1.6414, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7128893107012716e-05, |
|
"loss": 1.5784, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.71001027517876e-05, |
|
"loss": 1.6047, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.707117765090954e-05, |
|
"loss": 1.648, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.704211798073659e-05, |
|
"loss": 1.6223, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.7012923918447326e-05, |
|
"loss": 1.6081, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6162242889404297, |
|
"eval_runtime": 120.2644, |
|
"eval_samples_per_second": 11.4, |
|
"eval_steps_per_second": 2.852, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.698359564203968e-05, |
|
"loss": 1.6442, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.695413333032992e-05, |
|
"loss": 1.5722, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.692453716295153e-05, |
|
"loss": 1.6816, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6894807320354125e-05, |
|
"loss": 1.5378, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6864943983802324e-05, |
|
"loss": 1.6699, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6834947335374696e-05, |
|
"loss": 1.5855, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.68048175579626e-05, |
|
"loss": 1.6015, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.67745548352691e-05, |
|
"loss": 1.5387, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.6744159351807837e-05, |
|
"loss": 1.6405, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.671363129290188e-05, |
|
"loss": 1.6547, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.668297084468266e-05, |
|
"loss": 1.6246, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.665217819408876e-05, |
|
"loss": 1.5588, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.662125352886482e-05, |
|
"loss": 1.6331, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6590197037560367e-05, |
|
"loss": 1.6697, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.655900890952872e-05, |
|
"loss": 1.6041, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.652768933492574e-05, |
|
"loss": 1.6565, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6496238504708764e-05, |
|
"loss": 1.6182, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6464656610635405e-05, |
|
"loss": 1.6574, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.643294384526234e-05, |
|
"loss": 1.5828, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.640110040194423e-05, |
|
"loss": 1.5964, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6369126474832434e-05, |
|
"loss": 1.6485, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.633702225887393e-05, |
|
"loss": 1.647, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.6304787949810037e-05, |
|
"loss": 1.6325, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.627242374417527e-05, |
|
"loss": 1.5784, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.6239929839296125e-05, |
|
"loss": 1.6343, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.6207306433289916e-05, |
|
"loss": 1.6395, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.6174553725063484e-05, |
|
"loss": 1.6122, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.6141671914312076e-05, |
|
"loss": 1.5881, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.610866120151805e-05, |
|
"loss": 1.6092, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.60755217879497e-05, |
|
"loss": 1.6278, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.604225387566005e-05, |
|
"loss": 1.5755, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.600885766748552e-05, |
|
"loss": 1.6634, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.597533336704482e-05, |
|
"loss": 1.5873, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.594168117873761e-05, |
|
"loss": 1.616, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.59079013077433e-05, |
|
"loss": 1.6475, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.58739939600198e-05, |
|
"loss": 1.6932, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.583995934230225e-05, |
|
"loss": 1.5927, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.580579766210175e-05, |
|
"loss": 1.5839, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.57715091277041e-05, |
|
"loss": 1.704, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5737093948168566e-05, |
|
"loss": 1.6202, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5702552333326574e-05, |
|
"loss": 1.5782, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.56678844937804e-05, |
|
"loss": 1.6271, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5633090640901965e-05, |
|
"loss": 1.6253, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.559817098683146e-05, |
|
"loss": 1.6356, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.556312574447612e-05, |
|
"loss": 1.5635, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.552795512750889e-05, |
|
"loss": 1.6473, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.549265935036714e-05, |
|
"loss": 1.6278, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.545723862825133e-05, |
|
"loss": 1.6253, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.5421693177123724e-05, |
|
"loss": 1.6483, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.5386023213707095e-05, |
|
"loss": 1.6938, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.5350228955483334e-05, |
|
"loss": 1.6065, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.531431062069217e-05, |
|
"loss": 1.6266, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.527826842832987e-05, |
|
"loss": 1.5903, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.524210259814784e-05, |
|
"loss": 1.6314, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.520581335065131e-05, |
|
"loss": 1.5874, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.516940090709799e-05, |
|
"loss": 1.6006, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.5132865489496756e-05, |
|
"loss": 1.5898, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.509620732060623e-05, |
|
"loss": 1.6183, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.505942662393346e-05, |
|
"loss": 1.5927, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.5022523623732586e-05, |
|
"loss": 1.62, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.498549854500339e-05, |
|
"loss": 1.6127, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4948351613490017e-05, |
|
"loss": 1.6201, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4911083055679526e-05, |
|
"loss": 1.5915, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4873693098800564e-05, |
|
"loss": 1.5617, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4836181970821924e-05, |
|
"loss": 1.6041, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.479854990045121e-05, |
|
"loss": 1.5829, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.476079711713343e-05, |
|
"loss": 1.5854, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.4722923851049545e-05, |
|
"loss": 1.6142, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.468493033311515e-05, |
|
"loss": 1.603, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.464681679497901e-05, |
|
"loss": 1.5794, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.460858346902162e-05, |
|
"loss": 1.5865, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.4570230588353914e-05, |
|
"loss": 1.616, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.4531758386815665e-05, |
|
"loss": 1.63, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.449316709897421e-05, |
|
"loss": 1.576, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.445445696012295e-05, |
|
"loss": 1.6069, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.441562820627991e-05, |
|
"loss": 1.6056, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.4376681074186364e-05, |
|
"loss": 1.5949, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.4337615801305286e-05, |
|
"loss": 1.6223, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.429843262582e-05, |
|
"loss": 1.6109, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.425913178663268e-05, |
|
"loss": 1.5932, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.421971352336289e-05, |
|
"loss": 1.6713, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.418017807634616e-05, |
|
"loss": 1.64, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.414052568663248e-05, |
|
"loss": 1.662, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.4100756595984846e-05, |
|
"loss": 1.5613, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.40608710468778e-05, |
|
"loss": 1.6049, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.4020869282495916e-05, |
|
"loss": 1.5997, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.398075154673237e-05, |
|
"loss": 1.6365, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.3940518084187384e-05, |
|
"loss": 1.6177, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.39001691401668e-05, |
|
"loss": 1.6507, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.385970496068057e-05, |
|
"loss": 1.5873, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.38191257924412e-05, |
|
"loss": 1.5996, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.377843188286233e-05, |
|
"loss": 1.665, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.3737623480057165e-05, |
|
"loss": 1.6183, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.369670083283698e-05, |
|
"loss": 1.6413, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.365566419070962e-05, |
|
"loss": 1.5564, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.3614513803877956e-05, |
|
"loss": 1.6456, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.357324992323836e-05, |
|
"loss": 1.589, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.353187280037918e-05, |
|
"loss": 1.5581, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.349038268757924e-05, |
|
"loss": 1.6063, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.344877983780624e-05, |
|
"loss": 1.611, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.6054201126098633, |
|
"eval_runtime": 120.533, |
|
"eval_samples_per_second": 11.374, |
|
"eval_steps_per_second": 2.846, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.340706450471524e-05, |
|
"loss": 1.6163, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.3365236942647146e-05, |
|
"loss": 1.6384, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.33232974066271e-05, |
|
"loss": 1.5703, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.3281246152362986e-05, |
|
"loss": 1.5774, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.323908343624381e-05, |
|
"loss": 1.6061, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.319680951533819e-05, |
|
"loss": 1.5721, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.315442464739276e-05, |
|
"loss": 1.5836, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.3111929090830605e-05, |
|
"loss": 1.6175, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.306932310474968e-05, |
|
"loss": 1.595, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.302660694892124e-05, |
|
"loss": 1.5982, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2983780883788247e-05, |
|
"loss": 1.5907, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.2940845170463806e-05, |
|
"loss": 1.6695, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.289780007072952e-05, |
|
"loss": 1.5913, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.285464584703396e-05, |
|
"loss": 1.553, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.2811382762491e-05, |
|
"loss": 1.618, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.276801108087829e-05, |
|
"loss": 1.4985, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.272453106663555e-05, |
|
"loss": 1.6046, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.268094298486305e-05, |
|
"loss": 1.6321, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.263724710131994e-05, |
|
"loss": 1.5503, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.259344368242264e-05, |
|
"loss": 1.573, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.254953299524323e-05, |
|
"loss": 1.528, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.250551530750779e-05, |
|
"loss": 1.5518, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.246139088759483e-05, |
|
"loss": 1.6624, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.241716000453357e-05, |
|
"loss": 1.5525, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.237282292800237e-05, |
|
"loss": 1.559, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.2328379928327025e-05, |
|
"loss": 1.5948, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.2283831276479185e-05, |
|
"loss": 1.6198, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.2239177244074655e-05, |
|
"loss": 1.5895, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.219441810337176e-05, |
|
"loss": 1.6092, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.214955412726965e-05, |
|
"loss": 1.5791, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.210458558930668e-05, |
|
"loss": 1.6058, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.205951276365875e-05, |
|
"loss": 1.5717, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.201433592513755e-05, |
|
"loss": 1.5903, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.1969055349189e-05, |
|
"loss": 1.6179, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.192367131189148e-05, |
|
"loss": 1.6235, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.1878184089954185e-05, |
|
"loss": 1.5712, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.183259396071545e-05, |
|
"loss": 1.6418, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.178690120214102e-05, |
|
"loss": 1.5748, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.1741106092822386e-05, |
|
"loss": 1.5349, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.169520891197508e-05, |
|
"loss": 1.6124, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.164920993943697e-05, |
|
"loss": 1.5648, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.1603109455666564e-05, |
|
"loss": 1.6162, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.1556907741741244e-05, |
|
"loss": 1.5948, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.151060507935568e-05, |
|
"loss": 1.6071, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.146420175081995e-05, |
|
"loss": 1.612, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.141769803905793e-05, |
|
"loss": 1.6257, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.1371094227605564e-05, |
|
"loss": 1.5877, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.132439060060908e-05, |
|
"loss": 1.5676, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.127758744282329e-05, |
|
"loss": 1.5605, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.123068503960986e-05, |
|
"loss": 1.6394, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1183683676935555e-05, |
|
"loss": 1.6232, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.113658364137051e-05, |
|
"loss": 1.5559, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.108938522008646e-05, |
|
"loss": 1.5552, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.104208870085502e-05, |
|
"loss": 1.5411, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0994694372045906e-05, |
|
"loss": 1.6087, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0947202522625175e-05, |
|
"loss": 1.5833, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.08996134421535e-05, |
|
"loss": 1.5919, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.0851927420784353e-05, |
|
"loss": 1.5449, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.080414474926226e-05, |
|
"loss": 1.6524, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.075626571892105e-05, |
|
"loss": 1.5543, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.0708290621682045e-05, |
|
"loss": 1.565, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.066021975005228e-05, |
|
"loss": 1.6035, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.061205339712275e-05, |
|
"loss": 1.537, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.0563791856566616e-05, |
|
"loss": 1.5907, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.051543542263736e-05, |
|
"loss": 1.558, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.046698439016708e-05, |
|
"loss": 1.6066, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.0418439054564615e-05, |
|
"loss": 1.6129, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.036979971181382e-05, |
|
"loss": 1.6739, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.0321066658471646e-05, |
|
"loss": 1.5924, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.027224019166648e-05, |
|
"loss": 1.5308, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.0223320609096195e-05, |
|
"loss": 1.6009, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.0174308209026435e-05, |
|
"loss": 1.6631, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.012520329028874e-05, |
|
"loss": 1.5813, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.007600615227876e-05, |
|
"loss": 1.619, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.002671709495438e-05, |
|
"loss": 1.5654, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.997733641883395e-05, |
|
"loss": 1.5944, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.992786442499442e-05, |
|
"loss": 1.654, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.98783014150695e-05, |
|
"loss": 1.5784, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.9828647691247836e-05, |
|
"loss": 1.5812, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.977890355627116e-05, |
|
"loss": 1.5983, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.9729069313432454e-05, |
|
"loss": 1.633, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.967914526657408e-05, |
|
"loss": 1.639, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.9629131720085966e-05, |
|
"loss": 1.5822, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.957902897890369e-05, |
|
"loss": 1.6079, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.952883734850667e-05, |
|
"loss": 1.6206, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.947855713491631e-05, |
|
"loss": 1.5733, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.942818864469407e-05, |
|
"loss": 1.6351, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.9377732184939664e-05, |
|
"loss": 1.6283, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.9327188063289156e-05, |
|
"loss": 1.6428, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.9276556587913096e-05, |
|
"loss": 1.6037, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.922583806751461e-05, |
|
"loss": 1.5311, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.917503281132758e-05, |
|
"loss": 1.5731, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.9124141129114695e-05, |
|
"loss": 1.6292, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.90731633311656e-05, |
|
"loss": 1.5781, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.902209972829498e-05, |
|
"loss": 1.5801, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.897095063184069e-05, |
|
"loss": 1.642, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8919716353661846e-05, |
|
"loss": 1.5843, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.886839720613691e-05, |
|
"loss": 1.6273, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8816993502161815e-05, |
|
"loss": 1.675, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.876550555514802e-05, |
|
"loss": 1.6156, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 1.5981309413909912, |
|
"eval_runtime": 120.2627, |
|
"eval_samples_per_second": 11.4, |
|
"eval_steps_per_second": 2.852, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8713933679020634e-05, |
|
"loss": 1.6232, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8662278188216485e-05, |
|
"loss": 1.5513, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.861053939768218e-05, |
|
"loss": 1.5522, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.855871762287225e-05, |
|
"loss": 1.6226, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.8506813179747165e-05, |
|
"loss": 1.6186, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.8454826384771426e-05, |
|
"loss": 1.5325, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.840275755491164e-05, |
|
"loss": 1.5972, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.83506070076346e-05, |
|
"loss": 1.603, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.82983750609053e-05, |
|
"loss": 1.517, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.824606203318507e-05, |
|
"loss": 1.6564, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.819366824342959e-05, |
|
"loss": 1.656, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.814119401108692e-05, |
|
"loss": 1.6301, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.8088639656095614e-05, |
|
"loss": 1.5871, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.803600549888273e-05, |
|
"loss": 1.6384, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.7983291860361866e-05, |
|
"loss": 1.559, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.793049906193127e-05, |
|
"loss": 1.5624, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.78776274254718e-05, |
|
"loss": 1.5147, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.782467727334496e-05, |
|
"loss": 1.6521, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7771648928391045e-05, |
|
"loss": 1.6049, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.771854271392703e-05, |
|
"loss": 1.6414, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.766535895374472e-05, |
|
"loss": 1.5619, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.761209797210866e-05, |
|
"loss": 1.5992, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.755876009375428e-05, |
|
"loss": 1.5478, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.750534564388582e-05, |
|
"loss": 1.6274, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.745185494817438e-05, |
|
"loss": 1.5732, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7398288332755936e-05, |
|
"loss": 1.5372, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.7344646124229376e-05, |
|
"loss": 1.5875, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.7290928649654446e-05, |
|
"loss": 1.642, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.723713623654983e-05, |
|
"loss": 1.5793, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.718326921289108e-05, |
|
"loss": 1.644, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.712932790710869e-05, |
|
"loss": 1.5677, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.7075312648086036e-05, |
|
"loss": 1.5733, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.702122376515739e-05, |
|
"loss": 1.5445, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.696706158810591e-05, |
|
"loss": 1.6533, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.691282644716165e-05, |
|
"loss": 1.5779, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.685851867299953e-05, |
|
"loss": 1.5356, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.680413859673728e-05, |
|
"loss": 1.6036, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.674968654993352e-05, |
|
"loss": 1.5826, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.669516286458562e-05, |
|
"loss": 1.5973, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.664056787312779e-05, |
|
"loss": 1.5661, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6585901908428946e-05, |
|
"loss": 1.5681, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.653116530379077e-05, |
|
"loss": 1.565, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.647635839294561e-05, |
|
"loss": 1.5517, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.642148151005452e-05, |
|
"loss": 1.588, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.636653498970512e-05, |
|
"loss": 1.592, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6311519166909656e-05, |
|
"loss": 1.5633, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.62564343771029e-05, |
|
"loss": 1.6122, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.620128095614012e-05, |
|
"loss": 1.6046, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.614605924029504e-05, |
|
"loss": 1.5932, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.6090769566257767e-05, |
|
"loss": 1.6165, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.603541227113276e-05, |
|
"loss": 1.5227, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.597998769243678e-05, |
|
"loss": 1.5661, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.592449616809681e-05, |
|
"loss": 1.5499, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.5868938036448e-05, |
|
"loss": 1.5931, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.581331363623161e-05, |
|
"loss": 1.5451, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.5757623306592955e-05, |
|
"loss": 1.5564, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.570186738707931e-05, |
|
"loss": 1.5816, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.564604621763786e-05, |
|
"loss": 1.6446, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.559016013861364e-05, |
|
"loss": 1.594, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.553420949074742e-05, |
|
"loss": 1.6004, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5478194615173655e-05, |
|
"loss": 1.5862, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5422115853418405e-05, |
|
"loss": 1.6648, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.536597354739725e-05, |
|
"loss": 1.6372, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.530976803941319e-05, |
|
"loss": 1.5812, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.52534996721546e-05, |
|
"loss": 1.5302, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.519716878869308e-05, |
|
"loss": 1.5731, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.51407757324814e-05, |
|
"loss": 1.6117, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.508432084735142e-05, |
|
"loss": 1.5817, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.502780447751196e-05, |
|
"loss": 1.5911, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4971226967546714e-05, |
|
"loss": 1.5234, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.491458866241217e-05, |
|
"loss": 1.5837, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.485788990743546e-05, |
|
"loss": 1.5487, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.48011310483123e-05, |
|
"loss": 1.5276, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.474431243110486e-05, |
|
"loss": 1.5954, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.468743440223966e-05, |
|
"loss": 1.5444, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.463049730850546e-05, |
|
"loss": 1.5836, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.457350149705113e-05, |
|
"loss": 1.547, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.451644731538357e-05, |
|
"loss": 1.6454, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4459335111365533e-05, |
|
"loss": 1.5334, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.440216523321356e-05, |
|
"loss": 1.5022, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.434493802949582e-05, |
|
"loss": 1.5177, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.428765384913004e-05, |
|
"loss": 1.5837, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4230313041381265e-05, |
|
"loss": 1.5765, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.417291595585987e-05, |
|
"loss": 1.5551, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.411546294251932e-05, |
|
"loss": 1.6088, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.405795435165409e-05, |
|
"loss": 1.5787, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.400039053389751e-05, |
|
"loss": 1.6321, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.394277184021962e-05, |
|
"loss": 1.5318, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.388509862192507e-05, |
|
"loss": 1.4413, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.382737123065092e-05, |
|
"loss": 1.5207, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.3769590018364564e-05, |
|
"loss": 1.6031, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.371175533736148e-05, |
|
"loss": 1.6052, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.365386754026323e-05, |
|
"loss": 1.5122, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.359592698001516e-05, |
|
"loss": 1.6031, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.353793400988436e-05, |
|
"loss": 1.5539, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3479888983457454e-05, |
|
"loss": 1.5709, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.342179225463843e-05, |
|
"loss": 1.5619, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.336364417764654e-05, |
|
"loss": 1.5903, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.330544510701411e-05, |
|
"loss": 1.5481, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.324719539758435e-05, |
|
"loss": 1.5959, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 1.592111349105835, |
|
"eval_runtime": 120.2035, |
|
"eval_samples_per_second": 11.406, |
|
"eval_steps_per_second": 2.853, |
|
"step": 5000 |
|
}, |
|
{
"epoch": 1.18,
"learning_rate": 3.3188895404509254e-05,
"loss": 1.5371,
"step": 5010
},
{
"epoch": 1.18,
"learning_rate": 3.313054548324737e-05,
"loss": 1.6071,
"step": 5020
},
{
"epoch": 1.19,
"learning_rate": 3.307214598956165e-05,
"loss": 1.5601,
"step": 5030
},
{
"epoch": 1.19,
"learning_rate": 3.3013697279517346e-05,
"loss": 1.4761,
"step": 5040
},
{
"epoch": 1.19,
"learning_rate": 3.295519970947973e-05,
"loss": 1.5097,
"step": 5050
},
{
"epoch": 1.19,
"learning_rate": 3.289665363611201e-05,
"loss": 1.6271,
"step": 5060
},
{
"epoch": 1.2,
"learning_rate": 3.2838059416373094e-05,
"loss": 1.6295,
"step": 5070
},
{
"epoch": 1.2,
"learning_rate": 3.277941740751548e-05,
"loss": 1.5371,
"step": 5080
},
{
"epoch": 1.2,
"learning_rate": 3.272072796708299e-05,
"loss": 1.641,
"step": 5090
},
{
"epoch": 1.2,
"learning_rate": 3.266199145290868e-05,
"loss": 1.5497,
"step": 5100
},
{
"epoch": 1.2,
"learning_rate": 3.260320822311259e-05,
"loss": 1.5779,
"step": 5110
},
{
"epoch": 1.21,
"learning_rate": 3.2544378636099625e-05,
"loss": 1.6502,
"step": 5120
},
{
"epoch": 1.21,
"learning_rate": 3.248550305055728e-05,
"loss": 1.636,
"step": 5130
},
{
"epoch": 1.21,
"learning_rate": 3.242658182545356e-05,
"loss": 1.5893,
"step": 5140
},
{
"epoch": 1.21,
"learning_rate": 3.2367615320034675e-05,
"loss": 1.5649,
"step": 5150
},
{
"epoch": 1.22,
"learning_rate": 3.2308603893822985e-05,
"loss": 1.6139,
"step": 5160
},
{
"epoch": 1.22,
"learning_rate": 3.224954790661469e-05,
"loss": 1.4683,
"step": 5170
},
{
"epoch": 1.22,
"learning_rate": 3.219044771847767e-05,
"loss": 1.5228,
"step": 5180
},
{
"epoch": 1.22,
"learning_rate": 3.2131303689749334e-05,
"loss": 1.5809,
"step": 5190
},
{
"epoch": 1.23,
"learning_rate": 3.2072116181034364e-05,
"loss": 1.5908,
"step": 5200
},
{
"epoch": 1.23,
"learning_rate": 3.201288555320256e-05,
"loss": 1.5981,
"step": 5210
},
{
"epoch": 1.23,
"learning_rate": 3.1953612167386624e-05,
"loss": 1.566,
"step": 5220
},
{
"epoch": 1.23,
"learning_rate": 3.189429638497994e-05,
"loss": 1.6265,
"step": 5230
},
{
"epoch": 1.24,
"learning_rate": 3.183493856763438e-05,
"loss": 1.5968,
"step": 5240
},
{
"epoch": 1.24,
"learning_rate": 3.177553907725814e-05,
"loss": 1.5487,
"step": 5250
},
{
"epoch": 1.24,
"learning_rate": 3.171609827601347e-05,
"loss": 1.5437,
"step": 5260
},
{
"epoch": 1.24,
"learning_rate": 3.16566165263145e-05,
"loss": 1.6033,
"step": 5270
},
{
"epoch": 1.24,
"learning_rate": 3.159709419082503e-05,
"loss": 1.5619,
"step": 5280
},
{
"epoch": 1.25,
"learning_rate": 3.153753163245632e-05,
"loss": 1.595,
"step": 5290
},
{
"epoch": 1.25,
"learning_rate": 3.147792921436484e-05,
"loss": 1.5967,
"step": 5300
},
{
"epoch": 1.25,
"learning_rate": 3.1418287299950136e-05,
"loss": 1.6131,
"step": 5310
},
{
"epoch": 1.25,
"learning_rate": 3.1358606252852526e-05,
"loss": 1.564,
"step": 5320
},
{
"epoch": 1.26,
"learning_rate": 3.1298886436950946e-05,
"loss": 1.5854,
"step": 5330
},
{
"epoch": 1.26,
"learning_rate": 3.1239128216360696e-05,
"loss": 1.5676,
"step": 5340
},
{
"epoch": 1.26,
"learning_rate": 3.117933195543122e-05,
"loss": 1.5799,
"step": 5350
},
{
"epoch": 1.26,
"learning_rate": 3.111949801874393e-05,
"loss": 1.5614,
"step": 5360
},
{
"epoch": 1.27,
"learning_rate": 3.105962677110991e-05,
"loss": 1.5621,
"step": 5370
},
{
"epoch": 1.27,
"learning_rate": 3.099971857756777e-05,
"loss": 1.64,
"step": 5380
},
{
"epoch": 1.27,
"learning_rate": 3.093977380338134e-05,
"loss": 1.5687,
"step": 5390
},
{
"epoch": 1.27,
"learning_rate": 3.0879792814037524e-05,
"loss": 1.6168,
"step": 5400
},
{
"epoch": 1.28,
"learning_rate": 3.0819775975244005e-05,
"loss": 1.5049,
"step": 5410
},
{
"epoch": 1.28,
"learning_rate": 3.075972365292706e-05,
"loss": 1.6166,
"step": 5420
},
{
"epoch": 1.28,
"learning_rate": 3.0699636213229294e-05,
"loss": 1.5757,
"step": 5430
},
{
"epoch": 1.28,
"learning_rate": 3.0639514022507436e-05,
"loss": 1.5442,
"step": 5440
},
{
"epoch": 1.28,
"learning_rate": 3.057935744733009e-05,
"loss": 1.5417,
"step": 5450
},
{
"epoch": 1.29,
"learning_rate": 3.051916685447551e-05,
"loss": 1.556,
"step": 5460
},
{
"epoch": 1.29,
"learning_rate": 3.0458942610929353e-05,
"loss": 1.5974,
"step": 5470
},
{
"epoch": 1.29,
"learning_rate": 3.0398685083882438e-05,
"loss": 1.5775,
"step": 5480
},
{
"epoch": 1.29,
"learning_rate": 3.0338394640728533e-05,
"loss": 1.5323,
"step": 5490
},
{
"epoch": 1.3,
"learning_rate": 3.027807164906209e-05,
"loss": 1.6194,
"step": 5500
},
{
"epoch": 1.3,
"learning_rate": 3.0217716476676005e-05,
"loss": 1.5799,
"step": 5510
},
{
"epoch": 1.3,
"learning_rate": 3.0157329491559382e-05,
"loss": 1.5676,
"step": 5520
},
{
"epoch": 1.3,
"learning_rate": 3.0096911061895306e-05,
"loss": 1.5538,
"step": 5530
},
{
"epoch": 1.31,
"learning_rate": 3.0036461556058552e-05,
"loss": 1.5787,
"step": 5540
},
{
"epoch": 1.31,
"learning_rate": 2.9975981342613406e-05,
"loss": 1.5615,
"step": 5550
},
{
"epoch": 1.31,
"learning_rate": 2.9915470790311338e-05,
"loss": 1.5831,
"step": 5560
},
{
"epoch": 1.31,
"learning_rate": 2.9854930268088845e-05,
"loss": 1.6376,
"step": 5570
},
{
"epoch": 1.32,
"learning_rate": 2.9794360145065093e-05,
"loss": 1.5673,
"step": 5580
},
{
"epoch": 1.32,
"learning_rate": 2.9733760790539784e-05,
"loss": 1.5465,
"step": 5590
},
{
"epoch": 1.32,
"learning_rate": 2.9673132573990796e-05,
"loss": 1.5375,
"step": 5600
},
{
"epoch": 1.32,
"learning_rate": 2.961247586507203e-05,
"loss": 1.6384,
"step": 5610
},
{
"epoch": 1.33,
"learning_rate": 2.955179103361106e-05,
"loss": 1.608,
"step": 5620
},
{
"epoch": 1.33,
"learning_rate": 2.9491078449606958e-05,
"loss": 1.6231,
"step": 5630
},
{
"epoch": 1.33,
"learning_rate": 2.9430338483227982e-05,
"loss": 1.5672,
"step": 5640
},
{
"epoch": 1.33,
"learning_rate": 2.9369571504809368e-05,
"loss": 1.5708,
"step": 5650
},
{
"epoch": 1.33,
"learning_rate": 2.9308777884851013e-05,
"loss": 1.5704,
"step": 5660
},
{
"epoch": 1.34,
"learning_rate": 2.924795799401528e-05,
"loss": 1.5602,
"step": 5670
},
{
"epoch": 1.34,
"learning_rate": 2.9187112203124687e-05,
"loss": 1.5754,
"step": 5680
},
{
"epoch": 1.34,
"learning_rate": 2.9126240883159684e-05,
"loss": 1.5619,
"step": 5690
},
{
"epoch": 1.34,
"learning_rate": 2.9065344405256345e-05,
"loss": 1.5708,
"step": 5700
},
{
"epoch": 1.35,
"learning_rate": 2.9004423140704162e-05,
"loss": 1.6248,
"step": 5710
},
{
"epoch": 1.35,
"learning_rate": 2.894347746094374e-05,
"loss": 1.5462,
"step": 5720
},
{
"epoch": 1.35,
"learning_rate": 2.8882507737564546e-05,
"loss": 1.523,
"step": 5730
},
{
"epoch": 1.35,
"learning_rate": 2.8821514342302646e-05,
"loss": 1.5787,
"step": 5740
},
{
"epoch": 1.36,
"learning_rate": 2.876049764703842e-05,
"loss": 1.5542,
"step": 5750
},
{
"epoch": 1.36,
"learning_rate": 2.8699458023794342e-05,
"loss": 1.503,
"step": 5760
},
{
"epoch": 1.36,
"learning_rate": 2.8638395844732636e-05,
"loss": 1.554,
"step": 5770
},
{
"epoch": 1.36,
"learning_rate": 2.857731148215309e-05,
"loss": 1.5576,
"step": 5780
},
{
"epoch": 1.37,
"learning_rate": 2.8516205308490718e-05,
"loss": 1.5663,
"step": 5790
},
{
"epoch": 1.37,
"learning_rate": 2.8455077696313536e-05,
"loss": 1.5718,
"step": 5800
},
{
"epoch": 1.37,
"learning_rate": 2.8393929018320264e-05,
"loss": 1.5952,
"step": 5810
},
{
"epoch": 1.37,
"learning_rate": 2.8332759647338047e-05,
"loss": 1.6014,
"step": 5820
},
{
"epoch": 1.37,
"learning_rate": 2.827156995632024e-05,
"loss": 1.5607,
"step": 5830
},
{
"epoch": 1.38,
"learning_rate": 2.8210360318344032e-05,
"loss": 1.6279,
"step": 5840
},
{
"epoch": 1.38,
"learning_rate": 2.8149131106608284e-05,
"loss": 1.5182,
"step": 5850
},
{
"epoch": 1.38,
"learning_rate": 2.8087882694431156e-05,
"loss": 1.6032,
"step": 5860
},
{
"epoch": 1.38,
"learning_rate": 2.80266154552479e-05,
"loss": 1.5395,
"step": 5870
},
{
"epoch": 1.39,
"learning_rate": 2.796532976260856e-05,
"loss": 1.5687,
"step": 5880
},
{
"epoch": 1.39,
"learning_rate": 2.7904025990175675e-05,
"loss": 1.5735,
"step": 5890
},
{
"epoch": 1.39,
"learning_rate": 2.7842704511722017e-05,
"loss": 1.5653,
"step": 5900
},
{
"epoch": 1.39,
"learning_rate": 2.7781365701128333e-05,
"loss": 1.5791,
"step": 5910
},
{
"epoch": 1.4,
"learning_rate": 2.7720009932381024e-05,
"loss": 1.5829,
"step": 5920
},
{
"epoch": 1.4,
"learning_rate": 2.76586375795699e-05,
"loss": 1.5581,
"step": 5930
},
{
"epoch": 1.4,
"learning_rate": 2.7597249016885878e-05,
"loss": 1.6085,
"step": 5940
},
{
"epoch": 1.4,
"learning_rate": 2.753584461861871e-05,
"loss": 1.5713,
"step": 5950
},
{
"epoch": 1.41,
"learning_rate": 2.74744247591547e-05,
"loss": 1.5618,
"step": 5960
},
{
"epoch": 1.41,
"learning_rate": 2.7412989812974416e-05,
"loss": 1.6267,
"step": 5970
},
{
"epoch": 1.41,
"learning_rate": 2.7351540154650408e-05,
"loss": 1.6045,
"step": 5980
},
{
"epoch": 1.41,
"learning_rate": 2.7290076158844935e-05,
"loss": 1.5682,
"step": 5990
},
{
"epoch": 1.41,
"learning_rate": 2.7228598200307666e-05,
"loss": 1.5716,
"step": 6000
},
{
"epoch": 1.41,
"eval_loss": 1.5865955352783203,
"eval_runtime": 120.5077,
"eval_samples_per_second": 11.377,
"eval_steps_per_second": 2.846,
"step": 6000
},
{
"epoch": 1.42,
"learning_rate": 2.716710665387341e-05,
"loss": 1.5948,
"step": 6010
},
{
"epoch": 1.42,
"learning_rate": 2.710560189445981e-05,
"loss": 1.5397,
"step": 6020
},
{
"epoch": 1.42,
"learning_rate": 2.704408429706508e-05,
"loss": 1.5307,
"step": 6030
},
{
"epoch": 1.42,
"learning_rate": 2.6982554236765704e-05,
"loss": 1.5215,
"step": 6040
},
{
"epoch": 1.43,
"learning_rate": 2.692101208871415e-05,
"loss": 1.6056,
"step": 6050
},
{
"epoch": 1.43,
"learning_rate": 2.6859458228136592e-05,
"loss": 1.5652,
"step": 6060
},
{
"epoch": 1.43,
"learning_rate": 2.6797893030330607e-05,
"loss": 1.5155,
"step": 6070
},
{
"epoch": 1.43,
"learning_rate": 2.6736316870662904e-05,
"loss": 1.6073,
"step": 6080
},
{
"epoch": 1.44,
"learning_rate": 2.6674730124567023e-05,
"loss": 1.5765,
"step": 6090
},
{
"epoch": 1.44,
"learning_rate": 2.6613133167541055e-05,
"loss": 1.601,
"step": 6100
},
{
"epoch": 1.44,
"learning_rate": 2.6551526375145342e-05,
"loss": 1.6192,
"step": 6110
},
{
"epoch": 1.44,
"learning_rate": 2.6489910123000195e-05,
"loss": 1.5599,
"step": 6120
},
{
"epoch": 1.45,
"learning_rate": 2.6428284786783597e-05,
"loss": 1.5574,
"step": 6130
},
{
"epoch": 1.45,
"learning_rate": 2.6366650742228937e-05,
"loss": 1.609,
"step": 6140
},
{
"epoch": 1.45,
"learning_rate": 2.6305008365122664e-05,
"loss": 1.5537,
"step": 6150
},
{
"epoch": 1.45,
"learning_rate": 2.6243358031302067e-05,
"loss": 1.4887,
"step": 6160
},
{
"epoch": 1.45,
"learning_rate": 2.6181700116652917e-05,
"loss": 1.604,
"step": 6170
},
{
"epoch": 1.46,
"learning_rate": 2.612003499710724e-05,
"loss": 1.5129,
"step": 6180
},
{
"epoch": 1.46,
"learning_rate": 2.6058363048640948e-05,
"loss": 1.5484,
"step": 6190
},
{
"epoch": 1.46,
"learning_rate": 2.5996684647271635e-05,
"loss": 1.5315,
"step": 6200
},
{
"epoch": 1.46,
"learning_rate": 2.59350001690562e-05,
"loss": 1.5443,
"step": 6210
},
{
"epoch": 1.47,
"learning_rate": 2.5873309990088612e-05,
"loss": 1.5461,
"step": 6220
},
{
"epoch": 1.47,
"learning_rate": 2.5811614486497605e-05,
"loss": 1.5906,
"step": 6230
},
{
"epoch": 1.47,
"learning_rate": 2.574991403444435e-05,
"loss": 1.5412,
"step": 6240
},
{
"epoch": 1.47,
"learning_rate": 2.5688209010120225e-05,
"loss": 1.5833,
"step": 6250
},
{
"epoch": 1.48,
"learning_rate": 2.562649978974445e-05,
"loss": 1.5442,
"step": 6260
},
{
"epoch": 1.48,
"learning_rate": 2.556478674956186e-05,
"loss": 1.5377,
"step": 6270
},
{
"epoch": 1.48,
"learning_rate": 2.5503070265840556e-05,
"loss": 1.5521,
"step": 6280
},
{
"epoch": 1.48,
"learning_rate": 2.5441350714869644e-05,
"loss": 1.5607,
"step": 6290
},
{
"epoch": 1.49,
"learning_rate": 2.5379628472956933e-05,
"loss": 1.5642,
"step": 6300
},
{
"epoch": 1.49,
"learning_rate": 2.5317903916426645e-05,
"loss": 1.5422,
"step": 6310
},
{
"epoch": 1.49,
"learning_rate": 2.5256177421617088e-05,
"loss": 1.5729,
"step": 6320
},
{
"epoch": 1.49,
"learning_rate": 2.519444936487842e-05,
"loss": 1.5666,
"step": 6330
},
{
"epoch": 1.49,
"learning_rate": 2.5132720122570298e-05,
"loss": 1.5788,
"step": 6340
},
{
"epoch": 1.5,
"learning_rate": 2.507099007105963e-05,
"loss": 1.4995,
"step": 6350
},
{
"epoch": 1.5,
"learning_rate": 2.500925958671823e-05,
"loss": 1.6433,
"step": 6360
},
{
"epoch": 1.5,
"learning_rate": 2.494752904592058e-05,
"loss": 1.5099,
"step": 6370
},
{
"epoch": 1.5,
"learning_rate": 2.4885798825041488e-05,
"loss": 1.6121,
"step": 6380
},
{
"epoch": 1.51,
"learning_rate": 2.4824069300453815e-05,
"loss": 1.551,
"step": 6390
},
{
"epoch": 1.51,
"learning_rate": 2.4762340848526162e-05,
"loss": 1.5144,
"step": 6400
},
{
"epoch": 1.51,
"learning_rate": 2.4700613845620632e-05,
"loss": 1.5084,
"step": 6410
},
{
"epoch": 1.51,
"learning_rate": 2.4638888668090457e-05,
"loss": 1.5896,
"step": 6420
},
{
"epoch": 1.52,
"learning_rate": 2.4577165692277744e-05,
"loss": 1.5006,
"step": 6430
},
{
"epoch": 1.52,
"learning_rate": 2.4515445294511176e-05,
"loss": 1.5435,
"step": 6440
},
{
"epoch": 1.52,
"learning_rate": 2.445372785110374e-05,
"loss": 1.5812,
"step": 6450
},
{
"epoch": 1.52,
"learning_rate": 2.439201373835039e-05,
"loss": 1.5867,
"step": 6460
},
{
"epoch": 1.53,
"learning_rate": 2.433030333252576e-05,
"loss": 1.5748,
"step": 6470
},
{
"epoch": 1.53,
"learning_rate": 2.42685970098819e-05,
"loss": 1.5986,
"step": 6480
},
{
"epoch": 1.53,
"learning_rate": 2.42130651215743e-05,
"loss": 1.5599,
"step": 6490
},
{
"epoch": 1.53,
"learning_rate": 2.4151367593457314e-05,
"loss": 1.549,
"step": 6500
},
{
"epoch": 1.53,
"learning_rate": 2.4089675239503044e-05,
"loss": 1.5109,
"step": 6510
},
{
"epoch": 1.54,
"learning_rate": 2.4027988435853466e-05,
"loss": 1.5712,
"step": 6520
},
{
"epoch": 1.54,
"learning_rate": 2.3966307558616745e-05,
"loss": 1.5581,
"step": 6530
},
{
"epoch": 1.54,
"learning_rate": 2.3904632983864885e-05,
"loss": 1.5607,
"step": 6540
},
{
"epoch": 1.54,
"learning_rate": 2.384296508763147e-05,
"loss": 1.598,
"step": 6550
},
{
"epoch": 1.55,
"learning_rate": 2.378130424590935e-05,
"loss": 1.5464,
"step": 6560
},
{
"epoch": 1.55,
"learning_rate": 2.37196508346484e-05,
"loss": 1.5242,
"step": 6570
},
{
"epoch": 1.55,
"learning_rate": 2.365800522975316e-05,
"loss": 1.4973,
"step": 6580
},
{
"epoch": 1.55,
"learning_rate": 2.359636780708058e-05,
"loss": 1.6024,
"step": 6590
},
{
"epoch": 1.56,
"learning_rate": 2.353473894243772e-05,
"loss": 1.5798,
"step": 6600
},
{
"epoch": 1.56,
"learning_rate": 2.3473119011579485e-05,
"loss": 1.5767,
"step": 6610
},
{
"epoch": 1.56,
"learning_rate": 2.3411508390206286e-05,
"loss": 1.5836,
"step": 6620
},
{
"epoch": 1.56,
"learning_rate": 2.334990745396177e-05,
"loss": 1.5451,
"step": 6630
},
{
"epoch": 1.57,
"learning_rate": 2.328831657843054e-05,
"loss": 1.5379,
"step": 6640
},
{
"epoch": 1.57,
"learning_rate": 2.3226736139135876e-05,
"loss": 1.591,
"step": 6650
},
{
"epoch": 1.57,
"learning_rate": 2.316516651153741e-05,
"loss": 1.5998,
"step": 6660
},
{
"epoch": 1.57,
"learning_rate": 2.3103608071028848e-05,
"loss": 1.6216,
"step": 6670
},
{
"epoch": 1.58,
"learning_rate": 2.3042061192935705e-05,
"loss": 1.5791,
"step": 6680
},
{
"epoch": 1.58,
"learning_rate": 2.2980526252512972e-05,
"loss": 1.6064,
"step": 6690
},
{
"epoch": 1.58,
"learning_rate": 2.29190036249429e-05,
"loss": 1.6265,
"step": 6700
},
{
"epoch": 1.58,
"learning_rate": 2.2857493685332633e-05,
"loss": 1.5985,
"step": 6710
},
{
"epoch": 1.58,
"learning_rate": 2.2795996808711963e-05,
"loss": 1.591,
"step": 6720
},
{
"epoch": 1.59,
"learning_rate": 2.2734513370031025e-05,
"loss": 1.5104,
"step": 6730
},
{
"epoch": 1.59,
"learning_rate": 2.2673043744158057e-05,
"loss": 1.5631,
"step": 6740
},
{
"epoch": 1.59,
"learning_rate": 2.261158830587705e-05,
"loss": 1.5249,
"step": 6750
},
{
"epoch": 1.59,
"learning_rate": 2.25501474298855e-05,
"loss": 1.6217,
"step": 6760
},
{
"epoch": 1.6,
"learning_rate": 2.2488721490792104e-05,
"loss": 1.5736,
"step": 6770
},
{
"epoch": 1.6,
"learning_rate": 2.2427310863114513e-05,
"loss": 1.5733,
"step": 6780
},
{
"epoch": 1.6,
"learning_rate": 2.2365915921277004e-05,
"loss": 1.56,
"step": 6790
},
{
"epoch": 1.6,
"learning_rate": 2.2304537039608224e-05,
"loss": 1.5668,
"step": 6800
},
{
"epoch": 1.61,
"learning_rate": 2.22431745923389e-05,
"loss": 1.5134,
"step": 6810
},
{
"epoch": 1.61,
"learning_rate": 2.2181828953599556e-05,
"loss": 1.5995,
"step": 6820
},
{
"epoch": 1.61,
"learning_rate": 2.2120500497418238e-05,
"loss": 1.644,
"step": 6830
},
{
"epoch": 1.61,
"learning_rate": 2.2059189597718205e-05,
"loss": 1.5736,
"step": 6840
},
{
"epoch": 1.62,
"learning_rate": 2.200402510774106e-05,
"loss": 1.449,
"step": 6850
},
{
"epoch": 1.62,
"learning_rate": 2.194274859512892e-05,
"loss": 1.5933,
"step": 6860
},
{
"epoch": 1.62,
"learning_rate": 2.18814907227621e-05,
"loss": 1.5343,
"step": 6870
},
{
"epoch": 1.62,
"learning_rate": 2.182025186413352e-05,
"loss": 1.4848,
"step": 6880
},
{
"epoch": 1.62,
"learning_rate": 2.175903239262017e-05,
"loss": 1.5866,
"step": 6890
},
{
"epoch": 1.63,
"learning_rate": 2.1697832681480858e-05,
"loss": 1.5759,
"step": 6900
},
{
"epoch": 1.63,
"learning_rate": 2.1636653103853887e-05,
"loss": 1.5649,
"step": 6910
},
{
"epoch": 1.63,
"learning_rate": 2.157549403275481e-05,
"loss": 1.5768,
"step": 6920
},
{
"epoch": 1.63,
"learning_rate": 2.1514355841074157e-05,
"loss": 1.5945,
"step": 6930
},
{
"epoch": 1.64,
"learning_rate": 2.1453238901575158e-05,
"loss": 1.5329,
"step": 6940
},
{
"epoch": 1.64,
"learning_rate": 2.139214358689146e-05,
"loss": 1.5272,
"step": 6950
},
{
"epoch": 1.64,
"learning_rate": 2.1331070269524858e-05,
"loss": 1.6365,
"step": 6960
},
{
"epoch": 1.64,
"learning_rate": 2.1270019321843033e-05,
"loss": 1.5395,
"step": 6970
},
{
"epoch": 1.65,
"learning_rate": 2.120899111607728e-05,
"loss": 1.5281,
"step": 6980
},
{
"epoch": 1.65,
"learning_rate": 2.114798602432024e-05,
"loss": 1.6174,
"step": 6990
},
{
"epoch": 1.65,
"learning_rate": 2.108700441852361e-05,
"loss": 1.6281,
"step": 7000
},
{
"epoch": 1.65,
"eval_loss": 1.5833255052566528,
"eval_runtime": 120.1395,
"eval_samples_per_second": 11.412,
"eval_steps_per_second": 2.855,
"step": 7000
},
{
"epoch": 1.65,
"learning_rate": 2.1026046670495906e-05,
"loss": 1.6199,
"step": 7010
},
{
"epoch": 1.66,
"learning_rate": 2.0965113151900166e-05,
"loss": 1.6042,
"step": 7020
},
{
"epoch": 1.66,
"learning_rate": 2.090420423425172e-05,
"loss": 1.5406,
"step": 7030
},
{
"epoch": 1.66,
"learning_rate": 2.0843320288915903e-05,
"loss": 1.5542,
"step": 7040
},
{
"epoch": 1.66,
"learning_rate": 2.078246168710577e-05,
"loss": 1.5691,
"step": 7050
},
{
"epoch": 1.66,
"learning_rate": 2.072162879987986e-05,
"loss": 1.5629,
"step": 7060
},
{
"epoch": 1.67,
"learning_rate": 2.066082199813996e-05,
"loss": 1.6191,
"step": 7070
},
{
"epoch": 1.67,
"learning_rate": 2.0600041652628787e-05,
"loss": 1.6127,
"step": 7080
},
{
"epoch": 1.67,
"learning_rate": 2.0539288133927746e-05,
"loss": 1.5847,
"step": 7090
},
{
"epoch": 1.67,
"learning_rate": 2.0478561812454678e-05,
"loss": 1.591,
"step": 7100
},
{
"epoch": 1.68,
"learning_rate": 2.0417863058461633e-05,
"loss": 1.5478,
"step": 7110
},
{
"epoch": 1.68,
"learning_rate": 2.0357192242032547e-05,
"loss": 1.5991,
"step": 7120
},
{
"epoch": 1.68,
"learning_rate": 2.0296549733081027e-05,
"loss": 1.5795,
"step": 7130
},
{
"epoch": 1.68,
"learning_rate": 2.0235935901348098e-05,
"loss": 1.5478,
"step": 7140
},
{
"epoch": 1.69,
"learning_rate": 2.0175351116399904e-05,
"loss": 1.641,
"step": 7150
},
{
"epoch": 1.69,
"learning_rate": 2.011479574762555e-05,
"loss": 1.5214,
"step": 7160
},
{
"epoch": 1.69,
"learning_rate": 2.005427016423474e-05,
"loss": 1.5606,
"step": 7170
},
{
"epoch": 1.69,
"learning_rate": 1.9993774735255587e-05,
"loss": 1.5445,
"step": 7180
},
{
"epoch": 1.7,
"learning_rate": 1.9933309829532344e-05,
"loss": 1.6013,
"step": 7190
},
{
"epoch": 1.7,
"learning_rate": 1.9872875815723187e-05,
"loss": 1.61,
"step": 7200
},
{
"epoch": 1.7,
"learning_rate": 1.981247306229792e-05,
"loss": 1.5263,
"step": 7210
},
{
"epoch": 1.7,
"learning_rate": 1.9752101937535754e-05,
"loss": 1.5198,
"step": 7220
},
{
"epoch": 1.7,
"learning_rate": 1.9691762809523055e-05,
"loss": 1.5959,
"step": 7230
},
{
"epoch": 1.71,
"learning_rate": 1.963145604615112e-05,
"loss": 1.6017,
"step": 7240
},
{
"epoch": 1.71,
"learning_rate": 1.9571182015113894e-05,
"loss": 1.5784,
"step": 7250
},
{
"epoch": 1.71,
"learning_rate": 1.9510941083905775e-05,
"loss": 1.5273,
"step": 7260
},
{
"epoch": 1.71,
"learning_rate": 1.9450733619819317e-05,
"loss": 1.5608,
"step": 7270
},
{
"epoch": 1.72,
"learning_rate": 1.939055998994306e-05,
"loss": 1.5707,
"step": 7280
},
{
"epoch": 1.72,
"learning_rate": 1.9330420561159224e-05,
"loss": 1.5873,
"step": 7290
},
{
"epoch": 1.72,
"learning_rate": 1.9270315700141532e-05,
"loss": 1.5831,
"step": 7300
},
{
"epoch": 1.72,
"learning_rate": 1.9210245773352913e-05,
"loss": 1.5502,
"step": 7310
},
{
"epoch": 1.73,
"learning_rate": 1.915021114704332e-05,
"loss": 1.5395,
"step": 7320
},
{
"epoch": 1.73,
"learning_rate": 1.909021218724748e-05,
"loss": 1.6205,
"step": 7330
},
{
"epoch": 1.73,
"learning_rate": 1.9030249259782647e-05,
"loss": 1.6269,
"step": 7340
},
{
"epoch": 1.73,
"learning_rate": 1.8970322730246386e-05,
"loss": 1.6254,
"step": 7350
},
{
"epoch": 1.74,
"learning_rate": 1.891043296401435e-05,
"loss": 1.6078,
"step": 7360
},
{
"epoch": 1.74,
"learning_rate": 1.8850580326238037e-05,
"loss": 1.5952,
"step": 7370
},
{
"epoch": 1.74,
"learning_rate": 1.8790765181842572e-05,
"loss": 1.6439,
"step": 7380
},
{
"epoch": 1.74,
"learning_rate": 1.873098789552448e-05,
"loss": 1.5527,
"step": 7390
},
{
"epoch": 1.74,
"learning_rate": 1.8671248831749454e-05,
"loss": 1.5974,
"step": 7400
},
{
"epoch": 1.75,
"learning_rate": 1.8611548354750176e-05,
"loss": 1.5194,
"step": 7410
},
{
"epoch": 1.75,
"learning_rate": 1.8551886828524013e-05,
"loss": 1.5647,
"step": 7420
},
{
"epoch": 1.75,
"learning_rate": 1.8492264616830884e-05,
"loss": 1.6324,
"step": 7430
},
{
"epoch": 1.75,
"learning_rate": 1.843268208319098e-05,
"loss": 1.5751,
"step": 7440
},
{
"epoch": 1.76,
"learning_rate": 1.8373139590882603e-05,
"loss": 1.5693,
"step": 7450
},
{
"epoch": 1.76,
"learning_rate": 1.8313637502939895e-05,
"loss": 1.562,
"step": 7460
},
{
"epoch": 1.76,
"learning_rate": 1.8254176182150654e-05,
"loss": 1.5584,
"step": 7470
},
{
"epoch": 1.76,
"learning_rate": 1.8194755991054123e-05,
"loss": 1.5866,
"step": 7480
},
{
"epoch": 1.77,
"learning_rate": 1.8135377291938765e-05,
"loss": 1.6487,
"step": 7490
},
{
"epoch": 1.77,
"learning_rate": 1.8076040446840092e-05,
"loss": 1.5458,
"step": 7500
},
{
"epoch": 1.77,
"learning_rate": 1.80167458175384e-05,
"loss": 1.5688,
"step": 7510
},
{
"epoch": 1.77,
"learning_rate": 1.79574937655666e-05,
"loss": 1.5377,
"step": 7520
},
{
"epoch": 1.78,
"learning_rate": 1.7898284652158006e-05,
"loss": 1.6038,
"step": 7530
},
{
"epoch": 1.78,
"learning_rate": 1.783911883834415e-05,
"loss": 1.5159,
"step": 7540
},
{
"epoch": 1.78,
"learning_rate": 1.777999668485254e-05,
"loss": 1.575,
"step": 7550
},
{
"epoch": 1.78,
"learning_rate": 1.7720918552154498e-05,
"loss": 1.6133,
"step": 7560
},
{
"epoch": 1.78,
"learning_rate": 1.7661884800452932e-05,
"loss": 1.5649,
"step": 7570
},
{
"epoch": 1.79,
"learning_rate": 1.7602895789680194e-05,
"loss": 1.5856,
"step": 7580
},
{
"epoch": 1.79,
"learning_rate": 1.7543951879495806e-05,
"loss": 1.5763,
"step": 7590
},
{
"epoch": 1.79,
"learning_rate": 1.7485053429284335e-05,
"loss": 1.5841,
"step": 7600
},
{
"epoch": 1.79,
"learning_rate": 1.7426200798153152e-05,
"loss": 1.6031,
"step": 7610
},
{
"epoch": 1.8,
"learning_rate": 1.7367394344930298e-05,
"loss": 1.5723,
"step": 7620
},
{
"epoch": 1.8,
"learning_rate": 1.7308634428162245e-05,
"loss": 1.5619,
"step": 7630
},
{
"epoch": 1.8,
"learning_rate": 1.724992140611173e-05,
"loss": 1.5642,
"step": 7640
},
{
"epoch": 1.8,
"learning_rate": 1.719125563675557e-05,
"loss": 1.5634,
"step": 7650
},
{
"epoch": 1.81,
"learning_rate": 1.7132637477782477e-05,
"loss": 1.5896,
"step": 7660
},
{
"epoch": 1.81,
"learning_rate": 1.7074067286590897e-05,
"loss": 1.5564,
"step": 7670
},
{
"epoch": 1.81,
"learning_rate": 1.7015545420286798e-05,
"loss": 1.533,
"step": 7680
},
{
"epoch": 1.81,
"learning_rate": 1.695707223568151e-05,
"loss": 1.5789,
"step": 7690
},
{
"epoch": 1.82,
"learning_rate": 1.689864808928954e-05,
"loss": 1.573,
"step": 7700
},
{
"epoch": 1.82,
"learning_rate": 1.6840273337326424e-05,
"loss": 1.6167,
"step": 7710
},
{
"epoch": 1.82,
"learning_rate": 1.6781948335706534e-05,
"loss": 1.5644,
"step": 7720
},
{
"epoch": 1.82,
"learning_rate": 1.67236734400409e-05,
"loss": 1.5827,
"step": 7730
},
{
"epoch": 1.82,
"learning_rate": 1.666544900563505e-05,
"loss": 1.5427,
"step": 7740
},
{
"epoch": 1.83,
"learning_rate": 1.660727538748687e-05,
"loss": 1.5782,
"step": 7750
},
{
"epoch": 1.83,
"learning_rate": 1.654915294028439e-05,
"loss": 1.5257,
"step": 7760
},
{
"epoch": 1.83,
"learning_rate": 1.649108201840367e-05,
"loss": 1.5747,
"step": 7770
},
{
"epoch": 1.83,
"learning_rate": 1.6433062975906594e-05,
"loss": 1.5598,
"step": 7780
},
{
"epoch": 1.84,
"learning_rate": 1.6375096166538757e-05,
"loss": 1.5349,
"step": 7790
},
{
"epoch": 1.84,
"learning_rate": 1.6317181943727272e-05,
"loss": 1.5958,
"step": 7800
},
{
"epoch": 1.84,
"learning_rate": 1.6259320660578627e-05,
"loss": 1.5406,
"step": 7810
},
{
"epoch": 1.84,
"learning_rate": 1.620151266987654e-05,
"loss": 1.4676,
"step": 7820
},
{
"epoch": 1.85,
"learning_rate": 1.61437583240798e-05,
"loss": 1.5561,
"step": 7830
},
{
"epoch": 1.85,
"learning_rate": 1.608605797532013e-05,
"loss": 1.5527,
"step": 7840
},
{
"epoch": 1.85,
"learning_rate": 1.6028411975400005e-05,
"loss": 1.6027,
"step": 7850
},
{
"epoch": 1.85,
"learning_rate": 1.5970820675790554e-05,
"loss": 1.5452,
"step": 7860
},
{
"epoch": 1.86,
"learning_rate": 1.5913284427629376e-05,
"loss": 1.5342,
"step": 7870
},
{
"epoch": 1.86,
"learning_rate": 1.585580358171845e-05,
"loss": 1.6369,
"step": 7880
},
{
"epoch": 1.86,
"learning_rate": 1.5798378488521937e-05,
"loss": 1.6002,
"step": 7890
},
{
"epoch": 1.86,
"learning_rate": 1.5741009498164066e-05,
"loss": 1.5132,
"step": 7900
},
{
"epoch": 1.87,
"learning_rate": 1.5683696960427012e-05,
"loss": 1.6326,
"step": 7910
},
{
"epoch": 1.87,
"learning_rate": 1.5626441224748784e-05,
"loss": 1.5737,
"step": 7920
},
{
"epoch": 1.87,
"learning_rate": 1.5569242640221015e-05,
"loss": 1.6005,
"step": 7930
},
{
"epoch": 1.87,
"learning_rate": 1.5512101555586918e-05,
"loss": 1.5976,
"step": 7940
},
{
"epoch": 1.87,
"learning_rate": 1.54550183192391e-05,
"loss": 1.6039,
"step": 7950
},
{
"epoch": 1.88,
"learning_rate": 1.5397993279217504e-05,
"loss": 1.5774,
"step": 7960
},
{
"epoch": 1.88,
"learning_rate": 1.5341026783207208e-05,
"loss": 1.5339,
"step": 7970
},
{
"epoch": 1.88,
"learning_rate": 1.528411917853636e-05,
"loss": 1.5806,
"step": 7980
},
{
"epoch": 1.88,
"learning_rate": 1.5227270812174033e-05,
"loss": 1.5673,
"step": 7990
},
{
"epoch": 1.89,
"learning_rate": 1.5170482030728142e-05,
"loss": 1.6091,
"step": 8000
},
{
"epoch": 1.89,
"eval_loss": 1.5800806283950806,
"eval_runtime": 121.3887,
"eval_samples_per_second": 11.294,
"eval_steps_per_second": 2.826,
"step": 8000
},
{
"epoch": 1.89,
"learning_rate": 1.511375318044329e-05,
"loss": 1.5585,
"step": 8010
},
{
"epoch": 1.89,
"learning_rate": 1.5057084607198685e-05,
"loss": 1.6185,
"step": 8020
},
{
"epoch": 1.89,
"learning_rate": 1.5000476656506019e-05,
"loss": 1.5859,
"step": 8030
},
{
"epoch": 1.9,
"learning_rate": 1.4943929673507345e-05,
"loss": 1.5645,
"step": 8040
},
{
"epoch": 1.9,
"learning_rate": 1.4887444002973048e-05,
"loss": 1.5036,
"step": 8050
},
{
"epoch": 1.9,
"learning_rate": 1.483101998929963e-05,
"loss": 1.6188,
"step": 8060
},
{
"epoch": 1.9,
"learning_rate": 1.4774657976507695e-05,
"loss": 1.5842,
"step": 8070
},
{
"epoch": 1.91,
"learning_rate": 1.4718358308239799e-05,
"loss": 1.5984,
"step": 8080
},
{
"epoch": 1.91,
"learning_rate": 1.4662121327758432e-05,
"loss": 1.5114,
"step": 8090
},
{
"epoch": 1.91,
"learning_rate": 1.4605947377943818e-05,
"loss": 1.5658,
"step": 8100
},
{
"epoch": 1.91,
"learning_rate": 1.454983680129191e-05,
"loss": 1.5323,
"step": 8110
},
{
"epoch": 1.91,
"learning_rate": 1.4493789939912244e-05,
"loss": 1.6191,
"step": 8120
},
{
"epoch": 1.92,
"learning_rate": 1.4437807135525922e-05,
"loss": 1.5712,
"step": 8130
},
{
"epoch": 1.92,
"learning_rate": 1.438188872946345e-05,
"loss": 1.5304,
"step": 8140
},
{
"epoch": 1.92,
"learning_rate": 1.4326035062662707e-05,
"loss": 1.5967,
"step": 8150
},
{
"epoch": 1.92,
"learning_rate": 1.4270246475666846e-05,
"loss": 1.5486,
"step": 8160
},
{
"epoch": 1.93,
"learning_rate": 1.4214523308622243e-05,
"loss": 1.6059,
"step": 8170
},
{
"epoch": 1.93,
"learning_rate": 1.4158865901276385e-05,
"loss": 1.593,
"step": 8180
},
{
"epoch": 1.93,
"learning_rate": 1.410327459297583e-05,
"loss": 1.5811,
"step": 8190
},
{
"epoch": 1.93,
"learning_rate": 1.4047749722664116e-05,
"loss": 1.5334,
"step": 8200
},
{
"epoch": 1.94,
"learning_rate": 1.39922916288797e-05,
"loss": 1.5781,
"step": 8210
},
{
"epoch": 1.94,
"learning_rate": 1.3936900649753931e-05,
"loss": 1.6089,
"step": 8220
},
{
"epoch": 1.94,
"learning_rate": 1.3881577123008921e-05,
"loss": 1.5119,
"step": 8230
},
{
"epoch": 1.94,
"learning_rate": 1.3826321385955535e-05,
"loss": 1.5515,
"step": 8240
},
{
"epoch": 1.95,
"learning_rate": 1.3771133775491307e-05,
"loss": 1.586,
"step": 8250
},
{
"epoch": 1.95,
"learning_rate": 1.3716014628098431e-05,
"loss": 1.6166,
"step": 8260
},
{
"epoch": 1.95,
"learning_rate": 1.3660964279841647e-05,
"loss": 1.5123,
"step": 8270
},
{
"epoch": 1.95,
"learning_rate": 1.3605983066366234e-05,
"loss": 1.5726,
"step": 8280
},
{
"epoch": 1.95,
"learning_rate": 1.3551071322895936e-05,
"loss": 1.5723,
"step": 8290
},
{
"epoch": 1.96,
"learning_rate": 1.3496229384230974e-05,
"loss": 1.5756,
"step": 8300
},
{
"epoch": 1.96,
"learning_rate": 1.3441457584745928e-05,
"loss": 1.5795,
"step": 8310
},
{
"epoch": 1.96,
"learning_rate": 1.3386756258387744e-05,
"loss": 1.5917,
"step": 8320
},
{
"epoch": 1.96,
"learning_rate": 1.33321257386737e-05,
"loss": 1.5951,
"step": 8330
},
{
"epoch": 1.97,
"learning_rate": 1.3277566358689336e-05,
"loss": 1.5424,
"step": 8340
},
{
"epoch": 1.97,
"learning_rate": 1.3223078451086487e-05,
"loss": 1.548,
"step": 8350
},
{
"epoch": 1.97,
"learning_rate": 1.316866234808119e-05,
"loss": 1.5404,
"step": 8360
},
{
"epoch": 1.97,
"learning_rate": 1.3114318381451688e-05,
"loss": 1.5472,
"step": 8370
},
{
"epoch": 1.98,
"learning_rate": 1.3060046882536409e-05,
"loss": 1.5692,
"step": 8380
},
{
"epoch": 1.98,
"learning_rate": 1.3005848182231939e-05,
"loss": 1.4966,
"step": 8390
},
{
"epoch": 1.98,
"learning_rate": 1.2951722610990993e-05,
"loss": 1.564,
"step": 8400
},
{
"epoch": 1.98,
"learning_rate": 1.2897670498820455e-05,
"loss": 1.5788,
"step": 8410
},
{
"epoch": 1.99,
"learning_rate": 1.284369217527928e-05,
"loss": 1.5353,
"step": 8420
},
{
"epoch": 1.99,
"learning_rate": 1.2789787969476554e-05,
"loss": 1.5966,
"step": 8430
},
{
"epoch": 1.99,
"learning_rate": 1.2735958210069448e-05,
"loss": 1.5634,
"step": 8440
},
{
"epoch": 1.99,
"learning_rate": 1.268220322526123e-05,
"loss": 1.5649,
"step": 8450
},
{
"epoch": 1.99,
"learning_rate": 1.262852334279929e-05,
"loss": 1.4958,
"step": 8460
},
{
"epoch": 2.0,
"learning_rate": 1.257491888997308e-05,
"loss": 1.5192,
"step": 8470
},
{
"epoch": 2.0,
"learning_rate": 1.2521390193612165e-05,
"loss": 1.5598,
"step": 8480
},
{
"epoch": 2.0,
"learning_rate": 1.2467937580084225e-05,
"loss": 1.5079,
"step": 8490
},
{
"epoch": 2.0,
"learning_rate": 1.2414561375293038e-05,
"loss": 1.514,
"step": 8500
},
{
"epoch": 2.01,
"learning_rate": 1.236126190467655e-05,
"loss": 1.5451,
"step": 8510
},
{
"epoch": 2.01,
"learning_rate": 1.2308039493204823e-05,
"loss": 1.5526,
"step": 8520
},
{
"epoch": 2.01,
"learning_rate": 1.2254894465378094e-05,
"loss": 1.4948,
"step": 8530
},
{
"epoch": 2.01,
"learning_rate": 1.220182714522479e-05,
"loss": 1.6119,
"step": 8540
},
{
"epoch": 2.02,
"learning_rate": 1.2148837856299533e-05,
"loss": 1.5818,
"step": 8550
},
{
"epoch": 2.02,
"learning_rate": 1.2095926921681219e-05,
"loss": 1.5446,
"step": 8560
},
{
"epoch": 2.02,
"learning_rate": 1.2043094663970982e-05,
"loss": 1.5348,
"step": 8570
},
{
"epoch": 2.02,
"learning_rate": 1.1990341405290271e-05,
"loss": 1.5595,
"step": 8580
},
{
"epoch": 2.03,
"learning_rate": 1.193766746727886e-05,
"loss": 1.5402,
"step": 8590
},
{
"epoch": 2.03,
"learning_rate": 1.1885073171092926e-05,
"loss": 1.5021,
"step": 8600
},
{
"epoch": 2.03,
"learning_rate": 1.1832558837403043e-05,
"loss": 1.5309,
"step": 8610
},
{
"epoch": 2.03,
"learning_rate": 1.1780124786392258e-05,
"loss": 1.6031,
"step": 8620
},
{
"epoch": 2.03,
"learning_rate": 1.1727771337754112e-05,
"loss": 1.6009,
"step": 8630
},
{
"epoch": 2.04,
"learning_rate": 1.167549881069075e-05,
"loss": 1.5555,
"step": 8640
},
{
"epoch": 2.04,
"learning_rate": 1.162330752391089e-05,
"loss": 1.5342,
"step": 8650
},
{
"epoch": 2.04,
"learning_rate": 1.1571197795627941e-05,
"loss": 1.5715,
"step": 8660
},
{
"epoch": 2.04,
"learning_rate": 1.1519169943558042e-05,
"loss": 1.5763,
"step": 8670
},
{
"epoch": 2.05,
"learning_rate": 1.1467224284918141e-05,
"loss": 1.5585,
"step": 8680
},
{
"epoch": 2.05,
"learning_rate": 1.141536113642403e-05,
"loss": 1.5248,
"step": 8690
},
{
"epoch": 2.05,
"learning_rate": 1.1363580814288435e-05,
"loss": 1.5985,
"step": 8700
},
{
"epoch": 2.05,
"learning_rate": 1.1311883634219095e-05,
"loss": 1.5718,
"step": 8710
},
{
"epoch": 2.06,
"learning_rate": 1.1260269911416807e-05,
"loss": 1.5899,
"step": 8720
},
{
"epoch": 2.06,
"learning_rate": 1.1208739960573553e-05,
"loss": 1.5258,
"step": 8730
},
{
"epoch": 2.06,
"learning_rate": 1.1157294095870527e-05,
"loss": 1.517,
"step": 8740
},
{
"epoch": 2.06,
"learning_rate": 1.110593263097626e-05,
"loss": 1.4968,
"step": 8750
},
{
"epoch": 2.07,
"learning_rate": 1.105465587904467e-05,
"loss": 1.538,
"step": 8760
},
{
"epoch": 2.07,
"learning_rate": 1.100346415271321e-05,
"loss": 1.5363,
"step": 8770
},
{
"epoch": 2.07,
"learning_rate": 1.0952357764100906e-05,
"loss": 1.5474,
"step": 8780
},
{
"epoch": 2.07,
"learning_rate": 1.090133702480647e-05,
"loss": 1.5999,
"step": 8790
},
{
"epoch": 2.07,
"learning_rate": 1.0850402245906408e-05,
"loss": 1.538,
"step": 8800
},
{
"epoch": 2.08,
"learning_rate": 1.0799553737953136e-05,
"loss": 1.5791,
"step": 8810
},
{
"epoch": 2.08,
"learning_rate": 1.0748791810973052e-05,
"loss": 1.6128,
"step": 8820
},
{
"epoch": 2.08,
"learning_rate": 1.0698116774464676e-05,
"loss": 1.5819,
"step": 8830
},
{
"epoch": 2.08,
"learning_rate": 1.064752893739673e-05,
"loss": 1.4816,
"step": 8840
},
{
"epoch": 2.09,
"learning_rate": 1.059702860820632e-05,
"loss": 1.5091,
"step": 8850
},
{
"epoch": 2.09,
"learning_rate": 1.0546616094796968e-05,
"loss": 1.5383,
"step": 8860
},
{
"epoch": 2.09,
"learning_rate": 1.0496291704536798e-05,
"loss": 1.5577,
"step": 8870
},
{
"epoch": 2.09,
"learning_rate": 1.044605574425664e-05,
"loss": 1.5483,
"step": 8880
},
{
"epoch": 2.1,
"learning_rate": 1.0395908520248143e-05,
"loss": 1.5387,
"step": 8890
},
{
"epoch": 2.1,
"learning_rate": 1.0345850338261964e-05,
"loss": 1.5891,
"step": 8900
},
{
"epoch": 2.1,
"learning_rate": 1.0295881503505836e-05,
"loss": 1.565,
"step": 8910
},
{
"epoch": 2.1,
"learning_rate": 1.0246002320642742e-05,
"loss": 1.5359,
"step": 8920
},
{
"epoch": 2.11,
"learning_rate": 1.0196213093789042e-05,
"loss": 1.5579,
"step": 8930
},
{
"epoch": 2.11,
"learning_rate": 1.0146514126512663e-05,
"loss": 1.563,
"step": 8940
},
{
"epoch": 2.11,
"learning_rate": 1.0096905721831176e-05,
"loss": 1.5762,
"step": 8950
},
{
"epoch": 2.11,
"learning_rate": 1.004738818221001e-05,
"loss": 1.4976,
"step": 8960
},
{
"epoch": 2.11,
"learning_rate": 9.997961809560564e-06,
"loss": 1.5758,
"step": 8970
},
{
"epoch": 2.12,
"learning_rate": 9.948626905238415e-06,
"loss": 1.5827,
"step": 8980
},
{
"epoch": 2.12,
"learning_rate": 9.899383770041426e-06,
"loss": 1.5686,
"step": 8990
},
{
"epoch": 2.12,
"learning_rate": 9.850232704207951e-06,
"loss": 1.5696,
"step": 9000
},
{
"epoch": 2.12,
"eval_loss": 1.5784997940063477,
"eval_runtime": 121.4622,
"eval_samples_per_second": 11.287,
"eval_steps_per_second": 2.824,
"step": 9000
},
{
"epoch": 2.12,
"learning_rate": 9.801174007414978e-06,
"loss": 1.5198,
"step": 9010
},
{
"epoch": 2.13,
"learning_rate": 9.752207978776346e-06,
"loss": 1.4989,
"step": 9020
},
{
"epoch": 2.13,
"learning_rate": 9.703334916840856e-06,
"loss": 1.5645,
"step": 9030
},
{
"epoch": 2.13,
"learning_rate": 9.654555119590506e-06,
"loss": 1.5655,
"step": 9040
},
{
"epoch": 2.13,
"learning_rate": 9.605868884438645e-06,
"loss": 1.5699,
"step": 9050
},
{
"epoch": 2.14,
"learning_rate": 9.557276508228164e-06,
"loss": 1.532,
"step": 9060
},
{
"epoch": 2.14,
"learning_rate": 9.508778287229714e-06,
"loss": 1.5158,
"step": 9070
},
{
"epoch": 2.14,
"learning_rate": 9.460374517139848e-06,
"loss": 1.5939,
"step": 9080
},
{
"epoch": 2.14,
"learning_rate": 9.412065493079261e-06,
"loss": 1.4778,
"step": 9090
},
{
"epoch": 2.15,
"learning_rate": 9.363851509590962e-06,
"loss": 1.5716,
"step": 9100
},
{
"epoch": 2.15,
"learning_rate": 9.315732860638518e-06,
"loss": 1.5349,
"step": 9110
},
{
"epoch": 2.15,
"learning_rate": 9.267709839604217e-06,
"loss": 1.5646,
"step": 9120
},
{
"epoch": 2.15,
"learning_rate": 9.219782739287292e-06,
"loss": 1.5573,
"step": 9130
},
{
"epoch": 2.16,
"learning_rate": 9.171951851902149e-06,
"loss": 1.5657,
"step": 9140
},
{
"epoch": 2.16,
"learning_rate": 9.124217469076593e-06,
"loss": 1.5415,
"step": 9150
},
{
"epoch": 2.16,
"learning_rate": 9.076579881850011e-06,
"loss": 1.5735,
"step": 9160
},
{
"epoch": 2.16,
"learning_rate": 9.029039380671636e-06,
"loss": 1.6238,
"step": 9170
},
{
"epoch": 2.16,
"learning_rate": 8.981596255398756e-06,
"loss": 1.612,
"step": 9180
},
{
"epoch": 2.17,
"learning_rate": 8.934250795294943e-06,
"loss": 1.4949,
"step": 9190
},
{
"epoch": 2.17,
"learning_rate": 8.887003289028326e-06,
"loss": 1.5107,
"step": 9200
},
{
"epoch": 2.17,
"learning_rate": 8.839854024669781e-06,
"loss": 1.534,
"step": 9210
},
{
"epoch": 2.17,
"learning_rate": 8.792803289691199e-06,
"loss": 1.538,
"step": 9220
},
{
"epoch": 2.18,
"learning_rate": 8.745851370963737e-06,
"loss": 1.5493,
"step": 9230
},
{
"epoch": 2.18,
"learning_rate": 8.698998554756052e-06,
"loss": 1.5706,
"step": 9240
},
{
"epoch": 2.18,
"learning_rate": 8.652245126732595e-06,
"loss": 1.5403,
"step": 9250
},
{
"epoch": 2.18,
"learning_rate": 8.605591371951815e-06,
"loss": 1.5141,
"step": 9260
},
{
"epoch": 2.19,
"learning_rate": 8.559037574864453e-06,
"loss": 1.59,
"step": 9270
},
{
"epoch": 2.19,
"learning_rate": 8.512584019311806e-06,
"loss": 1.5207,
"step": 9280
},
{
"epoch": 2.19,
"learning_rate": 8.466230988523988e-06,
"loss": 1.5303,
"step": 9290
},
{
"epoch": 2.19,
"learning_rate": 8.419978765118206e-06,
"loss": 1.5287,
"step": 9300
},
{
"epoch": 2.2,
"learning_rate": 8.373827631097052e-06,
"loss": 1.5204,
"step": 9310
},
{
"epoch": 2.2,
"learning_rate": 8.327777867846758e-06,
"loss": 1.5644,
"step": 9320
},
{
"epoch": 2.2,
"learning_rate": 8.281829756135492e-06,
"loss": 1.5745,
"step": 9330
},
{
"epoch": 2.2,
"learning_rate": 8.23598357611165e-06,
"loss": 1.5933,
"step": 9340
},
{
"epoch": 2.2,
"learning_rate": 8.190239607302133e-06,
"loss": 1.5505,
"step": 9350
},
{
"epoch": 2.21,
"learning_rate": 8.144598128610684e-06,
"loss": 1.5541,
"step": 9360
},
{
"epoch": 2.21,
"learning_rate": 8.099059418316126e-06,
"loss": 1.6338,
"step": 9370
},
{
"epoch": 2.21,
"learning_rate": 8.053623754070714e-06,
"loss": 1.5897,
"step": 9380
},
{
"epoch": 2.21,
"learning_rate": 8.008291412898414e-06,
"loss": 1.5704,
"step": 9390
},
{
"epoch": 2.22,
"learning_rate": 7.963062671193225e-06,
"loss": 1.5133,
"step": 9400
},
{
"epoch": 2.22,
"learning_rate": 7.917937804717521e-06,
"loss": 1.6135,
"step": 9410
},
{
"epoch": 2.22,
"learning_rate": 7.872917088600307e-06,
"loss": 1.4678,
"step": 9420
},
{
"epoch": 2.22,
"learning_rate": 7.828000797335593e-06,
"loss": 1.5418,
"step": 9430
},
{
"epoch": 2.23,
"learning_rate": 7.783189204780696e-06,
"loss": 1.6363,
"step": 9440
},
{
"epoch": 2.23,
"learning_rate": 7.738482584154601e-06,
"loss": 1.5124,
"step": 9450
},
{
"epoch": 2.23,
"learning_rate": 7.693881208036253e-06,
"loss": 1.5569,
"step": 9460
},
{
"epoch": 2.23,
"learning_rate": 7.649385348362912e-06,
"loss": 1.567,
"step": 9470
},
{
"epoch": 2.24,
"learning_rate": 7.604995276428501e-06,
"loss": 1.5967,
"step": 9480
},
{
"epoch": 2.24,
"learning_rate": 7.560711262881967e-06,
"loss": 1.5462,
"step": 9490
},
{
"epoch": 2.24,
"learning_rate": 7.516533577725593e-06,
"loss": 1.5963,
"step": 9500
},
{
"epoch": 2.24,
"learning_rate": 7.472462490313379e-06,
"loss": 1.5272,
"step": 9510
},
{
"epoch": 2.24,
"learning_rate": 7.428498269349376e-06,
"loss": 1.6033,
"step": 9520
},
{
"epoch": 2.25,
"learning_rate": 7.384641182886098e-06,
"loss": 1.5305,
"step": 9530
},
{
"epoch": 2.25,
"learning_rate": 7.340891498322824e-06,
"loss": 1.5018,
"step": 9540
},
{
"epoch": 2.25,
"learning_rate": 7.297249482404009e-06,
"loss": 1.5483,
"step": 9550
},
{
"epoch": 2.25,
"learning_rate": 7.2537154012176425e-06,
"loss": 1.5751,
"step": 9560
},
{
"epoch": 2.26,
"learning_rate": 7.210289520193619e-06,
"loss": 1.5893,
"step": 9570
},
{
"epoch": 2.26,
"learning_rate": 7.166972104102163e-06,
"loss": 1.5296,
"step": 9580
},
{
"epoch": 2.26,
"learning_rate": 7.123763417052151e-06,
"loss": 1.4995,
"step": 9590
},
{
"epoch": 2.26,
"learning_rate": 7.080663722489536e-06,
"loss": 1.5799,
"step": 9600
},
{
"epoch": 2.27,
"learning_rate": 7.037673283195742e-06,
"loss": 1.596,
"step": 9610
},
{
"epoch": 2.27,
"learning_rate": 6.99479236128607e-06,
"loss": 1.4874,
"step": 9620
},
{
"epoch": 2.27,
"learning_rate": 6.952021218208069e-06,
"loss": 1.6123,
"step": 9630
},
{
"epoch": 2.27,
"learning_rate": 6.909360114739963e-06,
"loss": 1.5308,
"step": 9640
},
{
"epoch": 2.28,
"learning_rate": 6.866809310989053e-06,
"loss": 1.5896,
"step": 9650
},
{
"epoch": 2.28,
"learning_rate": 6.824369066390157e-06,
"loss": 1.5295,
"step": 9660
},
{
"epoch": 2.28,
"learning_rate": 6.782039639703991e-06,
"loss": 1.5922,
"step": 9670
},
{
"epoch": 2.28,
"learning_rate": 6.739821289015607e-06,
"loss": 1.5989,
"step": 9680
},
{
"epoch": 2.28,
"learning_rate": 6.6977142717328165e-06,
"loss": 1.5635,
"step": 9690
},
{
"epoch": 2.29,
"learning_rate": 6.6557188445846465e-06,
"loss": 1.5945,
"step": 9700
},
{
"epoch": 2.29,
"learning_rate": 6.613835263619727e-06,
"loss": 1.5264,
"step": 9710
},
{
"epoch": 2.29,
"learning_rate": 6.572063784204769e-06,
"loss": 1.5457,
"step": 9720
},
{
"epoch": 2.29,
"learning_rate": 6.530404661022984e-06,
"loss": 1.549,
"step": 9730
},
{
"epoch": 2.3,
"learning_rate": 6.488858148072547e-06,
"loss": 1.6442,
"step": 9740
},
{
"epoch": 2.3,
"learning_rate": 6.44742449866505e-06,
"loss": 1.5169,
"step": 9750
},
{
"epoch": 2.3,
"learning_rate": 6.406103965423932e-06,
"loss": 1.6114,
"step": 9760
},
{
"epoch": 2.3,
"learning_rate": 6.364896800282968e-06,
"loss": 1.519,
"step": 9770
},
{
"epoch": 2.31,
"learning_rate": 6.323803254484712e-06,
"loss": 1.5455,
"step": 9780
},
{
"epoch": 2.31,
"learning_rate": 6.282823578578986e-06,
"loss": 1.587,
"step": 9790
},
{
"epoch": 2.31,
"learning_rate": 6.241958022421332e-06,
"loss": 1.5676,
"step": 9800
},
{
"epoch": 2.31,
"learning_rate": 6.201206835171497e-06,
"loss": 1.5286,
"step": 9810
},
{
"epoch": 2.32,
"learning_rate": 6.1605702652919095e-06,
"loss": 1.5097,
"step": 9820
},
{
"epoch": 2.32,
"learning_rate": 6.12004856054619e-06,
"loss": 1.5699,
"step": 9830
},
{
"epoch": 2.32,
"learning_rate": 6.079641967997596e-06,
"loss": 1.5225,
"step": 9840
},
{
"epoch": 2.32,
"learning_rate": 6.039350734007546e-06,
"loss": 1.5637,
"step": 9850
},
{
"epoch": 2.32,
"learning_rate": 5.9991751042341085e-06,
"loss": 1.5933,
"step": 9860
},
{
"epoch": 2.33,
"learning_rate": 5.959115323630521e-06,
"loss": 1.6083,
"step": 9870
},
{
"epoch": 2.33,
"learning_rate": 5.919171636443663e-06,
"loss": 1.5347,
"step": 9880
},
{
"epoch": 2.33,
"learning_rate": 5.879344286212596e-06,
"loss": 1.5161,
"step": 9890
},
{
"epoch": 2.33,
"learning_rate": 5.8396335157670625e-06,
"loss": 1.5147,
"step": 9900
},
{
"epoch": 2.34,
"learning_rate": 5.800039567226004e-06,
"loss": 1.4942,
"step": 9910
},
{
"epoch": 2.34,
"learning_rate": 5.760562681996121e-06,
"loss": 1.5687,
"step": 9920
},
{
"epoch": 2.34,
"learning_rate": 5.721203100770339e-06,
"loss": 1.5276,
"step": 9930
},
{
"epoch": 2.34,
"learning_rate": 5.681961063526392e-06,
"loss": 1.5139,
"step": 9940
},
{
"epoch": 2.35,
"learning_rate": 5.6428368095253286e-06,
"loss": 1.6345,
"step": 9950
},
{
"epoch": 2.35,
"learning_rate": 5.603830577310084e-06,
"loss": 1.5481,
"step": 9960
},
{
"epoch": 2.35,
"learning_rate": 5.564942604703996e-06,
"loss": 1.5523,
"step": 9970
},
{
"epoch": 2.35,
"learning_rate": 5.526173128809362e-06,
"loss": 1.5385,
"step": 9980
},
{
"epoch": 2.36,
"learning_rate": 5.487522386006e-06,
"loss": 1.5857,
"step": 9990
},
{
"epoch": 2.36,
"learning_rate": 5.448990611949823e-06,
"loss": 1.5659,
"step": 10000
},
{
"epoch": 2.36,
"eval_loss": 1.5774571895599365,
"eval_runtime": 120.2987,
"eval_samples_per_second": 11.397,
"eval_steps_per_second": 2.851,
"step": 10000
},
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.41057804157136e-06, |
|
"loss": 1.5826, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.372284909074362e-06, |
|
"loss": 1.5019, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.334111447934348e-06, |
|
"loss": 1.5691, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.296057890897213e-06, |
|
"loss": 1.524, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.258124469977776e-06, |
|
"loss": 1.5359, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.220311416458376e-06, |
|
"loss": 1.631, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.182618960887476e-06, |
|
"loss": 1.5473, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.145047333078235e-06, |
|
"loss": 1.5423, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.1075967621071166e-06, |
|
"loss": 1.5166, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.070267476312515e-06, |
|
"loss": 1.6095, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.033059703293319e-06, |
|
"loss": 1.552, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.995973669907553e-06, |
|
"loss": 1.5961, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.959009602270989e-06, |
|
"loss": 1.548, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.922167725755761e-06, |
|
"loss": 1.6018, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.885448264989015e-06, |
|
"loss": 1.4976, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.8488514438514955e-06, |
|
"loss": 1.5935, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.812377485476224e-06, |
|
"loss": 1.5797, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.776026612247108e-06, |
|
"loss": 1.5587, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.739799045797611e-06, |
|
"loss": 1.5267, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.7036950070093645e-06, |
|
"loss": 1.5398, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.667714716010882e-06, |
|
"loss": 1.6229, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.631858392176142e-06, |
|
"loss": 1.5529, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.596126254123309e-06, |
|
"loss": 1.6194, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.560518519713372e-06, |
|
"loss": 1.5792, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.525035406048819e-06, |
|
"loss": 1.5083, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.4896771294723334e-06, |
|
"loss": 1.5629, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.4544439055654474e-06, |
|
"loss": 1.464, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.419335949147241e-06, |
|
"loss": 1.5578, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.384353474273023e-06, |
|
"loss": 1.5438, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.349496694233057e-06, |
|
"loss": 1.5336, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.3147658215512196e-06, |
|
"loss": 1.5478, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.280161067983721e-06, |
|
"loss": 1.5084, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.245682644517815e-06, |
|
"loss": 1.6072, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.211330761370533e-06, |
|
"loss": 1.5532, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.177105627987363e-06, |
|
"loss": 1.5815, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.143007453040995e-06, |
|
"loss": 1.5864, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.109036444430045e-06, |
|
"loss": 1.4932, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.075192809277803e-06, |
|
"loss": 1.4614, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.041476753930937e-06, |
|
"loss": 1.5828, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.007888483958258e-06, |
|
"loss": 1.5167, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.974428204149469e-06, |
|
"loss": 1.5448, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.941096118513893e-06, |
|
"loss": 1.5393, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.907892430279272e-06, |
|
"loss": 1.5387, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.87481734189048e-06, |
|
"loss": 1.5227, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.841871055008317e-06, |
|
"loss": 1.6027, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.809053770508261e-06, |
|
"loss": 1.6111, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.776365688479283e-06, |
|
"loss": 1.5452, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.743807008222572e-06, |
|
"loss": 1.5782, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.7113779282503564e-06, |
|
"loss": 1.5405, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.6790786462846783e-06, |
|
"loss": 1.6353, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.6469093592562066e-06, |
|
"loss": 1.4493, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.6148702633030135e-06, |
|
"loss": 1.6325, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.582961553769387e-06, |
|
"loss": 1.5489, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.5511834252046435e-06, |
|
"loss": 1.6267, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.5195360713619452e-06, |
|
"loss": 1.5385, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.4880196851971055e-06, |
|
"loss": 1.5823, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.4566344588674248e-06, |
|
"loss": 1.5976, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.425380583730506e-06, |
|
"loss": 1.5544, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.394258250343102e-06, |
|
"loss": 1.5691, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.363267648459956e-06, |
|
"loss": 1.4777, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3324089670326185e-06, |
|
"loss": 1.5837, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3016823942083303e-06, |
|
"loss": 1.5463, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2710881173288384e-06, |
|
"loss": 1.5895, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2406263229292992e-06, |
|
"loss": 1.5495, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2102971967370944e-06, |
|
"loss": 1.6608, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.1801009236707285e-06, |
|
"loss": 1.5773, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.1500376878386832e-06, |
|
"loss": 1.5229, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.120107672538325e-06, |
|
"loss": 1.5475, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.090311060254747e-06, |
|
"loss": 1.5161, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.0606480326596825e-06, |
|
"loss": 1.5614, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.031118770610386e-06, |
|
"loss": 1.5324, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.0017234541485503e-06, |
|
"loss": 1.6035, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.9724622624991815e-06, |
|
"loss": 1.5388, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.94333537406952e-06, |
|
"loss": 1.5093, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.9143429664479525e-06, |
|
"loss": 1.5894, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.8854852164029225e-06, |
|
"loss": 1.5607, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.8567622998818765e-06, |
|
"loss": 1.5648, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.8281743920101523e-06, |
|
"loss": 1.565, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.799721667089944e-06, |
|
"loss": 1.5372, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.7714042985992144e-06, |
|
"loss": 1.5587, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.7432224591906698e-06, |
|
"loss": 1.5361, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.715176320690674e-06, |
|
"loss": 1.5489, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.687266054098217e-06, |
|
"loss": 1.5527, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.65949182958386e-06, |
|
"loss": 1.5471, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.6318538164887303e-06, |
|
"loss": 1.528, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.604352183323447e-06, |
|
"loss": 1.5848, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.576987097767117e-06, |
|
"loss": 1.5875, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.549758726666307e-06, |
|
"loss": 1.5908, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5226672360340373e-06, |
|
"loss": 1.5164, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.4984020638028378e-06, |
|
"loss": 1.54, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.4715711004368204e-06, |
|
"loss": 1.4219, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.4448774942539832e-06, |
|
"loss": 1.527, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.418321408006857e-06, |
|
"loss": 1.5304, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.391903003609486e-06, |
|
"loss": 1.5367, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.3656224421364724e-06, |
|
"loss": 1.5717, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.339479883821968e-06, |
|
"loss": 1.5522, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.3134754880587307e-06, |
|
"loss": 1.5375, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.2876094133971154e-06, |
|
"loss": 1.5232, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.261881817544137e-06, |
|
"loss": 1.5888, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.2362928573624877e-06, |
|
"loss": 1.5925, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 1.576684594154358, |
|
"eval_runtime": 121.7819, |
|
"eval_samples_per_second": 11.258, |
|
"eval_steps_per_second": 2.817, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.210842688869591e-06, |
|
"loss": 1.6087, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.1855314672366568e-06, |
|
"loss": 1.5736, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.1603593467877243e-06, |
|
"loss": 1.5425, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.135326480998717e-06, |
|
"loss": 1.5811, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.1104330224965247e-06, |
|
"loss": 1.5116, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.0856791230580484e-06, |
|
"loss": 1.5301, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.0610649336093134e-06, |
|
"loss": 1.5275, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.036590604224503e-06, |
|
"loss": 1.5363, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.012256284125072e-06, |
|
"loss": 1.4919, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9880621216788298e-06, |
|
"loss": 1.627, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9640082643990394e-06, |
|
"loss": 1.5766, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9400948589435088e-06, |
|
"loss": 1.5139, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9163220511137114e-06, |
|
"loss": 1.5513, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.8926899858538794e-06, |
|
"loss": 1.5805, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.8691988072501359e-06, |
|
"loss": 1.5263, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.845848658529606e-06, |
|
"loss": 1.528, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.8226396820595431e-06, |
|
"loss": 1.5566, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.7995720193464766e-06, |
|
"loss": 1.5577, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.7766458110353297e-06, |
|
"loss": 1.5529, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.753861196908571e-06, |
|
"loss": 1.5468, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.7312183158853524e-06, |
|
"loss": 1.568, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.7087173060206879e-06, |
|
"loss": 1.5648, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.6863583045045816e-06, |
|
"loss": 1.5755, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.6641414476612077e-06, |
|
"loss": 1.5526, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.642066870948078e-06, |
|
"loss": 1.5661, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.6201347089552038e-06, |
|
"loss": 1.5303, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.598345095404305e-06, |
|
"loss": 1.5469, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.576698163147955e-06, |
|
"loss": 1.5015, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.5551940441688034e-06, |
|
"loss": 1.6044, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.5338328695787496e-06, |
|
"loss": 1.4745, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.512614769618162e-06, |
|
"loss": 1.5845, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.491539873655068e-06, |
|
"loss": 1.5262, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.4706083101843737e-06, |
|
"loss": 1.5452, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.449820206827071e-06, |
|
"loss": 1.4844, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4291756903294845e-06, |
|
"loss": 1.5662, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4086748865624666e-06, |
|
"loss": 1.5769, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.3883179205206459e-06, |
|
"loss": 1.5101, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.3681049163216664e-06, |
|
"loss": 1.6027, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.3480359972054325e-06, |
|
"loss": 1.5928, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.3281112855333428e-06, |
|
"loss": 1.4951, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.3083309027875663e-06, |
|
"loss": 1.5245, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.2886949695702782e-06, |
|
"loss": 1.5047, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.269203605602942e-06, |
|
"loss": 1.5346, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.249856929725579e-06, |
|
"loss": 1.4552, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.2306550598960298e-06, |
|
"loss": 1.6088, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.2115981131892469e-06, |
|
"loss": 1.5466, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.1926862057965755e-06, |
|
"loss": 1.5386, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1739194530250574e-06, |
|
"loss": 1.6025, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1552979692967064e-06, |
|
"loss": 1.5199, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1368218681478276e-06, |
|
"loss": 1.5507, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1184912622283133e-06, |
|
"loss": 1.5485, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.1003062633009765e-06, |
|
"loss": 1.5408, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.0822669822408427e-06, |
|
"loss": 1.5907, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.064373529034493e-06, |
|
"loss": 1.581, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.0466260127793808e-06, |
|
"loss": 1.5346, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.0290245416831823e-06, |
|
"loss": 1.5924, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.0115692230631245e-06, |
|
"loss": 1.4831, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.942601633453313e-07, |
|
"loss": 1.5182, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.77097468064178e-07, |
|
"loss": 1.5762, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.600812418616434e-07, |
|
"loss": 1.5347, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.432115884866865e-07, |
|
"loss": 1.5561, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.264886107945986e-07, |
|
"loss": 1.5222, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.099124107463718e-07, |
|
"loss": 1.5198, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.934830894080897e-07, |
|
"loss": 1.5235, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.772007469503241e-07, |
|
"loss": 1.547, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.610654826474828e-07, |
|
"loss": 1.5714, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.450773948772445e-07, |
|
"loss": 1.5183, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.292365811199381e-07, |
|
"loss": 1.5648, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.135431379579589e-07, |
|
"loss": 1.6074, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.979971610751701e-07, |
|
"loss": 1.6357, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.825987452563271e-07, |
|
"loss": 1.584, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.673479843864933e-07, |
|
"loss": 1.5438, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.522449714504748e-07, |
|
"loss": 1.5387, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.37289798532248e-07, |
|
"loss": 1.5856, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.224825568143967e-07, |
|
"loss": 1.5303, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.078233365775677e-07, |
|
"loss": 1.5315, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.933122271998993e-07, |
|
"loss": 1.5769, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.789493171565048e-07, |
|
"loss": 1.5182, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.647346940189037e-07, |
|
"loss": 1.5813, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.50668444454508e-07, |
|
"loss": 1.5716, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.367506542260842e-07, |
|
"loss": 1.5617, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.229814081912366e-07, |
|
"loss": 1.6197, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.093607903018828e-07, |
|
"loss": 1.5667, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 5.958888836037513e-07, |
|
"loss": 1.5824, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 5.825657702358572e-07, |
|
"loss": 1.5595, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.69391531430033e-07, |
|
"loss": 1.5121, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.563662475103982e-07, |
|
"loss": 1.4909, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.434899978928904e-07, |
|
"loss": 1.5018, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.307628610847798e-07, |
|
"loss": 1.5611, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.18184914684175e-07, |
|
"loss": 1.5067, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.057562353795813e-07, |
|
"loss": 1.5029, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.934768989493938e-07, |
|
"loss": 1.6069, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.813469802614684e-07, |
|
"loss": 1.5995, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.69366553272646e-07, |
|
"loss": 1.5461, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.5753569102831016e-07, |
|
"loss": 1.5528, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.4585446566193236e-07, |
|
"loss": 1.6161, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.343229483946526e-07, |
|
"loss": 1.6181, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.2294120953482173e-07, |
|
"loss": 1.5454, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.117093184775822e-07, |
|
"loss": 1.524, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.006273437044489e-07, |
|
"loss": 1.5892, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 1.5761640071868896, |
|
"eval_runtime": 120.7457, |
|
"eval_samples_per_second": 11.354, |
|
"eval_steps_per_second": 2.841, |
|
"step": 12000 |
|
} |
|
], |
|
"max_steps": 12723, |
|
"num_train_epochs": 3, |
|
"total_flos": 4.293883720613929e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|