|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7867652664481207, |
|
"global_step": 10320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.992376305557673e-06, |
|
"loss": 30.7359, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.984752611115347e-06, |
|
"loss": 31.0328, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.97712891667302e-06, |
|
"loss": 30.2281, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.969505222230695e-06, |
|
"loss": 30.8781, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.961881527788367e-06, |
|
"loss": 29.9109, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.954257833346041e-06, |
|
"loss": 29.7312, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.946634138903714e-06, |
|
"loss": 28.8719, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.939010444461386e-06, |
|
"loss": 28.7094, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.93138675001906e-06, |
|
"loss": 28.0828, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.923763055576732e-06, |
|
"loss": 27.6328, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.916139361134408e-06, |
|
"loss": 26.9016, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.90851566669208e-06, |
|
"loss": 26.3891, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.900891972249752e-06, |
|
"loss": 25.8813, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.893268277807426e-06, |
|
"loss": 25.7141, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.885644583365099e-06, |
|
"loss": 25.2875, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.878020888922773e-06, |
|
"loss": 24.6219, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.870397194480445e-06, |
|
"loss": 24.1156, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.86277350003812e-06, |
|
"loss": 24.0234, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.855149805595793e-06, |
|
"loss": 23.6562, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.847526111153465e-06, |
|
"loss": 23.2922, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.839902416711139e-06, |
|
"loss": 22.7734, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.832278722268811e-06, |
|
"loss": 22.5281, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.824655027826485e-06, |
|
"loss": 22.0547, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.817031333384158e-06, |
|
"loss": 22.1594, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.809407638941832e-06, |
|
"loss": 21.7297, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.801783944499506e-06, |
|
"loss": 21.2375, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.794160250057178e-06, |
|
"loss": 20.6297, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.786536555614852e-06, |
|
"loss": 20.3438, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.778912861172524e-06, |
|
"loss": 20.0625, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.771289166730198e-06, |
|
"loss": 19.825, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.76366547228787e-06, |
|
"loss": 19.6578, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.756041777845544e-06, |
|
"loss": 19.4641, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.748418083403218e-06, |
|
"loss": 18.9703, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.74079438896089e-06, |
|
"loss": 18.5938, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.733170694518565e-06, |
|
"loss": 18.2047, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.725547000076237e-06, |
|
"loss": 17.3922, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.717923305633911e-06, |
|
"loss": 17.1758, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.710299611191583e-06, |
|
"loss": 16.975, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.702675916749257e-06, |
|
"loss": 15.7492, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.695052222306931e-06, |
|
"loss": 15.2898, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.687428527864603e-06, |
|
"loss": 14.4289, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.679804833422277e-06, |
|
"loss": 14.3398, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.67218113897995e-06, |
|
"loss": 13.6992, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.664557444537624e-06, |
|
"loss": 12.75, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.656933750095296e-06, |
|
"loss": 12.8219, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.64931005565297e-06, |
|
"loss": 12.1195, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.641686361210644e-06, |
|
"loss": 10.5531, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.634062666768316e-06, |
|
"loss": 9.9711, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.62643897232599e-06, |
|
"loss": 10.1711, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.618815277883662e-06, |
|
"loss": 9.2937, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.611191583441336e-06, |
|
"loss": 8.8313, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.603567888999009e-06, |
|
"loss": 8.4027, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.595944194556683e-06, |
|
"loss": 8.5461, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.588320500114357e-06, |
|
"loss": 8.4668, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.580696805672029e-06, |
|
"loss": 7.9266, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.573073111229703e-06, |
|
"loss": 7.718, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.565449416787375e-06, |
|
"loss": 7.6984, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.55782572234505e-06, |
|
"loss": 7.4594, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.550202027902723e-06, |
|
"loss": 7.302, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.542578333460395e-06, |
|
"loss": 7.2758, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.53495463901807e-06, |
|
"loss": 6.9484, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.527330944575742e-06, |
|
"loss": 6.5934, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.519707250133416e-06, |
|
"loss": 6.4156, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.512083555691088e-06, |
|
"loss": 6.3332, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.504459861248762e-06, |
|
"loss": 6.3387, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.496836166806436e-06, |
|
"loss": 6.7496, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.489212472364108e-06, |
|
"loss": 6.2605, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.481588777921782e-06, |
|
"loss": 6.1672, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.473965083479454e-06, |
|
"loss": 6.448, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.466341389037128e-06, |
|
"loss": 6.2863, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.4587176945948e-06, |
|
"loss": 5.8332, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.451094000152475e-06, |
|
"loss": 5.7637, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.443470305710149e-06, |
|
"loss": 5.8871, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.435846611267821e-06, |
|
"loss": 5.6824, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.428222916825495e-06, |
|
"loss": 5.6824, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.420599222383167e-06, |
|
"loss": 5.623, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.412975527940841e-06, |
|
"loss": 5.793, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.405351833498513e-06, |
|
"loss": 5.6578, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.397728139056187e-06, |
|
"loss": 5.4418, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.390104444613861e-06, |
|
"loss": 5.6582, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.382480750171534e-06, |
|
"loss": 5.4758, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.374857055729208e-06, |
|
"loss": 5.3496, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.36723336128688e-06, |
|
"loss": 5.809, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.359609666844554e-06, |
|
"loss": 5.3367, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.351985972402226e-06, |
|
"loss": 5.3164, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.3443622779599e-06, |
|
"loss": 5.2313, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.336738583517574e-06, |
|
"loss": 5.3535, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.329114889075246e-06, |
|
"loss": 5.477, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.32149119463292e-06, |
|
"loss": 5.5727, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.313867500190593e-06, |
|
"loss": 5.1645, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.306243805748267e-06, |
|
"loss": 5.0938, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.298620111305939e-06, |
|
"loss": 5.1703, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.290996416863613e-06, |
|
"loss": 5.0945, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.283372722421287e-06, |
|
"loss": 5.1152, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.27574902797896e-06, |
|
"loss": 5.0703, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.268125333536633e-06, |
|
"loss": 5.0184, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.260501639094305e-06, |
|
"loss": 5.0059, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.25287794465198e-06, |
|
"loss": 4.9516, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.245254250209652e-06, |
|
"loss": 4.9363, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.237630555767326e-06, |
|
"loss": 5.1004, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.230006861325e-06, |
|
"loss": 4.8988, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.222383166882672e-06, |
|
"loss": 5.0332, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.214759472440346e-06, |
|
"loss": 5.0336, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.207135777998018e-06, |
|
"loss": 4.7898, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.199512083555692e-06, |
|
"loss": 4.8375, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.191888389113365e-06, |
|
"loss": 4.7547, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.184264694671038e-06, |
|
"loss": 4.7156, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.176641000228712e-06, |
|
"loss": 4.7141, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.169017305786385e-06, |
|
"loss": 4.7141, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.161393611344059e-06, |
|
"loss": 4.718, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.153769916901731e-06, |
|
"loss": 4.6184, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.146146222459405e-06, |
|
"loss": 4.7738, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.138522528017077e-06, |
|
"loss": 4.552, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.130898833574751e-06, |
|
"loss": 4.5945, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.123275139132425e-06, |
|
"loss": 4.5016, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.115651444690098e-06, |
|
"loss": 4.516, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.108027750247771e-06, |
|
"loss": 4.557, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.100404055805444e-06, |
|
"loss": 4.4656, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.092780361363118e-06, |
|
"loss": 4.4598, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.08515666692079e-06, |
|
"loss": 4.425, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.077532972478464e-06, |
|
"loss": 4.5328, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.069909278036138e-06, |
|
"loss": 4.4902, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.06228558359381e-06, |
|
"loss": 4.4094, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.054661889151484e-06, |
|
"loss": 4.4609, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.047038194709157e-06, |
|
"loss": 4.2906, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.03941450026683e-06, |
|
"loss": 4.2863, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.031790805824503e-06, |
|
"loss": 4.3141, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.024167111382177e-06, |
|
"loss": 4.2844, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.01654341693985e-06, |
|
"loss": 4.323, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.008919722497523e-06, |
|
"loss": 4.2539, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.001296028055197e-06, |
|
"loss": 4.2047, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.99367233361287e-06, |
|
"loss": 4.2406, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.986048639170543e-06, |
|
"loss": 4.1656, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.978424944728216e-06, |
|
"loss": 4.1324, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.97080125028589e-06, |
|
"loss": 4.2102, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.963177555843564e-06, |
|
"loss": 4.1785, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.955553861401236e-06, |
|
"loss": 4.1496, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.94793016695891e-06, |
|
"loss": 4.148, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.940306472516582e-06, |
|
"loss": 4.0875, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.932682778074256e-06, |
|
"loss": 4.0488, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.925059083631928e-06, |
|
"loss": 4.0926, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.917435389189602e-06, |
|
"loss": 4.0521, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.909811694747276e-06, |
|
"loss": 4.0557, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.902188000304949e-06, |
|
"loss": 4.057, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.894564305862623e-06, |
|
"loss": 3.9955, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.886940611420295e-06, |
|
"loss": 3.9541, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.879316916977969e-06, |
|
"loss": 4.0002, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.871693222535641e-06, |
|
"loss": 3.8512, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.864069528093315e-06, |
|
"loss": 3.9119, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 8.856445833650989e-06, |
|
"loss": 4.0641, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.848822139208661e-06, |
|
"loss": 3.8793, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.841198444766335e-06, |
|
"loss": 3.8955, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.833574750324008e-06, |
|
"loss": 3.8289, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.825951055881682e-06, |
|
"loss": 3.8039, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.818327361439354e-06, |
|
"loss": 3.7967, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.810703666997026e-06, |
|
"loss": 3.8004, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.803079972554702e-06, |
|
"loss": 3.7502, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.795456278112374e-06, |
|
"loss": 3.7746, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.787832583670048e-06, |
|
"loss": 3.74, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.78020888922772e-06, |
|
"loss": 3.7273, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.772585194785394e-06, |
|
"loss": 3.7084, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.764961500343067e-06, |
|
"loss": 3.6695, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.757337805900739e-06, |
|
"loss": 3.6781, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.749714111458415e-06, |
|
"loss": 3.6531, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.742090417016087e-06, |
|
"loss": 3.6125, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.73446672257376e-06, |
|
"loss": 3.6434, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.726843028131433e-06, |
|
"loss": 3.5969, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.719219333689107e-06, |
|
"loss": 3.6078, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.71159563924678e-06, |
|
"loss": 3.6463, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.703971944804452e-06, |
|
"loss": 3.5914, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.696348250362127e-06, |
|
"loss": 3.5229, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.6887245559198e-06, |
|
"loss": 3.5178, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.681100861477474e-06, |
|
"loss": 3.5465, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.673477167035146e-06, |
|
"loss": 3.4473, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.665853472592818e-06, |
|
"loss": 3.4814, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.658229778150492e-06, |
|
"loss": 3.459, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 8.650606083708166e-06, |
|
"loss": 3.4336, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.64298238926584e-06, |
|
"loss": 3.3758, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.635358694823512e-06, |
|
"loss": 3.3777, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.627735000381186e-06, |
|
"loss": 3.3633, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.620111305938859e-06, |
|
"loss": 3.3711, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.612487611496531e-06, |
|
"loss": 3.3563, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.604863917054205e-06, |
|
"loss": 3.3432, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.597240222611879e-06, |
|
"loss": 3.4258, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.589616528169553e-06, |
|
"loss": 3.3188, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.581992833727225e-06, |
|
"loss": 3.2518, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.574369139284897e-06, |
|
"loss": 3.283, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.566745444842571e-06, |
|
"loss": 3.2523, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.559121750400244e-06, |
|
"loss": 3.2496, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 8.551498055957918e-06, |
|
"loss": 3.2254, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.543874361515592e-06, |
|
"loss": 3.1781, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.536250667073266e-06, |
|
"loss": 3.1686, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.528626972630938e-06, |
|
"loss": 3.2557, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.52100327818861e-06, |
|
"loss": 3.1295, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.513379583746284e-06, |
|
"loss": 3.1766, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.505755889303956e-06, |
|
"loss": 3.1641, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.49813219486163e-06, |
|
"loss": 3.1301, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.490508500419304e-06, |
|
"loss": 3.1424, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.482884805976977e-06, |
|
"loss": 3.0842, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.47526111153465e-06, |
|
"loss": 3.0824, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.467637417092323e-06, |
|
"loss": 3.0498, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.460013722649997e-06, |
|
"loss": 3.1193, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 8.45239002820767e-06, |
|
"loss": 3.0367, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.444766333765343e-06, |
|
"loss": 3.051, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.437142639323017e-06, |
|
"loss": 3.0029, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.42951894488069e-06, |
|
"loss": 2.9674, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.421895250438363e-06, |
|
"loss": 2.9797, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.414271555996036e-06, |
|
"loss": 3.0271, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.40664786155371e-06, |
|
"loss": 2.9619, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.399024167111382e-06, |
|
"loss": 3.0301, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.391400472669056e-06, |
|
"loss": 2.9232, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.38377677822673e-06, |
|
"loss": 2.935, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.376153083784402e-06, |
|
"loss": 2.924, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.368529389342076e-06, |
|
"loss": 2.8533, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.360905694899748e-06, |
|
"loss": 2.859, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.353282000457422e-06, |
|
"loss": 2.8752, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.345658306015095e-06, |
|
"loss": 2.8092, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.338034611572769e-06, |
|
"loss": 2.9111, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.330410917130443e-06, |
|
"loss": 2.8344, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.322787222688115e-06, |
|
"loss": 2.8535, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.315163528245789e-06, |
|
"loss": 2.8996, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.307539833803461e-06, |
|
"loss": 2.8619, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.299916139361135e-06, |
|
"loss": 2.8908, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.292292444918807e-06, |
|
"loss": 2.8221, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.284668750476481e-06, |
|
"loss": 2.8062, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.277045056034155e-06, |
|
"loss": 2.7785, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.269421361591828e-06, |
|
"loss": 2.8027, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.261797667149502e-06, |
|
"loss": 2.7518, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.254173972707174e-06, |
|
"loss": 2.7961, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.246550278264848e-06, |
|
"loss": 2.7605, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.23892658382252e-06, |
|
"loss": 2.7055, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.231302889380194e-06, |
|
"loss": 2.6449, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.223679194937868e-06, |
|
"loss": 2.6789, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.21605550049554e-06, |
|
"loss": 2.6936, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.208431806053214e-06, |
|
"loss": 2.6859, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.200808111610887e-06, |
|
"loss": 2.7135, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.19318441716856e-06, |
|
"loss": 2.634, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.185560722726233e-06, |
|
"loss": 2.643, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.177937028283907e-06, |
|
"loss": 2.6611, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.170313333841581e-06, |
|
"loss": 2.6721, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.162689639399253e-06, |
|
"loss": 2.6314, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.155065944956927e-06, |
|
"loss": 2.666, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.1474422505146e-06, |
|
"loss": 2.6109, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.139818556072273e-06, |
|
"loss": 2.6479, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.132194861629946e-06, |
|
"loss": 2.6289, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.12457116718762e-06, |
|
"loss": 2.5789, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.116947472745294e-06, |
|
"loss": 2.5514, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.109323778302966e-06, |
|
"loss": 2.5531, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.10170008386064e-06, |
|
"loss": 2.5922, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.094076389418312e-06, |
|
"loss": 2.5785, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.086452694975986e-06, |
|
"loss": 2.5496, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.078829000533659e-06, |
|
"loss": 2.5227, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.071205306091332e-06, |
|
"loss": 2.6195, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.063581611649006e-06, |
|
"loss": 2.5441, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.055957917206679e-06, |
|
"loss": 2.5348, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.048334222764353e-06, |
|
"loss": 2.5326, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.040710528322025e-06, |
|
"loss": 2.5195, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.033086833879699e-06, |
|
"loss": 2.5826, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.025463139437371e-06, |
|
"loss": 2.5199, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.017839444995045e-06, |
|
"loss": 2.4783, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.01021575055272e-06, |
|
"loss": 2.5186, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.002592056110392e-06, |
|
"loss": 2.5002, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.994968361668065e-06, |
|
"loss": 2.5014, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.987344667225738e-06, |
|
"loss": 2.4631, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.979720972783412e-06, |
|
"loss": 2.4621, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.972097278341084e-06, |
|
"loss": 2.4621, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.964473583898758e-06, |
|
"loss": 2.524, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.956849889456432e-06, |
|
"loss": 2.4607, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.949226195014104e-06, |
|
"loss": 2.4621, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.941602500571778e-06, |
|
"loss": 2.4975, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.93397880612945e-06, |
|
"loss": 2.4941, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.926355111687125e-06, |
|
"loss": 2.3941, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.918731417244797e-06, |
|
"loss": 2.457, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.91110772280247e-06, |
|
"loss": 2.4133, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.903484028360145e-06, |
|
"loss": 2.41, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.895860333917817e-06, |
|
"loss": 2.4154, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.888236639475491e-06, |
|
"loss": 2.3951, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.880612945033163e-06, |
|
"loss": 2.3861, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.872989250590837e-06, |
|
"loss": 2.3781, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.86536555614851e-06, |
|
"loss": 2.3859, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.857741861706184e-06, |
|
"loss": 2.3857, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.850118167263858e-06, |
|
"loss": 2.367, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.84249447282153e-06, |
|
"loss": 2.3711, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.834870778379204e-06, |
|
"loss": 2.342, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.827247083936876e-06, |
|
"loss": 2.3771, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.81962338949455e-06, |
|
"loss": 2.3484, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.811999695052222e-06, |
|
"loss": 2.3561, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.804376000609896e-06, |
|
"loss": 2.3777, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.79675230616757e-06, |
|
"loss": 2.3965, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.789128611725243e-06, |
|
"loss": 2.3568, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.781504917282917e-06, |
|
"loss": 2.3586, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.773881222840589e-06, |
|
"loss": 2.3389, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.766257528398263e-06, |
|
"loss": 2.3141, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.758633833955935e-06, |
|
"loss": 2.3244, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 7.751010139513609e-06, |
|
"loss": 2.323, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.743386445071283e-06, |
|
"loss": 2.3035, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.735762750628955e-06, |
|
"loss": 2.2945, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.72813905618663e-06, |
|
"loss": 2.2807, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.720515361744302e-06, |
|
"loss": 2.3168, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.712891667301976e-06, |
|
"loss": 2.291, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.705267972859648e-06, |
|
"loss": 2.2896, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.697644278417322e-06, |
|
"loss": 2.285, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.690020583974996e-06, |
|
"loss": 2.2836, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.682396889532668e-06, |
|
"loss": 2.2945, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.674773195090342e-06, |
|
"loss": 2.299, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.667149500648014e-06, |
|
"loss": 2.3023, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.659525806205688e-06, |
|
"loss": 2.2889, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.65190211176336e-06, |
|
"loss": 2.265, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.644278417321035e-06, |
|
"loss": 2.276, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.636654722878709e-06, |
|
"loss": 2.2551, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.629031028436381e-06, |
|
"loss": 2.2389, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.621407333994054e-06, |
|
"loss": 2.252, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.613783639551727e-06, |
|
"loss": 2.2695, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.6061599451094e-06, |
|
"loss": 2.2432, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.598536250667073e-06, |
|
"loss": 2.2299, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.590912556224747e-06, |
|
"loss": 2.2291, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.5832888617824205e-06, |
|
"loss": 2.2617, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.575665167340094e-06, |
|
"loss": 2.268, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.568041472897767e-06, |
|
"loss": 2.2533, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.56041777845544e-06, |
|
"loss": 2.2246, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.552794084013113e-06, |
|
"loss": 2.2418, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.545170389570786e-06, |
|
"loss": 2.2529, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.53754669512846e-06, |
|
"loss": 2.2363, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.529923000686133e-06, |
|
"loss": 2.2176, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.522299306243806e-06, |
|
"loss": 2.2248, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.5146756118014795e-06, |
|
"loss": 2.1896, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.507051917359153e-06, |
|
"loss": 2.1836, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.499428222916826e-06, |
|
"loss": 2.2072, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.491804528474499e-06, |
|
"loss": 2.182, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.484180834032173e-06, |
|
"loss": 2.2078, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.476557139589846e-06, |
|
"loss": 2.202, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.468933445147519e-06, |
|
"loss": 2.2119, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.461309750705192e-06, |
|
"loss": 2.1961, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.453686056262865e-06, |
|
"loss": 2.1996, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.4460623618205385e-06, |
|
"loss": 2.1904, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.438438667378212e-06, |
|
"loss": 2.1912, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.430814972935886e-06, |
|
"loss": 2.2055, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.423191278493559e-06, |
|
"loss": 2.176, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.415567584051232e-06, |
|
"loss": 2.1834, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.407943889608905e-06, |
|
"loss": 2.1508, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.400320195166578e-06, |
|
"loss": 2.1447, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.392696500724251e-06, |
|
"loss": 2.1648, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.385072806281924e-06, |
|
"loss": 2.1432, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.377449111839598e-06, |
|
"loss": 2.1818, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.3698254173972715e-06, |
|
"loss": 2.1557, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.362201722954945e-06, |
|
"loss": 2.1549, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.354578028512618e-06, |
|
"loss": 2.1576, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.346954334070291e-06, |
|
"loss": 2.1533, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.339330639627964e-06, |
|
"loss": 2.1576, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.331706945185637e-06, |
|
"loss": 2.1648, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.324083250743311e-06, |
|
"loss": 2.1676, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.316459556300984e-06, |
|
"loss": 2.1617, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.308835861858657e-06, |
|
"loss": 2.1432, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.3012121674163305e-06, |
|
"loss": 2.1645, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.293588472974004e-06, |
|
"loss": 2.174, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.285964778531677e-06, |
|
"loss": 2.1467, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.27834108408935e-06, |
|
"loss": 2.1385, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.270717389647024e-06, |
|
"loss": 2.1344, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.263093695204697e-06, |
|
"loss": 2.1182, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.25547000076237e-06, |
|
"loss": 2.1393, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.247846306320043e-06, |
|
"loss": 2.1361, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.240222611877716e-06, |
|
"loss": 2.1328, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.2325989174353895e-06, |
|
"loss": 2.1322, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.224975222993063e-06, |
|
"loss": 2.1455, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.217351528550737e-06, |
|
"loss": 2.1078, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.20972783410841e-06, |
|
"loss": 2.1408, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.202104139666083e-06, |
|
"loss": 2.1225, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.194480445223756e-06, |
|
"loss": 2.1324, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.186856750781429e-06, |
|
"loss": 2.107, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.179233056339102e-06, |
|
"loss": 2.1271, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.1716093618967754e-06, |
|
"loss": 2.1141, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.163985667454449e-06, |
|
"loss": 2.1141, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.1563619730121225e-06, |
|
"loss": 2.0951, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.148738278569796e-06, |
|
"loss": 2.0971, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.141114584127469e-06, |
|
"loss": 2.1066, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.133490889685142e-06, |
|
"loss": 2.0941, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.125867195242815e-06, |
|
"loss": 2.1232, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.118243500800488e-06, |
|
"loss": 2.1021, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.110619806358162e-06, |
|
"loss": 2.0938, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.102996111915835e-06, |
|
"loss": 2.1164, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.0953724174735084e-06, |
|
"loss": 2.0836, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.0877487230311816e-06, |
|
"loss": 2.0916, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.080125028588855e-06, |
|
"loss": 2.1068, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.072501334146528e-06, |
|
"loss": 2.0977, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.064877639704201e-06, |
|
"loss": 2.0994, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.057253945261875e-06, |
|
"loss": 2.0932, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.049630250819548e-06, |
|
"loss": 2.099, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.042006556377221e-06, |
|
"loss": 2.1025, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.034382861934894e-06, |
|
"loss": 2.0963, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.0267591674925675e-06, |
|
"loss": 2.0982, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.019135473050241e-06, |
|
"loss": 2.0689, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.011511778607914e-06, |
|
"loss": 2.0893, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.003888084165588e-06, |
|
"loss": 2.0986, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.996264389723261e-06, |
|
"loss": 2.0852, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.988640695280934e-06, |
|
"loss": 2.1145, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.981017000838607e-06, |
|
"loss": 2.0885, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.97339330639628e-06, |
|
"loss": 2.0957, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.965769611953953e-06, |
|
"loss": 2.0619, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.9581459175116265e-06, |
|
"loss": 2.074, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 6.9505222230693005e-06, |
|
"loss": 2.0637, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.942898528626974e-06, |
|
"loss": 2.0822, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.935274834184647e-06, |
|
"loss": 2.0922, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.92765113974232e-06, |
|
"loss": 2.0863, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.920027445299993e-06, |
|
"loss": 2.0684, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.912403750857666e-06, |
|
"loss": 2.0896, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.904780056415339e-06, |
|
"loss": 2.0615, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.897156361973013e-06, |
|
"loss": 2.0467, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.889532667530686e-06, |
|
"loss": 2.0713, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.8819089730883595e-06, |
|
"loss": 2.0795, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.874285278646033e-06, |
|
"loss": 2.0762, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.866661584203706e-06, |
|
"loss": 2.0539, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.859037889761379e-06, |
|
"loss": 2.066, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.851414195319053e-06, |
|
"loss": 2.082, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.843790500876726e-06, |
|
"loss": 2.076, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.836166806434399e-06, |
|
"loss": 2.084, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.828543111992072e-06, |
|
"loss": 2.0691, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.820919417549745e-06, |
|
"loss": 2.0727, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.8132957231074185e-06, |
|
"loss": 2.0645, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.805672028665092e-06, |
|
"loss": 2.0625, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.798048334222766e-06, |
|
"loss": 2.0729, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.790424639780439e-06, |
|
"loss": 2.0545, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.782800945338112e-06, |
|
"loss": 2.0551, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.775177250895785e-06, |
|
"loss": 2.0553, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.767553556453458e-06, |
|
"loss": 2.0568, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.759929862011131e-06, |
|
"loss": 2.0494, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.752306167568804e-06, |
|
"loss": 2.052, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.744682473126478e-06, |
|
"loss": 2.0559, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.7370587786841515e-06, |
|
"loss": 2.0336, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.729435084241825e-06, |
|
"loss": 2.0472, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.721811389799498e-06, |
|
"loss": 2.0562, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.714187695357171e-06, |
|
"loss": 2.0545, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.706564000914844e-06, |
|
"loss": 2.0535, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.698940306472516e-06, |
|
"loss": 2.0263, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.691316612030191e-06, |
|
"loss": 2.058, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.683692917587864e-06, |
|
"loss": 2.0533, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.676069223145537e-06, |
|
"loss": 2.0605, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.6684455287032105e-06, |
|
"loss": 2.0426, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.660821834260884e-06, |
|
"loss": 2.05, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 6.653198139818556e-06, |
|
"loss": 2.0334, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.645574445376229e-06, |
|
"loss": 2.0363, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.637950750933904e-06, |
|
"loss": 2.0394, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.630327056491577e-06, |
|
"loss": 2.0482, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.62270336204925e-06, |
|
"loss": 2.0266, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.615079667606923e-06, |
|
"loss": 2.0434, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.6074559731645956e-06, |
|
"loss": 2.0577, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.599832278722269e-06, |
|
"loss": 2.0351, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.592208584279942e-06, |
|
"loss": 2.0363, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.584584889837617e-06, |
|
"loss": 2.0621, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.57696119539529e-06, |
|
"loss": 2.0369, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.569337500952963e-06, |
|
"loss": 2.0353, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.561713806510635e-06, |
|
"loss": 2.0434, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 6.554090112068308e-06, |
|
"loss": 2.0469, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.5464664176259815e-06, |
|
"loss": 2.0525, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.538842723183655e-06, |
|
"loss": 2.048, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.531219028741329e-06, |
|
"loss": 2.041, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.5235953342990025e-06, |
|
"loss": 2.0564, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.515971639856676e-06, |
|
"loss": 2.0571, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.508347945414348e-06, |
|
"loss": 2.0462, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.500724250972021e-06, |
|
"loss": 2.0322, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.493100556529694e-06, |
|
"loss": 2.0574, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.485476862087367e-06, |
|
"loss": 2.0396, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.477853167645042e-06, |
|
"loss": 2.0214, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.470229473202715e-06, |
|
"loss": 2.0372, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.462605778760388e-06, |
|
"loss": 2.0311, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.454982084318061e-06, |
|
"loss": 2.049, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.447358389875734e-06, |
|
"loss": 2.0438, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.439734695433407e-06, |
|
"loss": 2.0396, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.43211100099108e-06, |
|
"loss": 2.0375, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.424487306548755e-06, |
|
"loss": 2.0537, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.416863612106427e-06, |
|
"loss": 2.0272, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.4092399176641e-06, |
|
"loss": 2.0449, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.4016162232217735e-06, |
|
"loss": 2.0355, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.393992528779447e-06, |
|
"loss": 2.0334, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.38636883433712e-06, |
|
"loss": 2.0424, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.378745139894793e-06, |
|
"loss": 2.0303, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.371121445452467e-06, |
|
"loss": 2.0482, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.36349775101014e-06, |
|
"loss": 2.0299, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.355874056567813e-06, |
|
"loss": 2.0357, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.348250362125486e-06, |
|
"loss": 2.032, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.340626667683159e-06, |
|
"loss": 2.0398, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.3330029732408325e-06, |
|
"loss": 2.027, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.325379278798506e-06, |
|
"loss": 2.0309, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.31775558435618e-06, |
|
"loss": 2.024, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.310131889913853e-06, |
|
"loss": 2.0318, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.302508195471526e-06, |
|
"loss": 2.0342, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.294884501029199e-06, |
|
"loss": 2.0428, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.287260806586872e-06, |
|
"loss": 2.0347, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.279637112144545e-06, |
|
"loss": 2.0327, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.272013417702218e-06, |
|
"loss": 2.0364, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.264389723259892e-06, |
|
"loss": 2.0394, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.2567660288175655e-06, |
|
"loss": 2.0394, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.249142334375239e-06, |
|
"loss": 2.0299, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.241518639932912e-06, |
|
"loss": 2.0387, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.233894945490585e-06, |
|
"loss": 2.0266, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.226271251048258e-06, |
|
"loss": 2.0578, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.218647556605931e-06, |
|
"loss": 2.0301, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.211023862163605e-06, |
|
"loss": 2.0484, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.203400167721278e-06, |
|
"loss": 2.0398, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.195776473278951e-06, |
|
"loss": 2.0304, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.1881527788366245e-06, |
|
"loss": 2.0255, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.180529084394298e-06, |
|
"loss": 2.0346, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.172905389951971e-06, |
|
"loss": 2.0393, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.165281695509644e-06, |
|
"loss": 2.0346, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.157658001067318e-06, |
|
"loss": 2.0217, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.150034306624991e-06, |
|
"loss": 2.0355, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.142410612182664e-06, |
|
"loss": 2.035, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.134786917740337e-06, |
|
"loss": 2.0451, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.12716322329801e-06, |
|
"loss": 2.0346, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.1195395288556835e-06, |
|
"loss": 2.0338, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.111915834413357e-06, |
|
"loss": 2.0347, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.104292139971031e-06, |
|
"loss": 2.0337, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.096668445528704e-06, |
|
"loss": 2.0334, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.089044751086377e-06, |
|
"loss": 2.0412, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.08142105664405e-06, |
|
"loss": 2.0336, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.073797362201723e-06, |
|
"loss": 2.0422, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.066173667759396e-06, |
|
"loss": 2.0469, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.0585499733170694e-06, |
|
"loss": 2.0363, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.050926278874743e-06, |
|
"loss": 2.0424, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.0433025844324165e-06, |
|
"loss": 2.03, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.03567888999009e-06, |
|
"loss": 2.0365, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.028055195547763e-06, |
|
"loss": 2.0434, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.020431501105436e-06, |
|
"loss": 2.0355, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.012807806663109e-06, |
|
"loss": 2.0566, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.005184112220783e-06, |
|
"loss": 2.0328, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.997560417778456e-06, |
|
"loss": 2.0492, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.989936723336129e-06, |
|
"loss": 2.0398, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.9823130288938024e-06, |
|
"loss": 2.0492, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.9746893344514756e-06, |
|
"loss": 2.0533, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.967065640009149e-06, |
|
"loss": 2.041, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.959441945566822e-06, |
|
"loss": 2.05, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.951818251124496e-06, |
|
"loss": 2.048, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.944194556682169e-06, |
|
"loss": 2.0496, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.936570862239842e-06, |
|
"loss": 2.0385, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.928947167797515e-06, |
|
"loss": 2.0449, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.921323473355188e-06, |
|
"loss": 2.0557, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.9136997789128614e-06, |
|
"loss": 2.0461, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.906076084470535e-06, |
|
"loss": 2.0613, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.8984523900282086e-06, |
|
"loss": 2.0502, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.890828695585882e-06, |
|
"loss": 2.0527, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.883205001143555e-06, |
|
"loss": 2.0432, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.875581306701228e-06, |
|
"loss": 2.0625, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.867957612258901e-06, |
|
"loss": 2.0451, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.860333917816574e-06, |
|
"loss": 2.0502, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5.852710223374247e-06, |
|
"loss": 2.0471, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.845086528931921e-06, |
|
"loss": 2.0594, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.8374628344895944e-06, |
|
"loss": 2.0691, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.8298391400472676e-06, |
|
"loss": 2.0635, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.822215445604941e-06, |
|
"loss": 2.0578, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.814591751162614e-06, |
|
"loss": 2.0492, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.806968056720287e-06, |
|
"loss": 2.0477, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.79934436227796e-06, |
|
"loss": 2.0588, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.791720667835634e-06, |
|
"loss": 2.0828, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.784096973393307e-06, |
|
"loss": 2.0654, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.77647327895098e-06, |
|
"loss": 2.0557, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.7688495845086535e-06, |
|
"loss": 2.0631, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.761225890066327e-06, |
|
"loss": 2.0498, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 5.753602195624e-06, |
|
"loss": 2.0621, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.745978501181673e-06, |
|
"loss": 2.0678, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.738354806739347e-06, |
|
"loss": 2.0576, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.73073111229702e-06, |
|
"loss": 2.0664, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.723107417854693e-06, |
|
"loss": 2.0656, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.715483723412366e-06, |
|
"loss": 2.0602, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.707860028970039e-06, |
|
"loss": 2.0629, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.7002363345277125e-06, |
|
"loss": 2.0559, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.692612640085386e-06, |
|
"loss": 2.0746, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.68498894564306e-06, |
|
"loss": 2.0811, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.677365251200733e-06, |
|
"loss": 2.066, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.669741556758406e-06, |
|
"loss": 2.067, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.662117862316079e-06, |
|
"loss": 2.0666, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.654494167873752e-06, |
|
"loss": 2.0752, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.646870473431425e-06, |
|
"loss": 2.0674, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.639246778989098e-06, |
|
"loss": 2.0709, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.631623084546772e-06, |
|
"loss": 2.0773, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.6239993901044455e-06, |
|
"loss": 2.0711, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.616375695662119e-06, |
|
"loss": 2.0736, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.608752001219792e-06, |
|
"loss": 2.0818, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.601128306777465e-06, |
|
"loss": 2.0799, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.593504612335138e-06, |
|
"loss": 2.0695, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.585880917892811e-06, |
|
"loss": 2.0869, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.578257223450485e-06, |
|
"loss": 2.0715, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.570633529008158e-06, |
|
"loss": 2.0811, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.563009834565831e-06, |
|
"loss": 2.0723, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.5553861401235045e-06, |
|
"loss": 2.076, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.547762445681178e-06, |
|
"loss": 2.0818, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.540138751238851e-06, |
|
"loss": 2.0766, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.532515056796524e-06, |
|
"loss": 2.0785, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.524891362354198e-06, |
|
"loss": 2.0932, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.517267667911871e-06, |
|
"loss": 2.0781, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.509643973469544e-06, |
|
"loss": 2.0846, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.502020279027217e-06, |
|
"loss": 2.0805, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.49439658458489e-06, |
|
"loss": 2.0871, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.4867728901425635e-06, |
|
"loss": 2.076, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.479149195700237e-06, |
|
"loss": 2.0955, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.471525501257911e-06, |
|
"loss": 2.0824, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.463901806815584e-06, |
|
"loss": 2.0908, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.456278112373257e-06, |
|
"loss": 2.0912, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.44865441793093e-06, |
|
"loss": 2.0803, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.441030723488603e-06, |
|
"loss": 2.0879, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.433407029046276e-06, |
|
"loss": 2.0838, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.425783334603949e-06, |
|
"loss": 2.0855, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.418159640161623e-06, |
|
"loss": 2.0756, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.4105359457192965e-06, |
|
"loss": 2.0959, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.40291225127697e-06, |
|
"loss": 2.1043, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.395288556834643e-06, |
|
"loss": 2.0967, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.387664862392316e-06, |
|
"loss": 2.0955, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.380041167949989e-06, |
|
"loss": 2.109, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.372417473507661e-06, |
|
"loss": 2.0945, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.364793779065336e-06, |
|
"loss": 2.0941, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.357170084623009e-06, |
|
"loss": 2.0947, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.349546390180682e-06, |
|
"loss": 2.1063, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.3419226957383556e-06, |
|
"loss": 2.0994, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.334299001296029e-06, |
|
"loss": 2.0977, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.326675306853701e-06, |
|
"loss": 2.1, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.319051612411374e-06, |
|
"loss": 2.1055, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.311427917969049e-06, |
|
"loss": 2.1027, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.303804223526722e-06, |
|
"loss": 2.0982, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.296180529084395e-06, |
|
"loss": 2.1033, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.288556834642068e-06, |
|
"loss": 2.1006, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.280933140199741e-06, |
|
"loss": 2.1041, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.273309445757414e-06, |
|
"loss": 2.1023, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.265685751315087e-06, |
|
"loss": 2.101, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.258062056872762e-06, |
|
"loss": 2.1107, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.250438362430435e-06, |
|
"loss": 2.1088, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.242814667988108e-06, |
|
"loss": 2.1156, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.235190973545781e-06, |
|
"loss": 2.1107, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.227567279103453e-06, |
|
"loss": 2.1043, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.2199435846611265e-06, |
|
"loss": 2.1189, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.2123198902188e-06, |
|
"loss": 2.1127, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.2046961957764744e-06, |
|
"loss": 2.1166, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.1970725013341476e-06, |
|
"loss": 2.1162, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.189448806891821e-06, |
|
"loss": 2.1133, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.181825112449493e-06, |
|
"loss": 2.1123, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.174201418007166e-06, |
|
"loss": 2.1129, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.166577723564839e-06, |
|
"loss": 2.1121, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.158954029122512e-06, |
|
"loss": 2.1117, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.151330334680187e-06, |
|
"loss": 2.1299, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.14370664023786e-06, |
|
"loss": 2.1189, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.136082945795533e-06, |
|
"loss": 2.115, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.128459251353206e-06, |
|
"loss": 2.1154, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.120835556910879e-06, |
|
"loss": 2.1184, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.113211862468552e-06, |
|
"loss": 2.1283, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.105588168026227e-06, |
|
"loss": 2.1189, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.0979644735839e-06, |
|
"loss": 2.1111, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.090340779141572e-06, |
|
"loss": 2.1221, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.082717084699245e-06, |
|
"loss": 2.1205, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.0750933902569185e-06, |
|
"loss": 2.1203, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.067469695814592e-06, |
|
"loss": 2.1207, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.059846001372265e-06, |
|
"loss": 2.1307, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.05222230692994e-06, |
|
"loss": 2.1279, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.044598612487612e-06, |
|
"loss": 2.1328, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.036974918045285e-06, |
|
"loss": 2.1258, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.029351223602958e-06, |
|
"loss": 2.1244, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.021727529160631e-06, |
|
"loss": 2.1348, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.014103834718304e-06, |
|
"loss": 2.127, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.0064801402759775e-06, |
|
"loss": 2.1283, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.9988564458336515e-06, |
|
"loss": 2.1297, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.991232751391325e-06, |
|
"loss": 2.1398, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.983609056948998e-06, |
|
"loss": 2.1322, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.975985362506671e-06, |
|
"loss": 2.1365, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.968361668064344e-06, |
|
"loss": 2.1221, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.960737973622017e-06, |
|
"loss": 2.1385, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.953114279179691e-06, |
|
"loss": 2.1387, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.945490584737364e-06, |
|
"loss": 2.1324, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.937866890295037e-06, |
|
"loss": 2.1234, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9302431958527105e-06, |
|
"loss": 2.1393, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.922619501410384e-06, |
|
"loss": 2.1354, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.914995806968057e-06, |
|
"loss": 2.1361, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.907372112525731e-06, |
|
"loss": 2.1305, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.899748418083404e-06, |
|
"loss": 2.1346, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.892124723641077e-06, |
|
"loss": 2.1426, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.88450102919875e-06, |
|
"loss": 2.1357, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.876877334756423e-06, |
|
"loss": 2.1373, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.869253640314096e-06, |
|
"loss": 2.1367, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.8616299458717695e-06, |
|
"loss": 2.1273, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.8540062514294435e-06, |
|
"loss": 2.1436, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.846382556987117e-06, |
|
"loss": 2.1396, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.83875886254479e-06, |
|
"loss": 2.1492, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.831135168102463e-06, |
|
"loss": 2.1594, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.823511473660136e-06, |
|
"loss": 2.1484, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.815887779217809e-06, |
|
"loss": 2.1502, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.808264084775482e-06, |
|
"loss": 2.1383, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.800640390333156e-06, |
|
"loss": 2.1465, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.793016695890829e-06, |
|
"loss": 2.1533, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.7853930014485025e-06, |
|
"loss": 2.1541, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.777769307006176e-06, |
|
"loss": 2.149, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.770145612563849e-06, |
|
"loss": 2.1529, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.762521918121522e-06, |
|
"loss": 2.1562, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.754898223679195e-06, |
|
"loss": 2.1553, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.747274529236869e-06, |
|
"loss": 2.1527, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.739650834794542e-06, |
|
"loss": 2.1514, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.732027140352215e-06, |
|
"loss": 2.1623, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.7244034459098884e-06, |
|
"loss": 2.1709, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.7167797514675616e-06, |
|
"loss": 2.1619, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.709156057025235e-06, |
|
"loss": 2.1502, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.701532362582908e-06, |
|
"loss": 2.1461, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.693908668140582e-06, |
|
"loss": 2.1596, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.686284973698255e-06, |
|
"loss": 2.1648, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.678661279255928e-06, |
|
"loss": 2.1654, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.671037584813601e-06, |
|
"loss": 2.1627, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.663413890371274e-06, |
|
"loss": 2.1688, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.6557901959289475e-06, |
|
"loss": 2.1623, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.648166501486621e-06, |
|
"loss": 2.1662, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.6405428070442946e-06, |
|
"loss": 2.1654, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.632919112601968e-06, |
|
"loss": 2.1582, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.625295418159641e-06, |
|
"loss": 2.1562, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.617671723717314e-06, |
|
"loss": 2.1633, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.610048029274987e-06, |
|
"loss": 2.1588, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.60242433483266e-06, |
|
"loss": 2.1645, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.594800640390333e-06, |
|
"loss": 2.1621, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.587176945948007e-06, |
|
"loss": 2.1658, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5795532515056805e-06, |
|
"loss": 2.1678, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.571929557063354e-06, |
|
"loss": 2.1645, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.564305862621026e-06, |
|
"loss": 2.1699, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5566821681787e-06, |
|
"loss": 2.1736, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.549058473736373e-06, |
|
"loss": 2.167, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.541434779294046e-06, |
|
"loss": 2.177, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.53381108485172e-06, |
|
"loss": 2.1752, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.526187390409393e-06, |
|
"loss": 2.1721, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.5185636959670655e-06, |
|
"loss": 2.1844, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.510940001524739e-06, |
|
"loss": 2.1762, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.503316307082413e-06, |
|
"loss": 2.182, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.495692612640086e-06, |
|
"loss": 2.1824, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.488068918197759e-06, |
|
"loss": 2.175, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.480445223755433e-06, |
|
"loss": 2.1848, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.472821529313105e-06, |
|
"loss": 2.1758, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.465197834870778e-06, |
|
"loss": 2.1885, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.457574140428452e-06, |
|
"loss": 2.1809, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.449950445986125e-06, |
|
"loss": 2.1908, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.4423267515437985e-06, |
|
"loss": 2.183, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.434703057101472e-06, |
|
"loss": 2.1826, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.427079362659145e-06, |
|
"loss": 2.1908, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.419455668216818e-06, |
|
"loss": 2.1854, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.411831973774491e-06, |
|
"loss": 2.1916, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.404208279332165e-06, |
|
"loss": 2.1836, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.396584584889838e-06, |
|
"loss": 2.1855, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.388960890447511e-06, |
|
"loss": 2.1895, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.381337196005184e-06, |
|
"loss": 2.1855, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.3737135015628575e-06, |
|
"loss": 2.1826, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.366089807120531e-06, |
|
"loss": 2.1926, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.358466112678204e-06, |
|
"loss": 2.1895, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.350842418235878e-06, |
|
"loss": 2.1973, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.343218723793551e-06, |
|
"loss": 2.1936, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.335595029351224e-06, |
|
"loss": 2.1982, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.327971334908897e-06, |
|
"loss": 2.1973, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.32034764046657e-06, |
|
"loss": 2.1963, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.312723946024243e-06, |
|
"loss": 2.193, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.3051002515819165e-06, |
|
"loss": 2.2084, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.2974765571395905e-06, |
|
"loss": 2.2049, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.289852862697264e-06, |
|
"loss": 2.1889, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.282229168254937e-06, |
|
"loss": 2.1986, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.27460547381261e-06, |
|
"loss": 2.1945, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.266981779370283e-06, |
|
"loss": 2.1998, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.259358084927956e-06, |
|
"loss": 2.1986, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.251734390485629e-06, |
|
"loss": 2.201, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.244110696043303e-06, |
|
"loss": 2.2078, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.236487001600976e-06, |
|
"loss": 2.2041, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.2288633071586495e-06, |
|
"loss": 2.2051, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.221239612716323e-06, |
|
"loss": 2.207, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.213615918273996e-06, |
|
"loss": 2.2041, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.205992223831669e-06, |
|
"loss": 2.2074, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.198368529389342e-06, |
|
"loss": 2.2004, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.190744834947016e-06, |
|
"loss": 2.2121, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.183121140504689e-06, |
|
"loss": 2.2131, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.175497446062362e-06, |
|
"loss": 2.2131, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.1678737516200354e-06, |
|
"loss": 2.2043, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.1602500571777086e-06, |
|
"loss": 2.2057, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.152626362735382e-06, |
|
"loss": 2.2066, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.145002668293055e-06, |
|
"loss": 2.2213, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.137378973850729e-06, |
|
"loss": 2.2234, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.129755279408402e-06, |
|
"loss": 2.2174, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.122131584966075e-06, |
|
"loss": 2.2186, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.114507890523748e-06, |
|
"loss": 2.2115, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.106884196081421e-06, |
|
"loss": 2.2293, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0992605016390945e-06, |
|
"loss": 2.2129, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.091636807196768e-06, |
|
"loss": 2.2178, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0840131127544416e-06, |
|
"loss": 2.2182, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.076389418312115e-06, |
|
"loss": 2.227, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.068765723869788e-06, |
|
"loss": 2.2273, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.061142029427461e-06, |
|
"loss": 2.2225, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.053518334985134e-06, |
|
"loss": 2.2307, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.045894640542807e-06, |
|
"loss": 2.2209, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.03827094610048e-06, |
|
"loss": 2.2314, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.030647251658154e-06, |
|
"loss": 2.2258, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0230235572158275e-06, |
|
"loss": 2.233, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.015399862773501e-06, |
|
"loss": 2.2275, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.007776168331174e-06, |
|
"loss": 2.2271, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.000152473888847e-06, |
|
"loss": 2.2324, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.99252877944652e-06, |
|
"loss": 2.2418, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.984905085004193e-06, |
|
"loss": 2.2348, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.977281390561867e-06, |
|
"loss": 2.2416, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.96965769611954e-06, |
|
"loss": 2.2408, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.962034001677213e-06, |
|
"loss": 2.2395, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9544103072348865e-06, |
|
"loss": 2.2328, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.94678661279256e-06, |
|
"loss": 2.2471, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.939162918350233e-06, |
|
"loss": 2.2352, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.931539223907906e-06, |
|
"loss": 2.2406, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.92391552946558e-06, |
|
"loss": 2.2322, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.916291835023253e-06, |
|
"loss": 2.2447, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.908668140580926e-06, |
|
"loss": 2.2412, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.901044446138599e-06, |
|
"loss": 2.2412, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.893420751696272e-06, |
|
"loss": 2.2361, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.8857970572539455e-06, |
|
"loss": 2.2393, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.878173362811619e-06, |
|
"loss": 2.2414, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.870549668369293e-06, |
|
"loss": 2.2477, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.862925973926966e-06, |
|
"loss": 2.2484, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.855302279484638e-06, |
|
"loss": 2.2463, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.847678585042312e-06, |
|
"loss": 2.2463, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.840054890599985e-06, |
|
"loss": 2.2387, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.832431196157658e-06, |
|
"loss": 2.2482, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.824807501715331e-06, |
|
"loss": 2.249, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.817183807273005e-06, |
|
"loss": 2.251, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.809560112830678e-06, |
|
"loss": 2.2467, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.801936418388351e-06, |
|
"loss": 2.2529, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.7943127239460248e-06, |
|
"loss": 2.25, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.786689029503698e-06, |
|
"loss": 2.2533, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.779065335061371e-06, |
|
"loss": 2.2539, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.771441640619044e-06, |
|
"loss": 2.2549, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.7638179461767177e-06, |
|
"loss": 2.2439, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.756194251734391e-06, |
|
"loss": 2.2607, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.748570557292064e-06, |
|
"loss": 2.2596, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.7409468628497375e-06, |
|
"loss": 2.2547, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.7333231684074107e-06, |
|
"loss": 2.2695, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.7256994739650838e-06, |
|
"loss": 2.2631, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.718075779522757e-06, |
|
"loss": 2.2543, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.7104520850804305e-06, |
|
"loss": 2.2664, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.7028283906381036e-06, |
|
"loss": 2.2568, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6952046961957767e-06, |
|
"loss": 2.2645, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6875810017534503e-06, |
|
"loss": 2.2658, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6799573073111234e-06, |
|
"loss": 2.2654, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6723336128687965e-06, |
|
"loss": 2.2682, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6647099184264697e-06, |
|
"loss": 2.2715, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.6570862239841432e-06, |
|
"loss": 2.2707, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.6494625295418164e-06, |
|
"loss": 2.2648, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.6418388350994895e-06, |
|
"loss": 2.2693, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.634215140657163e-06, |
|
"loss": 2.2752, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.626591446214836e-06, |
|
"loss": 2.2711, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.6189677517725093e-06, |
|
"loss": 2.2746, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.6113440573301824e-06, |
|
"loss": 2.2654, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.603720362887856e-06, |
|
"loss": 2.2721, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.596096668445529e-06, |
|
"loss": 2.2746, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.5884729740032022e-06, |
|
"loss": 2.2684, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.580849279560876e-06, |
|
"loss": 2.2781, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.573225585118549e-06, |
|
"loss": 2.2775, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.565601890676222e-06, |
|
"loss": 2.2754, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.5579781962338956e-06, |
|
"loss": 2.2756, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.5503545017915687e-06, |
|
"loss": 2.2834, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.542730807349242e-06, |
|
"loss": 2.282, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.5351071129069146e-06, |
|
"loss": 2.2748, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.5274834184645886e-06, |
|
"loss": 2.2865, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.5198597240222617e-06, |
|
"loss": 2.2859, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.5122360295799344e-06, |
|
"loss": 2.2908, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.5046123351376084e-06, |
|
"loss": 2.2895, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.4969886406952815e-06, |
|
"loss": 2.2873, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.4893649462529542e-06, |
|
"loss": 2.2807, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.4817412518106273e-06, |
|
"loss": 2.2889, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.4741175573683013e-06, |
|
"loss": 2.2893, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.466493862925974e-06, |
|
"loss": 2.2877, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.458870168483647e-06, |
|
"loss": 2.2939, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.451246474041321e-06, |
|
"loss": 2.2947, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.443622779598994e-06, |
|
"loss": 2.2932, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.435999085156667e-06, |
|
"loss": 2.2941, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.42837539071434e-06, |
|
"loss": 2.291, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.4207516962720137e-06, |
|
"loss": 2.2855, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.413128001829687e-06, |
|
"loss": 2.2846, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.40550430738736e-06, |
|
"loss": 2.283, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.3978806129450335e-06, |
|
"loss": 2.2908, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.3902569185027066e-06, |
|
"loss": 2.2951, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.3826332240603797e-06, |
|
"loss": 2.2889, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.375009529618053e-06, |
|
"loss": 2.2871, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.3673858351757264e-06, |
|
"loss": 2.2875, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.3597621407333996e-06, |
|
"loss": 2.2971, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.3521384462910727e-06, |
|
"loss": 2.2992, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3445147518487462e-06, |
|
"loss": 2.2961, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3368910574064194e-06, |
|
"loss": 2.3055, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3292673629640925e-06, |
|
"loss": 2.3043, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3216436685217656e-06, |
|
"loss": 2.3014, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.314019974079439e-06, |
|
"loss": 2.3037, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.3063962796371123e-06, |
|
"loss": 2.2908, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.2987725851947854e-06, |
|
"loss": 2.3041, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.291148890752459e-06, |
|
"loss": 2.3021, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.283525196310132e-06, |
|
"loss": 2.3039, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.2759015018678053e-06, |
|
"loss": 2.3041, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.2682778074254784e-06, |
|
"loss": 2.3012, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.260654112983152e-06, |
|
"loss": 2.2986, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.253030418540825e-06, |
|
"loss": 2.3004, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.245406724098498e-06, |
|
"loss": 2.301, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.2377830296561718e-06, |
|
"loss": 2.3121, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.230159335213845e-06, |
|
"loss": 2.3102, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.222535640771518e-06, |
|
"loss": 2.3115, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.214911946329191e-06, |
|
"loss": 2.31, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.2072882518868647e-06, |
|
"loss": 2.3121, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.199664557444538e-06, |
|
"loss": 2.3137, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.192040863002211e-06, |
|
"loss": 2.309, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.1844171685598845e-06, |
|
"loss": 2.3104, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.1767934741175576e-06, |
|
"loss": 2.308, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.1691697796752308e-06, |
|
"loss": 2.3199, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.161546085232904e-06, |
|
"loss": 2.3139, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.1539223907905775e-06, |
|
"loss": 2.3105, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.1462986963482506e-06, |
|
"loss": 2.3207, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.1386750019059237e-06, |
|
"loss": 2.3186, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.1310513074635973e-06, |
|
"loss": 2.3189, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.1234276130212704e-06, |
|
"loss": 2.3186, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.1158039185789435e-06, |
|
"loss": 2.3158, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.108180224136617e-06, |
|
"loss": 2.3297, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.1005565296942902e-06, |
|
"loss": 2.3268, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.0929328352519634e-06, |
|
"loss": 2.3172, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.0853091408096365e-06, |
|
"loss": 2.3182, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.07768544636731e-06, |
|
"loss": 2.3219, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.070061751924983e-06, |
|
"loss": 2.3189, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.0624380574826563e-06, |
|
"loss": 2.3193, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.05481436304033e-06, |
|
"loss": 2.3289, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.047190668598003e-06, |
|
"loss": 2.3322, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.039566974155676e-06, |
|
"loss": 2.3242, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.0319432797133492e-06, |
|
"loss": 2.3229, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.024319585271023e-06, |
|
"loss": 2.334, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.016695890828696e-06, |
|
"loss": 2.3248, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.009072196386369e-06, |
|
"loss": 2.326, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.0014485019440426e-06, |
|
"loss": 2.3285, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.9938248075017157e-06, |
|
"loss": 2.3281, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.986201113059389e-06, |
|
"loss": 2.3379, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.978577418617062e-06, |
|
"loss": 2.3277, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.9709537241747356e-06, |
|
"loss": 2.3326, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.9633300297324087e-06, |
|
"loss": 2.3279, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.955706335290082e-06, |
|
"loss": 2.3307, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.9480826408477554e-06, |
|
"loss": 2.3354, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.9404589464054285e-06, |
|
"loss": 2.3357, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.9328352519631016e-06, |
|
"loss": 2.3281, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.9252115575207748e-06, |
|
"loss": 2.3297, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.9175878630784483e-06, |
|
"loss": 2.3375, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.9099641686361214e-06, |
|
"loss": 2.342, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.9023404741937946e-06, |
|
"loss": 2.3305, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.894716779751468e-06, |
|
"loss": 2.3375, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.8870930853091413e-06, |
|
"loss": 2.3422, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.8794693908668144e-06, |
|
"loss": 2.3529, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.871845696424487e-06, |
|
"loss": 2.3475, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.864222001982161e-06, |
|
"loss": 2.3412, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.856598307539834e-06, |
|
"loss": 2.3396, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.848974613097507e-06, |
|
"loss": 2.3504, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.841350918655181e-06, |
|
"loss": 2.3422, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.833727224212854e-06, |
|
"loss": 2.3461, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.8261035297705267e-06, |
|
"loss": 2.3482, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.8184798353282e-06, |
|
"loss": 2.3441, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.810856140885874e-06, |
|
"loss": 2.3504, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.8032324464435465e-06, |
|
"loss": 2.3469, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7956087520012197e-06, |
|
"loss": 2.349, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7879850575588937e-06, |
|
"loss": 2.3463, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7803613631165664e-06, |
|
"loss": 2.3475, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7727376686742395e-06, |
|
"loss": 2.3492, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7651139742319126e-06, |
|
"loss": 2.3531, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.757490279789586e-06, |
|
"loss": 2.343, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7498665853472593e-06, |
|
"loss": 2.3504, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7422428909049324e-06, |
|
"loss": 2.3529, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7346191964626064e-06, |
|
"loss": 2.3527, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.726995502020279e-06, |
|
"loss": 2.3572, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7193718075779523e-06, |
|
"loss": 2.3451, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7117481131356254e-06, |
|
"loss": 2.3551, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.704124418693299e-06, |
|
"loss": 2.359, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.696500724250972e-06, |
|
"loss": 2.352, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.688877029808645e-06, |
|
"loss": 2.3549, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.6812533353663188e-06, |
|
"loss": 2.3525, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.673629640923992e-06, |
|
"loss": 2.3625, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.666005946481665e-06, |
|
"loss": 2.3549, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.6583822520393386e-06, |
|
"loss": 2.365, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.6507585575970117e-06, |
|
"loss": 2.3639, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.643134863154685e-06, |
|
"loss": 2.3607, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.635511168712358e-06, |
|
"loss": 2.3613, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6278874742700315e-06, |
|
"loss": 2.3684, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6202637798277046e-06, |
|
"loss": 2.3645, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6126400853853778e-06, |
|
"loss": 2.3689, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.6050163909430513e-06, |
|
"loss": 2.3691, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.5973926965007245e-06, |
|
"loss": 2.3695, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.5897690020583976e-06, |
|
"loss": 2.3758, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.5821453076160707e-06, |
|
"loss": 2.3701, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.5745216131737443e-06, |
|
"loss": 2.3643, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.5668979187314174e-06, |
|
"loss": 2.3766, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.5592742242890905e-06, |
|
"loss": 2.374, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.551650529846764e-06, |
|
"loss": 2.3727, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.5440268354044372e-06, |
|
"loss": 2.3783, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.5364031409621103e-06, |
|
"loss": 2.3645, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.5287794465197835e-06, |
|
"loss": 2.3775, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.521155752077457e-06, |
|
"loss": 2.3707, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.51353205763513e-06, |
|
"loss": 2.3828, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.5059083631928033e-06, |
|
"loss": 2.3697, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.498284668750477e-06, |
|
"loss": 2.3814, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.49066097430815e-06, |
|
"loss": 2.3709, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.483037279865823e-06, |
|
"loss": 2.3766, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.4754135854234967e-06, |
|
"loss": 2.3768, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.4677898909811694e-06, |
|
"loss": 2.3793, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.460166196538843e-06, |
|
"loss": 2.3777, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.4525425020965165e-06, |
|
"loss": 2.3809, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.444918807654189e-06, |
|
"loss": 2.3795, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.4372951132118627e-06, |
|
"loss": 2.3756, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.429671418769536e-06, |
|
"loss": 2.3805, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.422047724327209e-06, |
|
"loss": 2.3861, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.414424029884882e-06, |
|
"loss": 2.3816, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.4068003354425557e-06, |
|
"loss": 2.3879, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.399176641000229e-06, |
|
"loss": 2.3818, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.391552946557902e-06, |
|
"loss": 2.3775, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.3839292521155755e-06, |
|
"loss": 2.3809, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.3763055576732486e-06, |
|
"loss": 2.3775, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.3686818632309218e-06, |
|
"loss": 2.3885, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.3610581687885953e-06, |
|
"loss": 2.3879, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.3534344743462684e-06, |
|
"loss": 2.385, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.3458107799039416e-06, |
|
"loss": 2.3793, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.3381870854616147e-06, |
|
"loss": 2.3898, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.3305633910192883e-06, |
|
"loss": 2.382, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.3229396965769614e-06, |
|
"loss": 2.39, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.3153160021346345e-06, |
|
"loss": 2.3848, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.307692307692308e-06, |
|
"loss": 2.3936, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.300068613249981e-06, |
|
"loss": 2.3937, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2924449188076543e-06, |
|
"loss": 2.3939, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2848212243653275e-06, |
|
"loss": 2.3889, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.277197529923001e-06, |
|
"loss": 2.382, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.269573835480674e-06, |
|
"loss": 2.3936, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2619501410383473e-06, |
|
"loss": 2.3896, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.254326446596021e-06, |
|
"loss": 2.3908, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.246702752153694e-06, |
|
"loss": 2.3871, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.239079057711367e-06, |
|
"loss": 2.3945, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.2314553632690402e-06, |
|
"loss": 2.4012, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.2238316688267138e-06, |
|
"loss": 2.3996, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.216207974384387e-06, |
|
"loss": 2.3977, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.20858427994206e-06, |
|
"loss": 2.4, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.2009605854997336e-06, |
|
"loss": 2.3959, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.1933368910574067e-06, |
|
"loss": 2.3996, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.18571319661508e-06, |
|
"loss": 2.4045, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.178089502172753e-06, |
|
"loss": 2.3979, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.1704658077304265e-06, |
|
"loss": 2.4029, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.1628421132880993e-06, |
|
"loss": 2.4057, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.155218418845773e-06, |
|
"loss": 2.4002, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.1475947244034464e-06, |
|
"loss": 2.3965, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.139971029961119e-06, |
|
"loss": 2.4021, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.1323473355187926e-06, |
|
"loss": 2.4031, |
|
"step": 10320 |
|
} |
|
], |
|
"max_steps": 13117, |
|
"num_train_epochs": 1, |
|
"total_flos": 6400995950592000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|