{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7867652664481207, "global_step": 10320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.992376305557673e-06, "loss": 30.7359, "step": 10 }, { "epoch": 0.0, "learning_rate": 9.984752611115347e-06, "loss": 31.0328, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.97712891667302e-06, "loss": 30.2281, "step": 30 }, { "epoch": 0.0, "learning_rate": 9.969505222230695e-06, "loss": 30.8781, "step": 40 }, { "epoch": 0.0, "learning_rate": 9.961881527788367e-06, "loss": 29.9109, "step": 50 }, { "epoch": 0.0, "learning_rate": 9.954257833346041e-06, "loss": 29.7312, "step": 60 }, { "epoch": 0.01, "learning_rate": 9.946634138903714e-06, "loss": 28.8719, "step": 70 }, { "epoch": 0.01, "learning_rate": 9.939010444461386e-06, "loss": 28.7094, "step": 80 }, { "epoch": 0.01, "learning_rate": 9.93138675001906e-06, "loss": 28.0828, "step": 90 }, { "epoch": 0.01, "learning_rate": 9.923763055576732e-06, "loss": 27.6328, "step": 100 }, { "epoch": 0.01, "learning_rate": 9.916139361134408e-06, "loss": 26.9016, "step": 110 }, { "epoch": 0.01, "learning_rate": 9.90851566669208e-06, "loss": 26.3891, "step": 120 }, { "epoch": 0.01, "learning_rate": 9.900891972249752e-06, "loss": 25.8813, "step": 130 }, { "epoch": 0.01, "learning_rate": 9.893268277807426e-06, "loss": 25.7141, "step": 140 }, { "epoch": 0.01, "learning_rate": 9.885644583365099e-06, "loss": 25.2875, "step": 150 }, { "epoch": 0.01, "learning_rate": 9.878020888922773e-06, "loss": 24.6219, "step": 160 }, { "epoch": 0.01, "learning_rate": 9.870397194480445e-06, "loss": 24.1156, "step": 170 }, { "epoch": 0.01, "learning_rate": 9.86277350003812e-06, "loss": 24.0234, "step": 180 }, { "epoch": 0.01, "learning_rate": 9.855149805595793e-06, "loss": 23.6562, "step": 190 }, { "epoch": 0.02, "learning_rate": 9.847526111153465e-06, "loss": 23.2922, "step": 200 }, { "epoch": 0.02, "learning_rate": 9.839902416711139e-06, "loss": 22.7734, "step": 210 }, { "epoch": 0.02, "learning_rate": 9.832278722268811e-06, "loss": 22.5281, "step": 220 }, { "epoch": 0.02, "learning_rate": 9.824655027826485e-06, "loss": 22.0547, "step": 230 }, { "epoch": 0.02, "learning_rate": 9.817031333384158e-06, "loss": 22.1594, "step": 240 }, { "epoch": 0.02, "learning_rate": 9.809407638941832e-06, "loss": 21.7297, "step": 250 }, { "epoch": 0.02, "learning_rate": 9.801783944499506e-06, "loss": 21.2375, "step": 260 }, { "epoch": 0.02, "learning_rate": 9.794160250057178e-06, "loss": 20.6297, "step": 270 }, { "epoch": 0.02, "learning_rate": 9.786536555614852e-06, "loss": 20.3438, "step": 280 }, { "epoch": 0.02, "learning_rate": 9.778912861172524e-06, "loss": 20.0625, "step": 290 }, { "epoch": 0.02, "learning_rate": 9.771289166730198e-06, "loss": 19.825, "step": 300 }, { "epoch": 0.02, "learning_rate": 9.76366547228787e-06, "loss": 19.6578, "step": 310 }, { "epoch": 0.02, "learning_rate": 9.756041777845544e-06, "loss": 19.4641, "step": 320 }, { "epoch": 0.03, "learning_rate": 9.748418083403218e-06, "loss": 18.9703, "step": 330 }, { "epoch": 0.03, "learning_rate": 9.74079438896089e-06, "loss": 18.5938, "step": 340 }, { "epoch": 0.03, "learning_rate": 9.733170694518565e-06, "loss": 18.2047, "step": 350 }, { "epoch": 0.03, "learning_rate": 9.725547000076237e-06, "loss": 17.3922, "step": 360 }, { "epoch": 0.03, "learning_rate": 9.717923305633911e-06, "loss": 17.1758, "step": 370 }, { "epoch": 0.03, "learning_rate": 9.710299611191583e-06, "loss": 16.975, "step": 380 }, { "epoch": 0.03, "learning_rate": 9.702675916749257e-06, "loss": 15.7492, "step": 390 }, { "epoch": 0.03, "learning_rate": 9.695052222306931e-06, "loss": 15.2898, "step": 400 }, { "epoch": 0.03, "learning_rate": 9.687428527864603e-06, "loss": 14.4289, "step": 410 }, { "epoch": 0.03, "learning_rate": 9.679804833422277e-06, "loss": 14.3398, "step": 420 }, { "epoch": 0.03, "learning_rate": 9.67218113897995e-06, "loss": 13.6992, "step": 430 }, { "epoch": 0.03, "learning_rate": 9.664557444537624e-06, "loss": 12.75, "step": 440 }, { "epoch": 0.03, "learning_rate": 9.656933750095296e-06, "loss": 12.8219, "step": 450 }, { "epoch": 0.04, "learning_rate": 9.64931005565297e-06, "loss": 12.1195, "step": 460 }, { "epoch": 0.04, "learning_rate": 9.641686361210644e-06, "loss": 10.5531, "step": 470 }, { "epoch": 0.04, "learning_rate": 9.634062666768316e-06, "loss": 9.9711, "step": 480 }, { "epoch": 0.04, "learning_rate": 9.62643897232599e-06, "loss": 10.1711, "step": 490 }, { "epoch": 0.04, "learning_rate": 9.618815277883662e-06, "loss": 9.2937, "step": 500 }, { "epoch": 0.04, "learning_rate": 9.611191583441336e-06, "loss": 8.8313, "step": 510 }, { "epoch": 0.04, "learning_rate": 9.603567888999009e-06, "loss": 8.4027, "step": 520 }, { "epoch": 0.04, "learning_rate": 9.595944194556683e-06, "loss": 8.5461, "step": 530 }, { "epoch": 0.04, "learning_rate": 9.588320500114357e-06, "loss": 8.4668, "step": 540 }, { "epoch": 0.04, "learning_rate": 9.580696805672029e-06, "loss": 7.9266, "step": 550 }, { "epoch": 0.04, "learning_rate": 9.573073111229703e-06, "loss": 7.718, "step": 560 }, { "epoch": 0.04, "learning_rate": 9.565449416787375e-06, "loss": 7.6984, "step": 570 }, { "epoch": 0.04, "learning_rate": 9.55782572234505e-06, "loss": 7.4594, "step": 580 }, { "epoch": 0.04, "learning_rate": 9.550202027902723e-06, "loss": 7.302, "step": 590 }, { "epoch": 0.05, "learning_rate": 9.542578333460395e-06, "loss": 7.2758, "step": 600 }, { "epoch": 0.05, "learning_rate": 9.53495463901807e-06, "loss": 6.9484, "step": 610 }, { "epoch": 0.05, "learning_rate": 9.527330944575742e-06, "loss": 6.5934, "step": 620 }, { "epoch": 0.05, "learning_rate": 9.519707250133416e-06, "loss": 6.4156, "step": 630 }, { "epoch": 0.05, "learning_rate": 9.512083555691088e-06, "loss": 6.3332, "step": 640 }, { "epoch": 0.05, "learning_rate": 9.504459861248762e-06, "loss": 6.3387, "step": 650 }, { "epoch": 0.05, "learning_rate": 9.496836166806436e-06, "loss": 6.7496, "step": 660 }, { "epoch": 0.05, "learning_rate": 9.489212472364108e-06, "loss": 6.2605, "step": 670 }, { "epoch": 0.05, "learning_rate": 9.481588777921782e-06, "loss": 6.1672, "step": 680 }, { "epoch": 0.05, "learning_rate": 9.473965083479454e-06, "loss": 6.448, "step": 690 }, { "epoch": 0.05, "learning_rate": 9.466341389037128e-06, "loss": 6.2863, "step": 700 }, { "epoch": 0.05, "learning_rate": 9.4587176945948e-06, "loss": 5.8332, "step": 710 }, { "epoch": 0.05, "learning_rate": 9.451094000152475e-06, "loss": 5.7637, "step": 720 }, { "epoch": 0.06, "learning_rate": 9.443470305710149e-06, "loss": 5.8871, "step": 730 }, { "epoch": 0.06, "learning_rate": 9.435846611267821e-06, "loss": 5.6824, "step": 740 }, { "epoch": 0.06, "learning_rate": 9.428222916825495e-06, "loss": 5.6824, "step": 750 }, { "epoch": 0.06, "learning_rate": 9.420599222383167e-06, "loss": 5.623, "step": 760 }, { "epoch": 0.06, "learning_rate": 9.412975527940841e-06, "loss": 5.793, "step": 770 }, { "epoch": 0.06, "learning_rate": 9.405351833498513e-06, "loss": 5.6578, "step": 780 }, { "epoch": 0.06, "learning_rate": 9.397728139056187e-06, "loss": 5.4418, "step": 790 }, { "epoch": 0.06, "learning_rate": 9.390104444613861e-06, "loss": 5.6582, "step": 800 }, { "epoch": 0.06, "learning_rate": 9.382480750171534e-06, "loss": 5.4758, "step": 810 }, { "epoch": 0.06, "learning_rate": 9.374857055729208e-06, "loss": 5.3496, "step": 820 }, { "epoch": 0.06, "learning_rate": 9.36723336128688e-06, "loss": 5.809, "step": 830 }, { "epoch": 0.06, "learning_rate": 9.359609666844554e-06, "loss": 5.3367, "step": 840 }, { "epoch": 0.06, "learning_rate": 9.351985972402226e-06, "loss": 5.3164, "step": 850 }, { "epoch": 0.07, "learning_rate": 9.3443622779599e-06, "loss": 5.2313, "step": 860 }, { "epoch": 0.07, "learning_rate": 9.336738583517574e-06, "loss": 5.3535, "step": 870 }, { "epoch": 0.07, "learning_rate": 9.329114889075246e-06, "loss": 5.477, "step": 880 }, { "epoch": 0.07, "learning_rate": 9.32149119463292e-06, "loss": 5.5727, "step": 890 }, { "epoch": 0.07, "learning_rate": 9.313867500190593e-06, "loss": 5.1645, "step": 900 }, { "epoch": 0.07, "learning_rate": 9.306243805748267e-06, "loss": 5.0938, "step": 910 }, { "epoch": 0.07, "learning_rate": 9.298620111305939e-06, "loss": 5.1703, "step": 920 }, { "epoch": 0.07, "learning_rate": 9.290996416863613e-06, "loss": 5.0945, "step": 930 }, { "epoch": 0.07, "learning_rate": 9.283372722421287e-06, "loss": 5.1152, "step": 940 }, { "epoch": 0.07, "learning_rate": 9.27574902797896e-06, "loss": 5.0703, "step": 950 }, { "epoch": 0.07, "learning_rate": 9.268125333536633e-06, "loss": 5.0184, "step": 960 }, { "epoch": 0.07, "learning_rate": 9.260501639094305e-06, "loss": 5.0059, "step": 970 }, { "epoch": 0.07, "learning_rate": 9.25287794465198e-06, "loss": 4.9516, "step": 980 }, { "epoch": 0.08, "learning_rate": 9.245254250209652e-06, "loss": 4.9363, "step": 990 }, { "epoch": 0.08, "learning_rate": 9.237630555767326e-06, "loss": 5.1004, "step": 1000 }, { "epoch": 0.08, "learning_rate": 9.230006861325e-06, "loss": 4.8988, "step": 1010 }, { "epoch": 0.08, "learning_rate": 9.222383166882672e-06, "loss": 5.0332, "step": 1020 }, { "epoch": 0.08, "learning_rate": 9.214759472440346e-06, "loss": 5.0336, "step": 1030 }, { "epoch": 0.08, "learning_rate": 9.207135777998018e-06, "loss": 4.7898, "step": 1040 }, { "epoch": 0.08, "learning_rate": 9.199512083555692e-06, "loss": 4.8375, "step": 1050 }, { "epoch": 0.08, "learning_rate": 9.191888389113365e-06, "loss": 4.7547, "step": 1060 }, { "epoch": 0.08, "learning_rate": 9.184264694671038e-06, "loss": 4.7156, "step": 1070 }, { "epoch": 0.08, "learning_rate": 9.176641000228712e-06, "loss": 4.7141, "step": 1080 }, { "epoch": 0.08, "learning_rate": 9.169017305786385e-06, "loss": 4.7141, "step": 1090 }, { "epoch": 0.08, "learning_rate": 9.161393611344059e-06, "loss": 4.718, "step": 1100 }, { "epoch": 0.08, "learning_rate": 9.153769916901731e-06, "loss": 4.6184, "step": 1110 }, { "epoch": 0.09, "learning_rate": 9.146146222459405e-06, "loss": 4.7738, "step": 1120 }, { "epoch": 0.09, "learning_rate": 9.138522528017077e-06, "loss": 4.552, "step": 1130 }, { "epoch": 0.09, "learning_rate": 9.130898833574751e-06, "loss": 4.5945, "step": 1140 }, { "epoch": 0.09, "learning_rate": 9.123275139132425e-06, "loss": 4.5016, "step": 1150 }, { "epoch": 0.09, "learning_rate": 9.115651444690098e-06, "loss": 4.516, "step": 1160 }, { "epoch": 0.09, "learning_rate": 9.108027750247771e-06, "loss": 4.557, "step": 1170 }, { "epoch": 0.09, "learning_rate": 9.100404055805444e-06, "loss": 4.4656, "step": 1180 }, { "epoch": 0.09, "learning_rate": 9.092780361363118e-06, "loss": 4.4598, "step": 1190 }, { "epoch": 0.09, "learning_rate": 9.08515666692079e-06, "loss": 4.425, "step": 1200 }, { "epoch": 0.09, "learning_rate": 9.077532972478464e-06, "loss": 4.5328, "step": 1210 }, { "epoch": 0.09, "learning_rate": 9.069909278036138e-06, "loss": 4.4902, "step": 1220 }, { "epoch": 0.09, "learning_rate": 9.06228558359381e-06, "loss": 4.4094, "step": 1230 }, { "epoch": 0.09, "learning_rate": 9.054661889151484e-06, "loss": 4.4609, "step": 1240 }, { "epoch": 0.1, "learning_rate": 9.047038194709157e-06, "loss": 4.2906, "step": 1250 }, { "epoch": 0.1, "learning_rate": 9.03941450026683e-06, "loss": 4.2863, "step": 1260 }, { "epoch": 0.1, "learning_rate": 9.031790805824503e-06, "loss": 4.3141, "step": 1270 }, { "epoch": 0.1, "learning_rate": 9.024167111382177e-06, "loss": 4.2844, "step": 1280 }, { "epoch": 0.1, "learning_rate": 9.01654341693985e-06, "loss": 4.323, "step": 1290 }, { "epoch": 0.1, "learning_rate": 9.008919722497523e-06, "loss": 4.2539, "step": 1300 }, { "epoch": 0.1, "learning_rate": 9.001296028055197e-06, "loss": 4.2047, "step": 1310 }, { "epoch": 0.1, "learning_rate": 8.99367233361287e-06, "loss": 4.2406, "step": 1320 }, { "epoch": 0.1, "learning_rate": 8.986048639170543e-06, "loss": 4.1656, "step": 1330 }, { "epoch": 0.1, "learning_rate": 8.978424944728216e-06, "loss": 4.1324, "step": 1340 }, { "epoch": 0.1, "learning_rate": 8.97080125028589e-06, "loss": 4.2102, "step": 1350 }, { "epoch": 0.1, "learning_rate": 8.963177555843564e-06, "loss": 4.1785, "step": 1360 }, { "epoch": 0.1, "learning_rate": 8.955553861401236e-06, "loss": 4.1496, "step": 1370 }, { "epoch": 0.11, "learning_rate": 8.94793016695891e-06, "loss": 4.148, "step": 1380 }, { "epoch": 0.11, "learning_rate": 8.940306472516582e-06, "loss": 4.0875, "step": 1390 }, { "epoch": 0.11, "learning_rate": 8.932682778074256e-06, "loss": 4.0488, "step": 1400 }, { "epoch": 0.11, "learning_rate": 8.925059083631928e-06, "loss": 4.0926, "step": 1410 }, { "epoch": 0.11, "learning_rate": 8.917435389189602e-06, "loss": 4.0521, "step": 1420 }, { "epoch": 0.11, "learning_rate": 8.909811694747276e-06, "loss": 4.0557, "step": 1430 }, { "epoch": 0.11, "learning_rate": 8.902188000304949e-06, "loss": 4.057, "step": 1440 }, { "epoch": 0.11, "learning_rate": 8.894564305862623e-06, "loss": 3.9955, "step": 1450 }, { "epoch": 0.11, "learning_rate": 8.886940611420295e-06, "loss": 3.9541, "step": 1460 }, { "epoch": 0.11, "learning_rate": 8.879316916977969e-06, "loss": 4.0002, "step": 1470 }, { "epoch": 0.11, "learning_rate": 8.871693222535641e-06, "loss": 3.8512, "step": 1480 }, { "epoch": 0.11, "learning_rate": 8.864069528093315e-06, "loss": 3.9119, "step": 1490 }, { "epoch": 0.11, "learning_rate": 8.856445833650989e-06, "loss": 4.0641, "step": 1500 }, { "epoch": 0.12, "learning_rate": 8.848822139208661e-06, "loss": 3.8793, "step": 1510 }, { "epoch": 0.12, "learning_rate": 8.841198444766335e-06, "loss": 3.8955, "step": 1520 }, { "epoch": 0.12, "learning_rate": 8.833574750324008e-06, "loss": 3.8289, "step": 1530 }, { "epoch": 0.12, "learning_rate": 8.825951055881682e-06, "loss": 3.8039, "step": 1540 }, { "epoch": 0.12, "learning_rate": 8.818327361439354e-06, "loss": 3.7967, "step": 1550 }, { "epoch": 0.12, "learning_rate": 8.810703666997026e-06, "loss": 3.8004, "step": 1560 }, { "epoch": 0.12, "learning_rate": 8.803079972554702e-06, "loss": 3.7502, "step": 1570 }, { "epoch": 0.12, "learning_rate": 8.795456278112374e-06, "loss": 3.7746, "step": 1580 }, { "epoch": 0.12, "learning_rate": 8.787832583670048e-06, "loss": 3.74, "step": 1590 }, { "epoch": 0.12, "learning_rate": 8.78020888922772e-06, "loss": 3.7273, "step": 1600 }, { "epoch": 0.12, "learning_rate": 8.772585194785394e-06, "loss": 3.7084, "step": 1610 }, { "epoch": 0.12, "learning_rate": 8.764961500343067e-06, "loss": 3.6695, "step": 1620 }, { "epoch": 0.12, "learning_rate": 8.757337805900739e-06, "loss": 3.6781, "step": 1630 }, { "epoch": 0.13, "learning_rate": 8.749714111458415e-06, "loss": 3.6531, "step": 1640 }, { "epoch": 0.13, "learning_rate": 8.742090417016087e-06, "loss": 3.6125, "step": 1650 }, { "epoch": 0.13, "learning_rate": 8.73446672257376e-06, "loss": 3.6434, "step": 1660 }, { "epoch": 0.13, "learning_rate": 8.726843028131433e-06, "loss": 3.5969, "step": 1670 }, { "epoch": 0.13, "learning_rate": 8.719219333689107e-06, "loss": 3.6078, "step": 1680 }, { "epoch": 0.13, "learning_rate": 8.71159563924678e-06, "loss": 3.6463, "step": 1690 }, { "epoch": 0.13, "learning_rate": 8.703971944804452e-06, "loss": 3.5914, "step": 1700 }, { "epoch": 0.13, "learning_rate": 8.696348250362127e-06, "loss": 3.5229, "step": 1710 }, { "epoch": 0.13, "learning_rate": 8.6887245559198e-06, "loss": 3.5178, "step": 1720 }, { "epoch": 0.13, "learning_rate": 8.681100861477474e-06, "loss": 3.5465, "step": 1730 }, { "epoch": 0.13, "learning_rate": 8.673477167035146e-06, "loss": 3.4473, "step": 1740 }, { "epoch": 0.13, "learning_rate": 8.665853472592818e-06, "loss": 3.4814, "step": 1750 }, { "epoch": 0.13, "learning_rate": 8.658229778150492e-06, "loss": 3.459, "step": 1760 }, { "epoch": 0.13, "learning_rate": 8.650606083708166e-06, "loss": 3.4336, "step": 1770 }, { "epoch": 0.14, "learning_rate": 8.64298238926584e-06, "loss": 3.3758, "step": 1780 }, { "epoch": 0.14, "learning_rate": 8.635358694823512e-06, "loss": 3.3777, "step": 1790 }, { "epoch": 0.14, "learning_rate": 8.627735000381186e-06, "loss": 3.3633, "step": 1800 }, { "epoch": 0.14, "learning_rate": 8.620111305938859e-06, "loss": 3.3711, "step": 1810 }, { "epoch": 0.14, "learning_rate": 8.612487611496531e-06, "loss": 3.3563, "step": 1820 }, { "epoch": 0.14, "learning_rate": 8.604863917054205e-06, "loss": 3.3432, "step": 1830 }, { "epoch": 0.14, "learning_rate": 8.597240222611879e-06, "loss": 3.4258, "step": 1840 }, { "epoch": 0.14, "learning_rate": 8.589616528169553e-06, "loss": 3.3188, "step": 1850 }, { "epoch": 0.14, "learning_rate": 8.581992833727225e-06, "loss": 3.2518, "step": 1860 }, { "epoch": 0.14, "learning_rate": 8.574369139284897e-06, "loss": 3.283, "step": 1870 }, { "epoch": 0.14, "learning_rate": 8.566745444842571e-06, "loss": 3.2523, "step": 1880 }, { "epoch": 0.14, "learning_rate": 8.559121750400244e-06, "loss": 3.2496, "step": 1890 }, { "epoch": 0.14, "learning_rate": 8.551498055957918e-06, "loss": 3.2254, "step": 1900 }, { "epoch": 0.15, "learning_rate": 8.543874361515592e-06, "loss": 3.1781, "step": 1910 }, { "epoch": 0.15, "learning_rate": 8.536250667073266e-06, "loss": 3.1686, "step": 1920 }, { "epoch": 0.15, "learning_rate": 8.528626972630938e-06, "loss": 3.2557, "step": 1930 }, { "epoch": 0.15, "learning_rate": 8.52100327818861e-06, "loss": 3.1295, "step": 1940 }, { "epoch": 0.15, "learning_rate": 8.513379583746284e-06, "loss": 3.1766, "step": 1950 }, { "epoch": 0.15, "learning_rate": 8.505755889303956e-06, "loss": 3.1641, "step": 1960 }, { "epoch": 0.15, "learning_rate": 8.49813219486163e-06, "loss": 3.1301, "step": 1970 }, { "epoch": 0.15, "learning_rate": 8.490508500419304e-06, "loss": 3.1424, "step": 1980 }, { "epoch": 0.15, "learning_rate": 8.482884805976977e-06, "loss": 3.0842, "step": 1990 }, { "epoch": 0.15, "learning_rate": 8.47526111153465e-06, "loss": 3.0824, "step": 2000 }, { "epoch": 0.15, "learning_rate": 8.467637417092323e-06, "loss": 3.0498, "step": 2010 }, { "epoch": 0.15, "learning_rate": 8.460013722649997e-06, "loss": 3.1193, "step": 2020 }, { "epoch": 0.15, "learning_rate": 8.45239002820767e-06, "loss": 3.0367, "step": 2030 }, { "epoch": 0.16, "learning_rate": 8.444766333765343e-06, "loss": 3.051, "step": 2040 }, { "epoch": 0.16, "learning_rate": 8.437142639323017e-06, "loss": 3.0029, "step": 2050 }, { "epoch": 0.16, "learning_rate": 8.42951894488069e-06, "loss": 2.9674, "step": 2060 }, { "epoch": 0.16, "learning_rate": 8.421895250438363e-06, "loss": 2.9797, "step": 2070 }, { "epoch": 0.16, "learning_rate": 8.414271555996036e-06, "loss": 3.0271, "step": 2080 }, { "epoch": 0.16, "learning_rate": 8.40664786155371e-06, "loss": 2.9619, "step": 2090 }, { "epoch": 0.16, "learning_rate": 8.399024167111382e-06, "loss": 3.0301, "step": 2100 }, { "epoch": 0.16, "learning_rate": 8.391400472669056e-06, "loss": 2.9232, "step": 2110 }, { "epoch": 0.16, "learning_rate": 8.38377677822673e-06, "loss": 2.935, "step": 2120 }, { "epoch": 0.16, "learning_rate": 8.376153083784402e-06, "loss": 2.924, "step": 2130 }, { "epoch": 0.16, "learning_rate": 8.368529389342076e-06, "loss": 2.8533, "step": 2140 }, { "epoch": 0.16, "learning_rate": 8.360905694899748e-06, "loss": 2.859, "step": 2150 }, { "epoch": 0.16, "learning_rate": 8.353282000457422e-06, "loss": 2.8752, "step": 2160 }, { "epoch": 0.17, "learning_rate": 8.345658306015095e-06, "loss": 2.8092, "step": 2170 }, { "epoch": 0.17, "learning_rate": 8.338034611572769e-06, "loss": 2.9111, "step": 2180 }, { "epoch": 0.17, "learning_rate": 8.330410917130443e-06, "loss": 2.8344, "step": 2190 }, { "epoch": 0.17, "learning_rate": 8.322787222688115e-06, "loss": 2.8535, "step": 2200 }, { "epoch": 0.17, "learning_rate": 8.315163528245789e-06, "loss": 2.8996, "step": 2210 }, { "epoch": 0.17, "learning_rate": 8.307539833803461e-06, "loss": 2.8619, "step": 2220 }, { "epoch": 0.17, "learning_rate": 8.299916139361135e-06, "loss": 2.8908, "step": 2230 }, { "epoch": 0.17, "learning_rate": 8.292292444918807e-06, "loss": 2.8221, "step": 2240 }, { "epoch": 0.17, "learning_rate": 8.284668750476481e-06, "loss": 2.8062, "step": 2250 }, { "epoch": 0.17, "learning_rate": 8.277045056034155e-06, "loss": 2.7785, "step": 2260 }, { "epoch": 0.17, "learning_rate": 8.269421361591828e-06, "loss": 2.8027, "step": 2270 }, { "epoch": 0.17, "learning_rate": 8.261797667149502e-06, "loss": 2.7518, "step": 2280 }, { "epoch": 0.17, "learning_rate": 8.254173972707174e-06, "loss": 2.7961, "step": 2290 }, { "epoch": 0.18, "learning_rate": 8.246550278264848e-06, "loss": 2.7605, "step": 2300 }, { "epoch": 0.18, "learning_rate": 8.23892658382252e-06, "loss": 2.7055, "step": 2310 }, { "epoch": 0.18, "learning_rate": 8.231302889380194e-06, "loss": 2.6449, "step": 2320 }, { "epoch": 0.18, "learning_rate": 8.223679194937868e-06, "loss": 2.6789, "step": 2330 }, { "epoch": 0.18, "learning_rate": 8.21605550049554e-06, "loss": 2.6936, "step": 2340 }, { "epoch": 0.18, "learning_rate": 8.208431806053214e-06, "loss": 2.6859, "step": 2350 }, { "epoch": 0.18, "learning_rate": 8.200808111610887e-06, "loss": 2.7135, "step": 2360 }, { "epoch": 0.18, "learning_rate": 8.19318441716856e-06, "loss": 2.634, "step": 2370 }, { "epoch": 0.18, "learning_rate": 8.185560722726233e-06, "loss": 2.643, "step": 2380 }, { "epoch": 0.18, "learning_rate": 8.177937028283907e-06, "loss": 2.6611, "step": 2390 }, { "epoch": 0.18, "learning_rate": 8.170313333841581e-06, "loss": 2.6721, "step": 2400 }, { "epoch": 0.18, "learning_rate": 8.162689639399253e-06, "loss": 2.6314, "step": 2410 }, { "epoch": 0.18, "learning_rate": 8.155065944956927e-06, "loss": 2.666, "step": 2420 }, { "epoch": 0.19, "learning_rate": 8.1474422505146e-06, "loss": 2.6109, "step": 2430 }, { "epoch": 0.19, "learning_rate": 8.139818556072273e-06, "loss": 2.6479, "step": 2440 }, { "epoch": 0.19, "learning_rate": 8.132194861629946e-06, "loss": 2.6289, "step": 2450 }, { "epoch": 0.19, "learning_rate": 8.12457116718762e-06, "loss": 2.5789, "step": 2460 }, { "epoch": 0.19, "learning_rate": 8.116947472745294e-06, "loss": 2.5514, "step": 2470 }, { "epoch": 0.19, "learning_rate": 8.109323778302966e-06, "loss": 2.5531, "step": 2480 }, { "epoch": 0.19, "learning_rate": 8.10170008386064e-06, "loss": 2.5922, "step": 2490 }, { "epoch": 0.19, "learning_rate": 8.094076389418312e-06, "loss": 2.5785, "step": 2500 }, { "epoch": 0.19, "learning_rate": 8.086452694975986e-06, "loss": 2.5496, "step": 2510 }, { "epoch": 0.19, "learning_rate": 8.078829000533659e-06, "loss": 2.5227, "step": 2520 }, { "epoch": 0.19, "learning_rate": 8.071205306091332e-06, "loss": 2.6195, "step": 2530 }, { "epoch": 0.19, "learning_rate": 8.063581611649006e-06, "loss": 2.5441, "step": 2540 }, { "epoch": 0.19, "learning_rate": 8.055957917206679e-06, "loss": 2.5348, "step": 2550 }, { "epoch": 0.2, "learning_rate": 8.048334222764353e-06, "loss": 2.5326, "step": 2560 }, { "epoch": 0.2, "learning_rate": 8.040710528322025e-06, "loss": 2.5195, "step": 2570 }, { "epoch": 0.2, "learning_rate": 8.033086833879699e-06, "loss": 2.5826, "step": 2580 }, { "epoch": 0.2, "learning_rate": 8.025463139437371e-06, "loss": 2.5199, "step": 2590 }, { "epoch": 0.2, "learning_rate": 8.017839444995045e-06, "loss": 2.4783, "step": 2600 }, { "epoch": 0.2, "learning_rate": 8.01021575055272e-06, "loss": 2.5186, "step": 2610 }, { "epoch": 0.2, "learning_rate": 8.002592056110392e-06, "loss": 2.5002, "step": 2620 }, { "epoch": 0.2, "learning_rate": 7.994968361668065e-06, "loss": 2.5014, "step": 2630 }, { "epoch": 0.2, "learning_rate": 7.987344667225738e-06, "loss": 2.4631, "step": 2640 }, { "epoch": 0.2, "learning_rate": 7.979720972783412e-06, "loss": 2.4621, "step": 2650 }, { "epoch": 0.2, "learning_rate": 7.972097278341084e-06, "loss": 2.4621, "step": 2660 }, { "epoch": 0.2, "learning_rate": 7.964473583898758e-06, "loss": 2.524, "step": 2670 }, { "epoch": 0.2, "learning_rate": 7.956849889456432e-06, "loss": 2.4607, "step": 2680 }, { "epoch": 0.21, "learning_rate": 7.949226195014104e-06, "loss": 2.4621, "step": 2690 }, { "epoch": 0.21, "learning_rate": 7.941602500571778e-06, "loss": 2.4975, "step": 2700 }, { "epoch": 0.21, "learning_rate": 7.93397880612945e-06, "loss": 2.4941, "step": 2710 }, { "epoch": 0.21, "learning_rate": 7.926355111687125e-06, "loss": 2.3941, "step": 2720 }, { "epoch": 0.21, "learning_rate": 7.918731417244797e-06, "loss": 2.457, "step": 2730 }, { "epoch": 0.21, "learning_rate": 7.91110772280247e-06, "loss": 2.4133, "step": 2740 }, { "epoch": 0.21, "learning_rate": 7.903484028360145e-06, "loss": 2.41, "step": 2750 }, { "epoch": 0.21, "learning_rate": 7.895860333917817e-06, "loss": 2.4154, "step": 2760 }, { "epoch": 0.21, "learning_rate": 7.888236639475491e-06, "loss": 2.3951, "step": 2770 }, { "epoch": 0.21, "learning_rate": 7.880612945033163e-06, "loss": 2.3861, "step": 2780 }, { "epoch": 0.21, "learning_rate": 7.872989250590837e-06, "loss": 2.3781, "step": 2790 }, { "epoch": 0.21, "learning_rate": 7.86536555614851e-06, "loss": 2.3859, "step": 2800 }, { "epoch": 0.21, "learning_rate": 7.857741861706184e-06, "loss": 2.3857, "step": 2810 }, { "epoch": 0.21, "learning_rate": 7.850118167263858e-06, "loss": 2.367, "step": 2820 }, { "epoch": 0.22, "learning_rate": 7.84249447282153e-06, "loss": 2.3711, "step": 2830 }, { "epoch": 0.22, "learning_rate": 7.834870778379204e-06, "loss": 2.342, "step": 2840 }, { "epoch": 0.22, "learning_rate": 7.827247083936876e-06, "loss": 2.3771, "step": 2850 }, { "epoch": 0.22, "learning_rate": 7.81962338949455e-06, "loss": 2.3484, "step": 2860 }, { "epoch": 0.22, "learning_rate": 7.811999695052222e-06, "loss": 2.3561, "step": 2870 }, { "epoch": 0.22, "learning_rate": 7.804376000609896e-06, "loss": 2.3777, "step": 2880 }, { "epoch": 0.22, "learning_rate": 7.79675230616757e-06, "loss": 2.3965, "step": 2890 }, { "epoch": 0.22, "learning_rate": 7.789128611725243e-06, "loss": 2.3568, "step": 2900 }, { "epoch": 0.22, "learning_rate": 7.781504917282917e-06, "loss": 2.3586, "step": 2910 }, { "epoch": 0.22, "learning_rate": 7.773881222840589e-06, "loss": 2.3389, "step": 2920 }, { "epoch": 0.22, "learning_rate": 7.766257528398263e-06, "loss": 2.3141, "step": 2930 }, { "epoch": 0.22, "learning_rate": 7.758633833955935e-06, "loss": 2.3244, "step": 2940 }, { "epoch": 0.22, "learning_rate": 7.751010139513609e-06, "loss": 2.323, "step": 2950 }, { "epoch": 0.23, "learning_rate": 7.743386445071283e-06, "loss": 2.3035, "step": 2960 }, { "epoch": 0.23, "learning_rate": 7.735762750628955e-06, "loss": 2.2945, "step": 2970 }, { "epoch": 0.23, "learning_rate": 7.72813905618663e-06, "loss": 2.2807, "step": 2980 }, { "epoch": 0.23, "learning_rate": 7.720515361744302e-06, "loss": 2.3168, "step": 2990 }, { "epoch": 0.23, "learning_rate": 7.712891667301976e-06, "loss": 2.291, "step": 3000 }, { "epoch": 0.23, "learning_rate": 7.705267972859648e-06, "loss": 2.2896, "step": 3010 }, { "epoch": 0.23, "learning_rate": 7.697644278417322e-06, "loss": 2.285, "step": 3020 }, { "epoch": 0.23, "learning_rate": 7.690020583974996e-06, "loss": 2.2836, "step": 3030 }, { "epoch": 0.23, "learning_rate": 7.682396889532668e-06, "loss": 2.2945, "step": 3040 }, { "epoch": 0.23, "learning_rate": 7.674773195090342e-06, "loss": 2.299, "step": 3050 }, { "epoch": 0.23, "learning_rate": 7.667149500648014e-06, "loss": 2.3023, "step": 3060 }, { "epoch": 0.23, "learning_rate": 7.659525806205688e-06, "loss": 2.2889, "step": 3070 }, { "epoch": 0.23, "learning_rate": 7.65190211176336e-06, "loss": 2.265, "step": 3080 }, { "epoch": 0.24, "learning_rate": 7.644278417321035e-06, "loss": 2.276, "step": 3090 }, { "epoch": 0.24, "learning_rate": 7.636654722878709e-06, "loss": 2.2551, "step": 3100 }, { "epoch": 0.24, "learning_rate": 7.629031028436381e-06, "loss": 2.2389, "step": 3110 }, { "epoch": 0.24, "learning_rate": 7.621407333994054e-06, "loss": 2.252, "step": 3120 }, { "epoch": 0.24, "learning_rate": 7.613783639551727e-06, "loss": 2.2695, "step": 3130 }, { "epoch": 0.24, "learning_rate": 7.6061599451094e-06, "loss": 2.2432, "step": 3140 }, { "epoch": 0.24, "learning_rate": 7.598536250667073e-06, "loss": 2.2299, "step": 3150 }, { "epoch": 0.24, "learning_rate": 7.590912556224747e-06, "loss": 2.2291, "step": 3160 }, { "epoch": 0.24, "learning_rate": 7.5832888617824205e-06, "loss": 2.2617, "step": 3170 }, { "epoch": 0.24, "learning_rate": 7.575665167340094e-06, "loss": 2.268, "step": 3180 }, { "epoch": 0.24, "learning_rate": 7.568041472897767e-06, "loss": 2.2533, "step": 3190 }, { "epoch": 0.24, "learning_rate": 7.56041777845544e-06, "loss": 2.2246, "step": 3200 }, { "epoch": 0.24, "learning_rate": 7.552794084013113e-06, "loss": 2.2418, "step": 3210 }, { "epoch": 0.25, "learning_rate": 7.545170389570786e-06, "loss": 2.2529, "step": 3220 }, { "epoch": 0.25, "learning_rate": 7.53754669512846e-06, "loss": 2.2363, "step": 3230 }, { "epoch": 0.25, "learning_rate": 7.529923000686133e-06, "loss": 2.2176, "step": 3240 }, { "epoch": 0.25, "learning_rate": 7.522299306243806e-06, "loss": 2.2248, "step": 3250 }, { "epoch": 0.25, "learning_rate": 7.5146756118014795e-06, "loss": 2.1896, "step": 3260 }, { "epoch": 0.25, "learning_rate": 7.507051917359153e-06, "loss": 2.1836, "step": 3270 }, { "epoch": 0.25, "learning_rate": 7.499428222916826e-06, "loss": 2.2072, "step": 3280 }, { "epoch": 0.25, "learning_rate": 7.491804528474499e-06, "loss": 2.182, "step": 3290 }, { "epoch": 0.25, "learning_rate": 7.484180834032173e-06, "loss": 2.2078, "step": 3300 }, { "epoch": 0.25, "learning_rate": 7.476557139589846e-06, "loss": 2.202, "step": 3310 }, { "epoch": 0.25, "learning_rate": 7.468933445147519e-06, "loss": 2.2119, "step": 3320 }, { "epoch": 0.25, "learning_rate": 7.461309750705192e-06, "loss": 2.1961, "step": 3330 }, { "epoch": 0.25, "learning_rate": 7.453686056262865e-06, "loss": 2.1996, "step": 3340 }, { "epoch": 0.26, "learning_rate": 7.4460623618205385e-06, "loss": 2.1904, "step": 3350 }, { "epoch": 0.26, "learning_rate": 7.438438667378212e-06, "loss": 2.1912, "step": 3360 }, { "epoch": 0.26, "learning_rate": 7.430814972935886e-06, "loss": 2.2055, "step": 3370 }, { "epoch": 0.26, "learning_rate": 7.423191278493559e-06, "loss": 2.176, "step": 3380 }, { "epoch": 0.26, "learning_rate": 7.415567584051232e-06, "loss": 2.1834, "step": 3390 }, { "epoch": 0.26, "learning_rate": 7.407943889608905e-06, "loss": 2.1508, "step": 3400 }, { "epoch": 0.26, "learning_rate": 7.400320195166578e-06, "loss": 2.1447, "step": 3410 }, { "epoch": 0.26, "learning_rate": 7.392696500724251e-06, "loss": 2.1648, "step": 3420 }, { "epoch": 0.26, "learning_rate": 7.385072806281924e-06, "loss": 2.1432, "step": 3430 }, { "epoch": 0.26, "learning_rate": 7.377449111839598e-06, "loss": 2.1818, "step": 3440 }, { "epoch": 0.26, "learning_rate": 7.3698254173972715e-06, "loss": 2.1557, "step": 3450 }, { "epoch": 0.26, "learning_rate": 7.362201722954945e-06, "loss": 2.1549, "step": 3460 }, { "epoch": 0.26, "learning_rate": 7.354578028512618e-06, "loss": 2.1576, "step": 3470 }, { "epoch": 0.27, "learning_rate": 7.346954334070291e-06, "loss": 2.1533, "step": 3480 }, { "epoch": 0.27, "learning_rate": 7.339330639627964e-06, "loss": 2.1576, "step": 3490 }, { "epoch": 0.27, "learning_rate": 7.331706945185637e-06, "loss": 2.1648, "step": 3500 }, { "epoch": 0.27, "learning_rate": 7.324083250743311e-06, "loss": 2.1676, "step": 3510 }, { "epoch": 0.27, "learning_rate": 7.316459556300984e-06, "loss": 2.1617, "step": 3520 }, { "epoch": 0.27, "learning_rate": 7.308835861858657e-06, "loss": 2.1432, "step": 3530 }, { "epoch": 0.27, "learning_rate": 7.3012121674163305e-06, "loss": 2.1645, "step": 3540 }, { "epoch": 0.27, "learning_rate": 7.293588472974004e-06, "loss": 2.174, "step": 3550 }, { "epoch": 0.27, "learning_rate": 7.285964778531677e-06, "loss": 2.1467, "step": 3560 }, { "epoch": 0.27, "learning_rate": 7.27834108408935e-06, "loss": 2.1385, "step": 3570 }, { "epoch": 0.27, "learning_rate": 7.270717389647024e-06, "loss": 2.1344, "step": 3580 }, { "epoch": 0.27, "learning_rate": 7.263093695204697e-06, "loss": 2.1182, "step": 3590 }, { "epoch": 0.27, "learning_rate": 7.25547000076237e-06, "loss": 2.1393, "step": 3600 }, { "epoch": 0.28, "learning_rate": 7.247846306320043e-06, "loss": 2.1361, "step": 3610 }, { "epoch": 0.28, "learning_rate": 7.240222611877716e-06, "loss": 2.1328, "step": 3620 }, { "epoch": 0.28, "learning_rate": 7.2325989174353895e-06, "loss": 2.1322, "step": 3630 }, { "epoch": 0.28, "learning_rate": 7.224975222993063e-06, "loss": 2.1455, "step": 3640 }, { "epoch": 0.28, "learning_rate": 7.217351528550737e-06, "loss": 2.1078, "step": 3650 }, { "epoch": 0.28, "learning_rate": 7.20972783410841e-06, "loss": 2.1408, "step": 3660 }, { "epoch": 0.28, "learning_rate": 7.202104139666083e-06, "loss": 2.1225, "step": 3670 }, { "epoch": 0.28, "learning_rate": 7.194480445223756e-06, "loss": 2.1324, "step": 3680 }, { "epoch": 0.28, "learning_rate": 7.186856750781429e-06, "loss": 2.107, "step": 3690 }, { "epoch": 0.28, "learning_rate": 7.179233056339102e-06, "loss": 2.1271, "step": 3700 }, { "epoch": 0.28, "learning_rate": 7.1716093618967754e-06, "loss": 2.1141, "step": 3710 }, { "epoch": 0.28, "learning_rate": 7.163985667454449e-06, "loss": 2.1141, "step": 3720 }, { "epoch": 0.28, "learning_rate": 7.1563619730121225e-06, "loss": 2.0951, "step": 3730 }, { "epoch": 0.29, "learning_rate": 7.148738278569796e-06, "loss": 2.0971, "step": 3740 }, { "epoch": 0.29, "learning_rate": 7.141114584127469e-06, "loss": 2.1066, "step": 3750 }, { "epoch": 0.29, "learning_rate": 7.133490889685142e-06, "loss": 2.0941, "step": 3760 }, { "epoch": 0.29, "learning_rate": 7.125867195242815e-06, "loss": 2.1232, "step": 3770 }, { "epoch": 0.29, "learning_rate": 7.118243500800488e-06, "loss": 2.1021, "step": 3780 }, { "epoch": 0.29, "learning_rate": 7.110619806358162e-06, "loss": 2.0938, "step": 3790 }, { "epoch": 0.29, "learning_rate": 7.102996111915835e-06, "loss": 2.1164, "step": 3800 }, { "epoch": 0.29, "learning_rate": 7.0953724174735084e-06, "loss": 2.0836, "step": 3810 }, { "epoch": 0.29, "learning_rate": 7.0877487230311816e-06, "loss": 2.0916, "step": 3820 }, { "epoch": 0.29, "learning_rate": 7.080125028588855e-06, "loss": 2.1068, "step": 3830 }, { "epoch": 0.29, "learning_rate": 7.072501334146528e-06, "loss": 2.0977, "step": 3840 }, { "epoch": 0.29, "learning_rate": 7.064877639704201e-06, "loss": 2.0994, "step": 3850 }, { "epoch": 0.29, "learning_rate": 7.057253945261875e-06, "loss": 2.0932, "step": 3860 }, { "epoch": 0.3, "learning_rate": 7.049630250819548e-06, "loss": 2.099, "step": 3870 }, { "epoch": 0.3, "learning_rate": 7.042006556377221e-06, "loss": 2.1025, "step": 3880 }, { "epoch": 0.3, "learning_rate": 7.034382861934894e-06, "loss": 2.0963, "step": 3890 }, { "epoch": 0.3, "learning_rate": 7.0267591674925675e-06, "loss": 2.0982, "step": 3900 }, { "epoch": 0.3, "learning_rate": 7.019135473050241e-06, "loss": 2.0689, "step": 3910 }, { "epoch": 0.3, "learning_rate": 7.011511778607914e-06, "loss": 2.0893, "step": 3920 }, { "epoch": 0.3, "learning_rate": 7.003888084165588e-06, "loss": 2.0986, "step": 3930 }, { "epoch": 0.3, "learning_rate": 6.996264389723261e-06, "loss": 2.0852, "step": 3940 }, { "epoch": 0.3, "learning_rate": 6.988640695280934e-06, "loss": 2.1145, "step": 3950 }, { "epoch": 0.3, "learning_rate": 6.981017000838607e-06, "loss": 2.0885, "step": 3960 }, { "epoch": 0.3, "learning_rate": 6.97339330639628e-06, "loss": 2.0957, "step": 3970 }, { "epoch": 0.3, "learning_rate": 6.965769611953953e-06, "loss": 2.0619, "step": 3980 }, { "epoch": 0.3, "learning_rate": 6.9581459175116265e-06, "loss": 2.074, "step": 3990 }, { "epoch": 0.3, "learning_rate": 6.9505222230693005e-06, "loss": 2.0637, "step": 4000 }, { "epoch": 0.31, "learning_rate": 6.942898528626974e-06, "loss": 2.0822, "step": 4010 }, { "epoch": 0.31, "learning_rate": 6.935274834184647e-06, "loss": 2.0922, "step": 4020 }, { "epoch": 0.31, "learning_rate": 6.92765113974232e-06, "loss": 2.0863, "step": 4030 }, { "epoch": 0.31, "learning_rate": 6.920027445299993e-06, "loss": 2.0684, "step": 4040 }, { "epoch": 0.31, "learning_rate": 6.912403750857666e-06, "loss": 2.0896, "step": 4050 }, { "epoch": 0.31, "learning_rate": 6.904780056415339e-06, "loss": 2.0615, "step": 4060 }, { "epoch": 0.31, "learning_rate": 6.897156361973013e-06, "loss": 2.0467, "step": 4070 }, { "epoch": 0.31, "learning_rate": 6.889532667530686e-06, "loss": 2.0713, "step": 4080 }, { "epoch": 0.31, "learning_rate": 6.8819089730883595e-06, "loss": 2.0795, "step": 4090 }, { "epoch": 0.31, "learning_rate": 6.874285278646033e-06, "loss": 2.0762, "step": 4100 }, { "epoch": 0.31, "learning_rate": 6.866661584203706e-06, "loss": 2.0539, "step": 4110 }, { "epoch": 0.31, "learning_rate": 6.859037889761379e-06, "loss": 2.066, "step": 4120 }, { "epoch": 0.31, "learning_rate": 6.851414195319053e-06, "loss": 2.082, "step": 4130 }, { "epoch": 0.32, "learning_rate": 6.843790500876726e-06, "loss": 2.076, "step": 4140 }, { "epoch": 0.32, "learning_rate": 6.836166806434399e-06, "loss": 2.084, "step": 4150 }, { "epoch": 0.32, "learning_rate": 6.828543111992072e-06, "loss": 2.0691, "step": 4160 }, { "epoch": 0.32, "learning_rate": 6.820919417549745e-06, "loss": 2.0727, "step": 4170 }, { "epoch": 0.32, "learning_rate": 6.8132957231074185e-06, "loss": 2.0645, "step": 4180 }, { "epoch": 0.32, "learning_rate": 6.805672028665092e-06, "loss": 2.0625, "step": 4190 }, { "epoch": 0.32, "learning_rate": 6.798048334222766e-06, "loss": 2.0729, "step": 4200 }, { "epoch": 0.32, "learning_rate": 6.790424639780439e-06, "loss": 2.0545, "step": 4210 }, { "epoch": 0.32, "learning_rate": 6.782800945338112e-06, "loss": 2.0551, "step": 4220 }, { "epoch": 0.32, "learning_rate": 6.775177250895785e-06, "loss": 2.0553, "step": 4230 }, { "epoch": 0.32, "learning_rate": 6.767553556453458e-06, "loss": 2.0568, "step": 4240 }, { "epoch": 0.32, "learning_rate": 6.759929862011131e-06, "loss": 2.0494, "step": 4250 }, { "epoch": 0.32, "learning_rate": 6.752306167568804e-06, "loss": 2.052, "step": 4260 }, { "epoch": 0.33, "learning_rate": 6.744682473126478e-06, "loss": 2.0559, "step": 4270 }, { "epoch": 0.33, "learning_rate": 6.7370587786841515e-06, "loss": 2.0336, "step": 4280 }, { "epoch": 0.33, "learning_rate": 6.729435084241825e-06, "loss": 2.0472, "step": 4290 }, { "epoch": 0.33, "learning_rate": 6.721811389799498e-06, "loss": 2.0562, "step": 4300 }, { "epoch": 0.33, "learning_rate": 6.714187695357171e-06, "loss": 2.0545, "step": 4310 }, { "epoch": 0.33, "learning_rate": 6.706564000914844e-06, "loss": 2.0535, "step": 4320 }, { "epoch": 0.33, "learning_rate": 6.698940306472516e-06, "loss": 2.0263, "step": 4330 }, { "epoch": 0.33, "learning_rate": 6.691316612030191e-06, "loss": 2.058, "step": 4340 }, { "epoch": 0.33, "learning_rate": 6.683692917587864e-06, "loss": 2.0533, "step": 4350 }, { "epoch": 0.33, "learning_rate": 6.676069223145537e-06, "loss": 2.0605, "step": 4360 }, { "epoch": 0.33, "learning_rate": 6.6684455287032105e-06, "loss": 2.0426, "step": 4370 }, { "epoch": 0.33, "learning_rate": 6.660821834260884e-06, "loss": 2.05, "step": 4380 }, { "epoch": 0.33, "learning_rate": 6.653198139818556e-06, "loss": 2.0334, "step": 4390 }, { "epoch": 0.34, "learning_rate": 6.645574445376229e-06, "loss": 2.0363, "step": 4400 }, { "epoch": 0.34, "learning_rate": 6.637950750933904e-06, "loss": 2.0394, "step": 4410 }, { "epoch": 0.34, "learning_rate": 6.630327056491577e-06, "loss": 2.0482, "step": 4420 }, { "epoch": 0.34, "learning_rate": 6.62270336204925e-06, "loss": 2.0266, "step": 4430 }, { "epoch": 0.34, "learning_rate": 6.615079667606923e-06, "loss": 2.0434, "step": 4440 }, { "epoch": 0.34, "learning_rate": 6.6074559731645956e-06, "loss": 2.0577, "step": 4450 }, { "epoch": 0.34, "learning_rate": 6.599832278722269e-06, "loss": 2.0351, "step": 4460 }, { "epoch": 0.34, "learning_rate": 6.592208584279942e-06, "loss": 2.0363, "step": 4470 }, { "epoch": 0.34, "learning_rate": 6.584584889837617e-06, "loss": 2.0621, "step": 4480 }, { "epoch": 0.34, "learning_rate": 6.57696119539529e-06, "loss": 2.0369, "step": 4490 }, { "epoch": 0.34, "learning_rate": 6.569337500952963e-06, "loss": 2.0353, "step": 4500 }, { "epoch": 0.34, "learning_rate": 6.561713806510635e-06, "loss": 2.0434, "step": 4510 }, { "epoch": 0.34, "learning_rate": 6.554090112068308e-06, "loss": 2.0469, "step": 4520 }, { "epoch": 0.35, "learning_rate": 6.5464664176259815e-06, "loss": 2.0525, "step": 4530 }, { "epoch": 0.35, "learning_rate": 6.538842723183655e-06, "loss": 2.048, "step": 4540 }, { "epoch": 0.35, "learning_rate": 6.531219028741329e-06, "loss": 2.041, "step": 4550 }, { "epoch": 0.35, "learning_rate": 6.5235953342990025e-06, "loss": 2.0564, "step": 4560 }, { "epoch": 0.35, "learning_rate": 6.515971639856676e-06, "loss": 2.0571, "step": 4570 }, { "epoch": 0.35, "learning_rate": 6.508347945414348e-06, "loss": 2.0462, "step": 4580 }, { "epoch": 0.35, "learning_rate": 6.500724250972021e-06, "loss": 2.0322, "step": 4590 }, { "epoch": 0.35, "learning_rate": 6.493100556529694e-06, "loss": 2.0574, "step": 4600 }, { "epoch": 0.35, "learning_rate": 6.485476862087367e-06, "loss": 2.0396, "step": 4610 }, { "epoch": 0.35, "learning_rate": 6.477853167645042e-06, "loss": 2.0214, "step": 4620 }, { "epoch": 0.35, "learning_rate": 6.470229473202715e-06, "loss": 2.0372, "step": 4630 }, { "epoch": 0.35, "learning_rate": 6.462605778760388e-06, "loss": 2.0311, "step": 4640 }, { "epoch": 0.35, "learning_rate": 6.454982084318061e-06, "loss": 2.049, "step": 4650 }, { "epoch": 0.36, "learning_rate": 6.447358389875734e-06, "loss": 2.0438, "step": 4660 }, { "epoch": 0.36, "learning_rate": 6.439734695433407e-06, "loss": 2.0396, "step": 4670 }, { "epoch": 0.36, "learning_rate": 6.43211100099108e-06, "loss": 2.0375, "step": 4680 }, { "epoch": 0.36, "learning_rate": 6.424487306548755e-06, "loss": 2.0537, "step": 4690 }, { "epoch": 0.36, "learning_rate": 6.416863612106427e-06, "loss": 2.0272, "step": 4700 }, { "epoch": 0.36, "learning_rate": 6.4092399176641e-06, "loss": 2.0449, "step": 4710 }, { "epoch": 0.36, "learning_rate": 6.4016162232217735e-06, "loss": 2.0355, "step": 4720 }, { "epoch": 0.36, "learning_rate": 6.393992528779447e-06, "loss": 2.0334, "step": 4730 }, { "epoch": 0.36, "learning_rate": 6.38636883433712e-06, "loss": 2.0424, "step": 4740 }, { "epoch": 0.36, "learning_rate": 6.378745139894793e-06, "loss": 2.0303, "step": 4750 }, { "epoch": 0.36, "learning_rate": 6.371121445452467e-06, "loss": 2.0482, "step": 4760 }, { "epoch": 0.36, "learning_rate": 6.36349775101014e-06, "loss": 2.0299, "step": 4770 }, { "epoch": 0.36, "learning_rate": 6.355874056567813e-06, "loss": 2.0357, "step": 4780 }, { "epoch": 0.37, "learning_rate": 6.348250362125486e-06, "loss": 2.032, "step": 4790 }, { "epoch": 0.37, "learning_rate": 6.340626667683159e-06, "loss": 2.0398, "step": 4800 }, { "epoch": 0.37, "learning_rate": 6.3330029732408325e-06, "loss": 2.027, "step": 4810 }, { "epoch": 0.37, "learning_rate": 6.325379278798506e-06, "loss": 2.0309, "step": 4820 }, { "epoch": 0.37, "learning_rate": 6.31775558435618e-06, "loss": 2.024, "step": 4830 }, { "epoch": 0.37, "learning_rate": 6.310131889913853e-06, "loss": 2.0318, "step": 4840 }, { "epoch": 0.37, "learning_rate": 6.302508195471526e-06, "loss": 2.0342, "step": 4850 }, { "epoch": 0.37, "learning_rate": 6.294884501029199e-06, "loss": 2.0428, "step": 4860 }, { "epoch": 0.37, "learning_rate": 6.287260806586872e-06, "loss": 2.0347, "step": 4870 }, { "epoch": 0.37, "learning_rate": 6.279637112144545e-06, "loss": 2.0327, "step": 4880 }, { "epoch": 0.37, "learning_rate": 6.272013417702218e-06, "loss": 2.0364, "step": 4890 }, { "epoch": 0.37, "learning_rate": 6.264389723259892e-06, "loss": 2.0394, "step": 4900 }, { "epoch": 0.37, "learning_rate": 6.2567660288175655e-06, "loss": 2.0394, "step": 4910 }, { "epoch": 0.38, "learning_rate": 6.249142334375239e-06, "loss": 2.0299, "step": 4920 }, { "epoch": 0.38, "learning_rate": 6.241518639932912e-06, "loss": 2.0387, "step": 4930 }, { "epoch": 0.38, "learning_rate": 6.233894945490585e-06, "loss": 2.0266, "step": 4940 }, { "epoch": 0.38, "learning_rate": 6.226271251048258e-06, "loss": 2.0578, "step": 4950 }, { "epoch": 0.38, "learning_rate": 6.218647556605931e-06, "loss": 2.0301, "step": 4960 }, { "epoch": 0.38, "learning_rate": 6.211023862163605e-06, "loss": 2.0484, "step": 4970 }, { "epoch": 0.38, "learning_rate": 6.203400167721278e-06, "loss": 2.0398, "step": 4980 }, { "epoch": 0.38, "learning_rate": 6.195776473278951e-06, "loss": 2.0304, "step": 4990 }, { "epoch": 0.38, "learning_rate": 6.1881527788366245e-06, "loss": 2.0255, "step": 5000 }, { "epoch": 0.38, "learning_rate": 6.180529084394298e-06, "loss": 2.0346, "step": 5010 }, { "epoch": 0.38, "learning_rate": 6.172905389951971e-06, "loss": 2.0393, "step": 5020 }, { "epoch": 0.38, "learning_rate": 6.165281695509644e-06, "loss": 2.0346, "step": 5030 }, { "epoch": 0.38, "learning_rate": 6.157658001067318e-06, "loss": 2.0217, "step": 5040 }, { "epoch": 0.38, "learning_rate": 6.150034306624991e-06, "loss": 2.0355, "step": 5050 }, { "epoch": 0.39, "learning_rate": 6.142410612182664e-06, "loss": 2.035, "step": 5060 }, { "epoch": 0.39, "learning_rate": 6.134786917740337e-06, "loss": 2.0451, "step": 5070 }, { "epoch": 0.39, "learning_rate": 6.12716322329801e-06, "loss": 2.0346, "step": 5080 }, { "epoch": 0.39, "learning_rate": 6.1195395288556835e-06, "loss": 2.0338, "step": 5090 }, { "epoch": 0.39, "learning_rate": 6.111915834413357e-06, "loss": 2.0347, "step": 5100 }, { "epoch": 0.39, "learning_rate": 6.104292139971031e-06, "loss": 2.0337, "step": 5110 }, { "epoch": 0.39, "learning_rate": 6.096668445528704e-06, "loss": 2.0334, "step": 5120 }, { "epoch": 0.39, "learning_rate": 6.089044751086377e-06, "loss": 2.0412, "step": 5130 }, { "epoch": 0.39, "learning_rate": 6.08142105664405e-06, "loss": 2.0336, "step": 5140 }, { "epoch": 0.39, "learning_rate": 6.073797362201723e-06, "loss": 2.0422, "step": 5150 }, { "epoch": 0.39, "learning_rate": 6.066173667759396e-06, "loss": 2.0469, "step": 5160 }, { "epoch": 0.39, "learning_rate": 6.0585499733170694e-06, "loss": 2.0363, "step": 5170 }, { "epoch": 0.39, "learning_rate": 6.050926278874743e-06, "loss": 2.0424, "step": 5180 }, { "epoch": 0.4, "learning_rate": 6.0433025844324165e-06, "loss": 2.03, "step": 5190 }, { "epoch": 0.4, "learning_rate": 6.03567888999009e-06, "loss": 2.0365, "step": 5200 }, { "epoch": 0.4, "learning_rate": 6.028055195547763e-06, "loss": 2.0434, "step": 5210 }, { "epoch": 0.4, "learning_rate": 6.020431501105436e-06, "loss": 2.0355, "step": 5220 }, { "epoch": 0.4, "learning_rate": 6.012807806663109e-06, "loss": 2.0566, "step": 5230 }, { "epoch": 0.4, "learning_rate": 6.005184112220783e-06, "loss": 2.0328, "step": 5240 }, { "epoch": 0.4, "learning_rate": 5.997560417778456e-06, "loss": 2.0492, "step": 5250 }, { "epoch": 0.4, "learning_rate": 5.989936723336129e-06, "loss": 2.0398, "step": 5260 }, { "epoch": 0.4, "learning_rate": 5.9823130288938024e-06, "loss": 2.0492, "step": 5270 }, { "epoch": 0.4, "learning_rate": 5.9746893344514756e-06, "loss": 2.0533, "step": 5280 }, { "epoch": 0.4, "learning_rate": 5.967065640009149e-06, "loss": 2.041, "step": 5290 }, { "epoch": 0.4, "learning_rate": 5.959441945566822e-06, "loss": 2.05, "step": 5300 }, { "epoch": 0.4, "learning_rate": 5.951818251124496e-06, "loss": 2.048, "step": 5310 }, { "epoch": 0.41, "learning_rate": 5.944194556682169e-06, "loss": 2.0496, "step": 5320 }, { "epoch": 0.41, "learning_rate": 5.936570862239842e-06, "loss": 2.0385, "step": 5330 }, { "epoch": 0.41, "learning_rate": 5.928947167797515e-06, "loss": 2.0449, "step": 5340 }, { "epoch": 0.41, "learning_rate": 5.921323473355188e-06, "loss": 2.0557, "step": 5350 }, { "epoch": 0.41, "learning_rate": 5.9136997789128614e-06, "loss": 2.0461, "step": 5360 }, { "epoch": 0.41, "learning_rate": 5.906076084470535e-06, "loss": 2.0613, "step": 5370 }, { "epoch": 0.41, "learning_rate": 5.8984523900282086e-06, "loss": 2.0502, "step": 5380 }, { "epoch": 0.41, "learning_rate": 5.890828695585882e-06, "loss": 2.0527, "step": 5390 }, { "epoch": 0.41, "learning_rate": 5.883205001143555e-06, "loss": 2.0432, "step": 5400 }, { "epoch": 0.41, "learning_rate": 5.875581306701228e-06, "loss": 2.0625, "step": 5410 }, { "epoch": 0.41, "learning_rate": 5.867957612258901e-06, "loss": 2.0451, "step": 5420 }, { "epoch": 0.41, "learning_rate": 5.860333917816574e-06, "loss": 2.0502, "step": 5430 }, { "epoch": 0.41, "learning_rate": 5.852710223374247e-06, "loss": 2.0471, "step": 5440 }, { "epoch": 0.42, "learning_rate": 5.845086528931921e-06, "loss": 2.0594, "step": 5450 }, { "epoch": 0.42, "learning_rate": 5.8374628344895944e-06, "loss": 2.0691, "step": 5460 }, { "epoch": 0.42, "learning_rate": 5.8298391400472676e-06, "loss": 2.0635, "step": 5470 }, { "epoch": 0.42, "learning_rate": 5.822215445604941e-06, "loss": 2.0578, "step": 5480 }, { "epoch": 0.42, "learning_rate": 5.814591751162614e-06, "loss": 2.0492, "step": 5490 }, { "epoch": 0.42, "learning_rate": 5.806968056720287e-06, "loss": 2.0477, "step": 5500 }, { "epoch": 0.42, "learning_rate": 5.79934436227796e-06, "loss": 2.0588, "step": 5510 }, { "epoch": 0.42, "learning_rate": 5.791720667835634e-06, "loss": 2.0828, "step": 5520 }, { "epoch": 0.42, "learning_rate": 5.784096973393307e-06, "loss": 2.0654, "step": 5530 }, { "epoch": 0.42, "learning_rate": 5.77647327895098e-06, "loss": 2.0557, "step": 5540 }, { "epoch": 0.42, "learning_rate": 5.7688495845086535e-06, "loss": 2.0631, "step": 5550 }, { "epoch": 0.42, "learning_rate": 5.761225890066327e-06, "loss": 2.0498, "step": 5560 }, { "epoch": 0.42, "learning_rate": 5.753602195624e-06, "loss": 2.0621, "step": 5570 }, { "epoch": 0.43, "learning_rate": 5.745978501181673e-06, "loss": 2.0678, "step": 5580 }, { "epoch": 0.43, "learning_rate": 5.738354806739347e-06, "loss": 2.0576, "step": 5590 }, { "epoch": 0.43, "learning_rate": 5.73073111229702e-06, "loss": 2.0664, "step": 5600 }, { "epoch": 0.43, "learning_rate": 5.723107417854693e-06, "loss": 2.0656, "step": 5610 }, { "epoch": 0.43, "learning_rate": 5.715483723412366e-06, "loss": 2.0602, "step": 5620 }, { "epoch": 0.43, "learning_rate": 5.707860028970039e-06, "loss": 2.0629, "step": 5630 }, { "epoch": 0.43, "learning_rate": 5.7002363345277125e-06, "loss": 2.0559, "step": 5640 }, { "epoch": 0.43, "learning_rate": 5.692612640085386e-06, "loss": 2.0746, "step": 5650 }, { "epoch": 0.43, "learning_rate": 5.68498894564306e-06, "loss": 2.0811, "step": 5660 }, { "epoch": 0.43, "learning_rate": 5.677365251200733e-06, "loss": 2.066, "step": 5670 }, { "epoch": 0.43, "learning_rate": 5.669741556758406e-06, "loss": 2.067, "step": 5680 }, { "epoch": 0.43, "learning_rate": 5.662117862316079e-06, "loss": 2.0666, "step": 5690 }, { "epoch": 0.43, "learning_rate": 5.654494167873752e-06, "loss": 2.0752, "step": 5700 }, { "epoch": 0.44, "learning_rate": 5.646870473431425e-06, "loss": 2.0674, "step": 5710 }, { "epoch": 0.44, "learning_rate": 5.639246778989098e-06, "loss": 2.0709, "step": 5720 }, { "epoch": 0.44, "learning_rate": 5.631623084546772e-06, "loss": 2.0773, "step": 5730 }, { "epoch": 0.44, "learning_rate": 5.6239993901044455e-06, "loss": 2.0711, "step": 5740 }, { "epoch": 0.44, "learning_rate": 5.616375695662119e-06, "loss": 2.0736, "step": 5750 }, { "epoch": 0.44, "learning_rate": 5.608752001219792e-06, "loss": 2.0818, "step": 5760 }, { "epoch": 0.44, "learning_rate": 5.601128306777465e-06, "loss": 2.0799, "step": 5770 }, { "epoch": 0.44, "learning_rate": 5.593504612335138e-06, "loss": 2.0695, "step": 5780 }, { "epoch": 0.44, "learning_rate": 5.585880917892811e-06, "loss": 2.0869, "step": 5790 }, { "epoch": 0.44, "learning_rate": 5.578257223450485e-06, "loss": 2.0715, "step": 5800 }, { "epoch": 0.44, "learning_rate": 5.570633529008158e-06, "loss": 2.0811, "step": 5810 }, { "epoch": 0.44, "learning_rate": 5.563009834565831e-06, "loss": 2.0723, "step": 5820 }, { "epoch": 0.44, "learning_rate": 5.5553861401235045e-06, "loss": 2.076, "step": 5830 }, { "epoch": 0.45, "learning_rate": 5.547762445681178e-06, "loss": 2.0818, "step": 5840 }, { "epoch": 0.45, "learning_rate": 5.540138751238851e-06, "loss": 2.0766, "step": 5850 }, { "epoch": 0.45, "learning_rate": 5.532515056796524e-06, "loss": 2.0785, "step": 5860 }, { "epoch": 0.45, "learning_rate": 5.524891362354198e-06, "loss": 2.0932, "step": 5870 }, { "epoch": 0.45, "learning_rate": 5.517267667911871e-06, "loss": 2.0781, "step": 5880 }, { "epoch": 0.45, "learning_rate": 5.509643973469544e-06, "loss": 2.0846, "step": 5890 }, { "epoch": 0.45, "learning_rate": 5.502020279027217e-06, "loss": 2.0805, "step": 5900 }, { "epoch": 0.45, "learning_rate": 5.49439658458489e-06, "loss": 2.0871, "step": 5910 }, { "epoch": 0.45, "learning_rate": 5.4867728901425635e-06, "loss": 2.076, "step": 5920 }, { "epoch": 0.45, "learning_rate": 5.479149195700237e-06, "loss": 2.0955, "step": 5930 }, { "epoch": 0.45, "learning_rate": 5.471525501257911e-06, "loss": 2.0824, "step": 5940 }, { "epoch": 0.45, "learning_rate": 5.463901806815584e-06, "loss": 2.0908, "step": 5950 }, { "epoch": 0.45, "learning_rate": 5.456278112373257e-06, "loss": 2.0912, "step": 5960 }, { "epoch": 0.46, "learning_rate": 5.44865441793093e-06, "loss": 2.0803, "step": 5970 }, { "epoch": 0.46, "learning_rate": 5.441030723488603e-06, "loss": 2.0879, "step": 5980 }, { "epoch": 0.46, "learning_rate": 5.433407029046276e-06, "loss": 2.0838, "step": 5990 }, { "epoch": 0.46, "learning_rate": 5.425783334603949e-06, "loss": 2.0855, "step": 6000 }, { "epoch": 0.46, "learning_rate": 5.418159640161623e-06, "loss": 2.0756, "step": 6010 }, { "epoch": 0.46, "learning_rate": 5.4105359457192965e-06, "loss": 2.0959, "step": 6020 }, { "epoch": 0.46, "learning_rate": 5.40291225127697e-06, "loss": 2.1043, "step": 6030 }, { "epoch": 0.46, "learning_rate": 5.395288556834643e-06, "loss": 2.0967, "step": 6040 }, { "epoch": 0.46, "learning_rate": 5.387664862392316e-06, "loss": 2.0955, "step": 6050 }, { "epoch": 0.46, "learning_rate": 5.380041167949989e-06, "loss": 2.109, "step": 6060 }, { "epoch": 0.46, "learning_rate": 5.372417473507661e-06, "loss": 2.0945, "step": 6070 }, { "epoch": 0.46, "learning_rate": 5.364793779065336e-06, "loss": 2.0941, "step": 6080 }, { "epoch": 0.46, "learning_rate": 5.357170084623009e-06, "loss": 2.0947, "step": 6090 }, { "epoch": 0.47, "learning_rate": 5.349546390180682e-06, "loss": 2.1063, "step": 6100 }, { "epoch": 0.47, "learning_rate": 5.3419226957383556e-06, "loss": 2.0994, "step": 6110 }, { "epoch": 0.47, "learning_rate": 5.334299001296029e-06, "loss": 2.0977, "step": 6120 }, { "epoch": 0.47, "learning_rate": 5.326675306853701e-06, "loss": 2.1, "step": 6130 }, { "epoch": 0.47, "learning_rate": 5.319051612411374e-06, "loss": 2.1055, "step": 6140 }, { "epoch": 0.47, "learning_rate": 5.311427917969049e-06, "loss": 2.1027, "step": 6150 }, { "epoch": 0.47, "learning_rate": 5.303804223526722e-06, "loss": 2.0982, "step": 6160 }, { "epoch": 0.47, "learning_rate": 5.296180529084395e-06, "loss": 2.1033, "step": 6170 }, { "epoch": 0.47, "learning_rate": 5.288556834642068e-06, "loss": 2.1006, "step": 6180 }, { "epoch": 0.47, "learning_rate": 5.280933140199741e-06, "loss": 2.1041, "step": 6190 }, { "epoch": 0.47, "learning_rate": 5.273309445757414e-06, "loss": 2.1023, "step": 6200 }, { "epoch": 0.47, "learning_rate": 5.265685751315087e-06, "loss": 2.101, "step": 6210 }, { "epoch": 0.47, "learning_rate": 5.258062056872762e-06, "loss": 2.1107, "step": 6220 }, { "epoch": 0.47, "learning_rate": 5.250438362430435e-06, "loss": 2.1088, "step": 6230 }, { "epoch": 0.48, "learning_rate": 5.242814667988108e-06, "loss": 2.1156, "step": 6240 }, { "epoch": 0.48, "learning_rate": 5.235190973545781e-06, "loss": 2.1107, "step": 6250 }, { "epoch": 0.48, "learning_rate": 5.227567279103453e-06, "loss": 2.1043, "step": 6260 }, { "epoch": 0.48, "learning_rate": 5.2199435846611265e-06, "loss": 2.1189, "step": 6270 }, { "epoch": 0.48, "learning_rate": 5.2123198902188e-06, "loss": 2.1127, "step": 6280 }, { "epoch": 0.48, "learning_rate": 5.2046961957764744e-06, "loss": 2.1166, "step": 6290 }, { "epoch": 0.48, "learning_rate": 5.1970725013341476e-06, "loss": 2.1162, "step": 6300 }, { "epoch": 0.48, "learning_rate": 5.189448806891821e-06, "loss": 2.1133, "step": 6310 }, { "epoch": 0.48, "learning_rate": 5.181825112449493e-06, "loss": 2.1123, "step": 6320 }, { "epoch": 0.48, "learning_rate": 5.174201418007166e-06, "loss": 2.1129, "step": 6330 }, { "epoch": 0.48, "learning_rate": 5.166577723564839e-06, "loss": 2.1121, "step": 6340 }, { "epoch": 0.48, "learning_rate": 5.158954029122512e-06, "loss": 2.1117, "step": 6350 }, { "epoch": 0.48, "learning_rate": 5.151330334680187e-06, "loss": 2.1299, "step": 6360 }, { "epoch": 0.49, "learning_rate": 5.14370664023786e-06, "loss": 2.1189, "step": 6370 }, { "epoch": 0.49, "learning_rate": 5.136082945795533e-06, "loss": 2.115, "step": 6380 }, { "epoch": 0.49, "learning_rate": 5.128459251353206e-06, "loss": 2.1154, "step": 6390 }, { "epoch": 0.49, "learning_rate": 5.120835556910879e-06, "loss": 2.1184, "step": 6400 }, { "epoch": 0.49, "learning_rate": 5.113211862468552e-06, "loss": 2.1283, "step": 6410 }, { "epoch": 0.49, "learning_rate": 5.105588168026227e-06, "loss": 2.1189, "step": 6420 }, { "epoch": 0.49, "learning_rate": 5.0979644735839e-06, "loss": 2.1111, "step": 6430 }, { "epoch": 0.49, "learning_rate": 5.090340779141572e-06, "loss": 2.1221, "step": 6440 }, { "epoch": 0.49, "learning_rate": 5.082717084699245e-06, "loss": 2.1205, "step": 6450 }, { "epoch": 0.49, "learning_rate": 5.0750933902569185e-06, "loss": 2.1203, "step": 6460 }, { "epoch": 0.49, "learning_rate": 5.067469695814592e-06, "loss": 2.1207, "step": 6470 }, { "epoch": 0.49, "learning_rate": 5.059846001372265e-06, "loss": 2.1307, "step": 6480 }, { "epoch": 0.49, "learning_rate": 5.05222230692994e-06, "loss": 2.1279, "step": 6490 }, { "epoch": 0.5, "learning_rate": 5.044598612487612e-06, "loss": 2.1328, "step": 6500 }, { "epoch": 0.5, "learning_rate": 5.036974918045285e-06, "loss": 2.1258, "step": 6510 }, { "epoch": 0.5, "learning_rate": 5.029351223602958e-06, "loss": 2.1244, "step": 6520 }, { "epoch": 0.5, "learning_rate": 5.021727529160631e-06, "loss": 2.1348, "step": 6530 }, { "epoch": 0.5, "learning_rate": 5.014103834718304e-06, "loss": 2.127, "step": 6540 }, { "epoch": 0.5, "learning_rate": 5.0064801402759775e-06, "loss": 2.1283, "step": 6550 }, { "epoch": 0.5, "learning_rate": 4.9988564458336515e-06, "loss": 2.1297, "step": 6560 }, { "epoch": 0.5, "learning_rate": 4.991232751391325e-06, "loss": 2.1398, "step": 6570 }, { "epoch": 0.5, "learning_rate": 4.983609056948998e-06, "loss": 2.1322, "step": 6580 }, { "epoch": 0.5, "learning_rate": 4.975985362506671e-06, "loss": 2.1365, "step": 6590 }, { "epoch": 0.5, "learning_rate": 4.968361668064344e-06, "loss": 2.1221, "step": 6600 }, { "epoch": 0.5, "learning_rate": 4.960737973622017e-06, "loss": 2.1385, "step": 6610 }, { "epoch": 0.5, "learning_rate": 4.953114279179691e-06, "loss": 2.1387, "step": 6620 }, { "epoch": 0.51, "learning_rate": 4.945490584737364e-06, "loss": 2.1324, "step": 6630 }, { "epoch": 0.51, "learning_rate": 4.937866890295037e-06, "loss": 2.1234, "step": 6640 }, { "epoch": 0.51, "learning_rate": 4.9302431958527105e-06, "loss": 2.1393, "step": 6650 }, { "epoch": 0.51, "learning_rate": 4.922619501410384e-06, "loss": 2.1354, "step": 6660 }, { "epoch": 0.51, "learning_rate": 4.914995806968057e-06, "loss": 2.1361, "step": 6670 }, { "epoch": 0.51, "learning_rate": 4.907372112525731e-06, "loss": 2.1305, "step": 6680 }, { "epoch": 0.51, "learning_rate": 4.899748418083404e-06, "loss": 2.1346, "step": 6690 }, { "epoch": 0.51, "learning_rate": 4.892124723641077e-06, "loss": 2.1426, "step": 6700 }, { "epoch": 0.51, "learning_rate": 4.88450102919875e-06, "loss": 2.1357, "step": 6710 }, { "epoch": 0.51, "learning_rate": 4.876877334756423e-06, "loss": 2.1373, "step": 6720 }, { "epoch": 0.51, "learning_rate": 4.869253640314096e-06, "loss": 2.1367, "step": 6730 }, { "epoch": 0.51, "learning_rate": 4.8616299458717695e-06, "loss": 2.1273, "step": 6740 }, { "epoch": 0.51, "learning_rate": 4.8540062514294435e-06, "loss": 2.1436, "step": 6750 }, { "epoch": 0.52, "learning_rate": 4.846382556987117e-06, "loss": 2.1396, "step": 6760 }, { "epoch": 0.52, "learning_rate": 4.83875886254479e-06, "loss": 2.1492, "step": 6770 }, { "epoch": 0.52, "learning_rate": 4.831135168102463e-06, "loss": 2.1594, "step": 6780 }, { "epoch": 0.52, "learning_rate": 4.823511473660136e-06, "loss": 2.1484, "step": 6790 }, { "epoch": 0.52, "learning_rate": 4.815887779217809e-06, "loss": 2.1502, "step": 6800 }, { "epoch": 0.52, "learning_rate": 4.808264084775482e-06, "loss": 2.1383, "step": 6810 }, { "epoch": 0.52, "learning_rate": 4.800640390333156e-06, "loss": 2.1465, "step": 6820 }, { "epoch": 0.52, "learning_rate": 4.793016695890829e-06, "loss": 2.1533, "step": 6830 }, { "epoch": 0.52, "learning_rate": 4.7853930014485025e-06, "loss": 2.1541, "step": 6840 }, { "epoch": 0.52, "learning_rate": 4.777769307006176e-06, "loss": 2.149, "step": 6850 }, { "epoch": 0.52, "learning_rate": 4.770145612563849e-06, "loss": 2.1529, "step": 6860 }, { "epoch": 0.52, "learning_rate": 4.762521918121522e-06, "loss": 2.1562, "step": 6870 }, { "epoch": 0.52, "learning_rate": 4.754898223679195e-06, "loss": 2.1553, "step": 6880 }, { "epoch": 0.53, "learning_rate": 4.747274529236869e-06, "loss": 2.1527, "step": 6890 }, { "epoch": 0.53, "learning_rate": 4.739650834794542e-06, "loss": 2.1514, "step": 6900 }, { "epoch": 0.53, "learning_rate": 4.732027140352215e-06, "loss": 2.1623, "step": 6910 }, { "epoch": 0.53, "learning_rate": 4.7244034459098884e-06, "loss": 2.1709, "step": 6920 }, { "epoch": 0.53, "learning_rate": 4.7167797514675616e-06, "loss": 2.1619, "step": 6930 }, { "epoch": 0.53, "learning_rate": 4.709156057025235e-06, "loss": 2.1502, "step": 6940 }, { "epoch": 0.53, "learning_rate": 4.701532362582908e-06, "loss": 2.1461, "step": 6950 }, { "epoch": 0.53, "learning_rate": 4.693908668140582e-06, "loss": 2.1596, "step": 6960 }, { "epoch": 0.53, "learning_rate": 4.686284973698255e-06, "loss": 2.1648, "step": 6970 }, { "epoch": 0.53, "learning_rate": 4.678661279255928e-06, "loss": 2.1654, "step": 6980 }, { "epoch": 0.53, "learning_rate": 4.671037584813601e-06, "loss": 2.1627, "step": 6990 }, { "epoch": 0.53, "learning_rate": 4.663413890371274e-06, "loss": 2.1688, "step": 7000 }, { "epoch": 0.53, "learning_rate": 4.6557901959289475e-06, "loss": 2.1623, "step": 7010 }, { "epoch": 0.54, "learning_rate": 4.648166501486621e-06, "loss": 2.1662, "step": 7020 }, { "epoch": 0.54, "learning_rate": 4.6405428070442946e-06, "loss": 2.1654, "step": 7030 }, { "epoch": 0.54, "learning_rate": 4.632919112601968e-06, "loss": 2.1582, "step": 7040 }, { "epoch": 0.54, "learning_rate": 4.625295418159641e-06, "loss": 2.1562, "step": 7050 }, { "epoch": 0.54, "learning_rate": 4.617671723717314e-06, "loss": 2.1633, "step": 7060 }, { "epoch": 0.54, "learning_rate": 4.610048029274987e-06, "loss": 2.1588, "step": 7070 }, { "epoch": 0.54, "learning_rate": 4.60242433483266e-06, "loss": 2.1645, "step": 7080 }, { "epoch": 0.54, "learning_rate": 4.594800640390333e-06, "loss": 2.1621, "step": 7090 }, { "epoch": 0.54, "learning_rate": 4.587176945948007e-06, "loss": 2.1658, "step": 7100 }, { "epoch": 0.54, "learning_rate": 4.5795532515056805e-06, "loss": 2.1678, "step": 7110 }, { "epoch": 0.54, "learning_rate": 4.571929557063354e-06, "loss": 2.1645, "step": 7120 }, { "epoch": 0.54, "learning_rate": 4.564305862621026e-06, "loss": 2.1699, "step": 7130 }, { "epoch": 0.54, "learning_rate": 4.5566821681787e-06, "loss": 2.1736, "step": 7140 }, { "epoch": 0.55, "learning_rate": 4.549058473736373e-06, "loss": 2.167, "step": 7150 }, { "epoch": 0.55, "learning_rate": 4.541434779294046e-06, "loss": 2.177, "step": 7160 }, { "epoch": 0.55, "learning_rate": 4.53381108485172e-06, "loss": 2.1752, "step": 7170 }, { "epoch": 0.55, "learning_rate": 4.526187390409393e-06, "loss": 2.1721, "step": 7180 }, { "epoch": 0.55, "learning_rate": 4.5185636959670655e-06, "loss": 2.1844, "step": 7190 }, { "epoch": 0.55, "learning_rate": 4.510940001524739e-06, "loss": 2.1762, "step": 7200 }, { "epoch": 0.55, "learning_rate": 4.503316307082413e-06, "loss": 2.182, "step": 7210 }, { "epoch": 0.55, "learning_rate": 4.495692612640086e-06, "loss": 2.1824, "step": 7220 }, { "epoch": 0.55, "learning_rate": 4.488068918197759e-06, "loss": 2.175, "step": 7230 }, { "epoch": 0.55, "learning_rate": 4.480445223755433e-06, "loss": 2.1848, "step": 7240 }, { "epoch": 0.55, "learning_rate": 4.472821529313105e-06, "loss": 2.1758, "step": 7250 }, { "epoch": 0.55, "learning_rate": 4.465197834870778e-06, "loss": 2.1885, "step": 7260 }, { "epoch": 0.55, "learning_rate": 4.457574140428452e-06, "loss": 2.1809, "step": 7270 }, { "epoch": 0.56, "learning_rate": 4.449950445986125e-06, "loss": 2.1908, "step": 7280 }, { "epoch": 0.56, "learning_rate": 4.4423267515437985e-06, "loss": 2.183, "step": 7290 }, { "epoch": 0.56, "learning_rate": 4.434703057101472e-06, "loss": 2.1826, "step": 7300 }, { "epoch": 0.56, "learning_rate": 4.427079362659145e-06, "loss": 2.1908, "step": 7310 }, { "epoch": 0.56, "learning_rate": 4.419455668216818e-06, "loss": 2.1854, "step": 7320 }, { "epoch": 0.56, "learning_rate": 4.411831973774491e-06, "loss": 2.1916, "step": 7330 }, { "epoch": 0.56, "learning_rate": 4.404208279332165e-06, "loss": 2.1836, "step": 7340 }, { "epoch": 0.56, "learning_rate": 4.396584584889838e-06, "loss": 2.1855, "step": 7350 }, { "epoch": 0.56, "learning_rate": 4.388960890447511e-06, "loss": 2.1895, "step": 7360 }, { "epoch": 0.56, "learning_rate": 4.381337196005184e-06, "loss": 2.1855, "step": 7370 }, { "epoch": 0.56, "learning_rate": 4.3737135015628575e-06, "loss": 2.1826, "step": 7380 }, { "epoch": 0.56, "learning_rate": 4.366089807120531e-06, "loss": 2.1926, "step": 7390 }, { "epoch": 0.56, "learning_rate": 4.358466112678204e-06, "loss": 2.1895, "step": 7400 }, { "epoch": 0.56, "learning_rate": 4.350842418235878e-06, "loss": 2.1973, "step": 7410 }, { "epoch": 0.57, "learning_rate": 4.343218723793551e-06, "loss": 2.1936, "step": 7420 }, { "epoch": 0.57, "learning_rate": 4.335595029351224e-06, "loss": 2.1982, "step": 7430 }, { "epoch": 0.57, "learning_rate": 4.327971334908897e-06, "loss": 2.1973, "step": 7440 }, { "epoch": 0.57, "learning_rate": 4.32034764046657e-06, "loss": 2.1963, "step": 7450 }, { "epoch": 0.57, "learning_rate": 4.312723946024243e-06, "loss": 2.193, "step": 7460 }, { "epoch": 0.57, "learning_rate": 4.3051002515819165e-06, "loss": 2.2084, "step": 7470 }, { "epoch": 0.57, "learning_rate": 4.2974765571395905e-06, "loss": 2.2049, "step": 7480 }, { "epoch": 0.57, "learning_rate": 4.289852862697264e-06, "loss": 2.1889, "step": 7490 }, { "epoch": 0.57, "learning_rate": 4.282229168254937e-06, "loss": 2.1986, "step": 7500 }, { "epoch": 0.57, "learning_rate": 4.27460547381261e-06, "loss": 2.1945, "step": 7510 }, { "epoch": 0.57, "learning_rate": 4.266981779370283e-06, "loss": 2.1998, "step": 7520 }, { "epoch": 0.57, "learning_rate": 4.259358084927956e-06, "loss": 2.1986, "step": 7530 }, { "epoch": 0.57, "learning_rate": 4.251734390485629e-06, "loss": 2.201, "step": 7540 }, { "epoch": 0.58, "learning_rate": 4.244110696043303e-06, "loss": 2.2078, "step": 7550 }, { "epoch": 0.58, "learning_rate": 4.236487001600976e-06, "loss": 2.2041, "step": 7560 }, { "epoch": 0.58, "learning_rate": 4.2288633071586495e-06, "loss": 2.2051, "step": 7570 }, { "epoch": 0.58, "learning_rate": 4.221239612716323e-06, "loss": 2.207, "step": 7580 }, { "epoch": 0.58, "learning_rate": 4.213615918273996e-06, "loss": 2.2041, "step": 7590 }, { "epoch": 0.58, "learning_rate": 4.205992223831669e-06, "loss": 2.2074, "step": 7600 }, { "epoch": 0.58, "learning_rate": 4.198368529389342e-06, "loss": 2.2004, "step": 7610 }, { "epoch": 0.58, "learning_rate": 4.190744834947016e-06, "loss": 2.2121, "step": 7620 }, { "epoch": 0.58, "learning_rate": 4.183121140504689e-06, "loss": 2.2131, "step": 7630 }, { "epoch": 0.58, "learning_rate": 4.175497446062362e-06, "loss": 2.2131, "step": 7640 }, { "epoch": 0.58, "learning_rate": 4.1678737516200354e-06, "loss": 2.2043, "step": 7650 }, { "epoch": 0.58, "learning_rate": 4.1602500571777086e-06, "loss": 2.2057, "step": 7660 }, { "epoch": 0.58, "learning_rate": 4.152626362735382e-06, "loss": 2.2066, "step": 7670 }, { "epoch": 0.59, "learning_rate": 4.145002668293055e-06, "loss": 2.2213, "step": 7680 }, { "epoch": 0.59, "learning_rate": 4.137378973850729e-06, "loss": 2.2234, "step": 7690 }, { "epoch": 0.59, "learning_rate": 4.129755279408402e-06, "loss": 2.2174, "step": 7700 }, { "epoch": 0.59, "learning_rate": 4.122131584966075e-06, "loss": 2.2186, "step": 7710 }, { "epoch": 0.59, "learning_rate": 4.114507890523748e-06, "loss": 2.2115, "step": 7720 }, { "epoch": 0.59, "learning_rate": 4.106884196081421e-06, "loss": 2.2293, "step": 7730 }, { "epoch": 0.59, "learning_rate": 4.0992605016390945e-06, "loss": 2.2129, "step": 7740 }, { "epoch": 0.59, "learning_rate": 4.091636807196768e-06, "loss": 2.2178, "step": 7750 }, { "epoch": 0.59, "learning_rate": 4.0840131127544416e-06, "loss": 2.2182, "step": 7760 }, { "epoch": 0.59, "learning_rate": 4.076389418312115e-06, "loss": 2.227, "step": 7770 }, { "epoch": 0.59, "learning_rate": 4.068765723869788e-06, "loss": 2.2273, "step": 7780 }, { "epoch": 0.59, "learning_rate": 4.061142029427461e-06, "loss": 2.2225, "step": 7790 }, { "epoch": 0.59, "learning_rate": 4.053518334985134e-06, "loss": 2.2307, "step": 7800 }, { "epoch": 0.6, "learning_rate": 4.045894640542807e-06, "loss": 2.2209, "step": 7810 }, { "epoch": 0.6, "learning_rate": 4.03827094610048e-06, "loss": 2.2314, "step": 7820 }, { "epoch": 0.6, "learning_rate": 4.030647251658154e-06, "loss": 2.2258, "step": 7830 }, { "epoch": 0.6, "learning_rate": 4.0230235572158275e-06, "loss": 2.233, "step": 7840 }, { "epoch": 0.6, "learning_rate": 4.015399862773501e-06, "loss": 2.2275, "step": 7850 }, { "epoch": 0.6, "learning_rate": 4.007776168331174e-06, "loss": 2.2271, "step": 7860 }, { "epoch": 0.6, "learning_rate": 4.000152473888847e-06, "loss": 2.2324, "step": 7870 }, { "epoch": 0.6, "learning_rate": 3.99252877944652e-06, "loss": 2.2418, "step": 7880 }, { "epoch": 0.6, "learning_rate": 3.984905085004193e-06, "loss": 2.2348, "step": 7890 }, { "epoch": 0.6, "learning_rate": 3.977281390561867e-06, "loss": 2.2416, "step": 7900 }, { "epoch": 0.6, "learning_rate": 3.96965769611954e-06, "loss": 2.2408, "step": 7910 }, { "epoch": 0.6, "learning_rate": 3.962034001677213e-06, "loss": 2.2395, "step": 7920 }, { "epoch": 0.6, "learning_rate": 3.9544103072348865e-06, "loss": 2.2328, "step": 7930 }, { "epoch": 0.61, "learning_rate": 3.94678661279256e-06, "loss": 2.2471, "step": 7940 }, { "epoch": 0.61, "learning_rate": 3.939162918350233e-06, "loss": 2.2352, "step": 7950 }, { "epoch": 0.61, "learning_rate": 3.931539223907906e-06, "loss": 2.2406, "step": 7960 }, { "epoch": 0.61, "learning_rate": 3.92391552946558e-06, "loss": 2.2322, "step": 7970 }, { "epoch": 0.61, "learning_rate": 3.916291835023253e-06, "loss": 2.2447, "step": 7980 }, { "epoch": 0.61, "learning_rate": 3.908668140580926e-06, "loss": 2.2412, "step": 7990 }, { "epoch": 0.61, "learning_rate": 3.901044446138599e-06, "loss": 2.2412, "step": 8000 }, { "epoch": 0.61, "learning_rate": 3.893420751696272e-06, "loss": 2.2361, "step": 8010 }, { "epoch": 0.61, "learning_rate": 3.8857970572539455e-06, "loss": 2.2393, "step": 8020 }, { "epoch": 0.61, "learning_rate": 3.878173362811619e-06, "loss": 2.2414, "step": 8030 }, { "epoch": 0.61, "learning_rate": 3.870549668369293e-06, "loss": 2.2477, "step": 8040 }, { "epoch": 0.61, "learning_rate": 3.862925973926966e-06, "loss": 2.2484, "step": 8050 }, { "epoch": 0.61, "learning_rate": 3.855302279484638e-06, "loss": 2.2463, "step": 8060 }, { "epoch": 0.62, "learning_rate": 3.847678585042312e-06, "loss": 2.2463, "step": 8070 }, { "epoch": 0.62, "learning_rate": 3.840054890599985e-06, "loss": 2.2387, "step": 8080 }, { "epoch": 0.62, "learning_rate": 3.832431196157658e-06, "loss": 2.2482, "step": 8090 }, { "epoch": 0.62, "learning_rate": 3.824807501715331e-06, "loss": 2.249, "step": 8100 }, { "epoch": 0.62, "learning_rate": 3.817183807273005e-06, "loss": 2.251, "step": 8110 }, { "epoch": 0.62, "learning_rate": 3.809560112830678e-06, "loss": 2.2467, "step": 8120 }, { "epoch": 0.62, "learning_rate": 3.801936418388351e-06, "loss": 2.2529, "step": 8130 }, { "epoch": 0.62, "learning_rate": 3.7943127239460248e-06, "loss": 2.25, "step": 8140 }, { "epoch": 0.62, "learning_rate": 3.786689029503698e-06, "loss": 2.2533, "step": 8150 }, { "epoch": 0.62, "learning_rate": 3.779065335061371e-06, "loss": 2.2539, "step": 8160 }, { "epoch": 0.62, "learning_rate": 3.771441640619044e-06, "loss": 2.2549, "step": 8170 }, { "epoch": 0.62, "learning_rate": 3.7638179461767177e-06, "loss": 2.2439, "step": 8180 }, { "epoch": 0.62, "learning_rate": 3.756194251734391e-06, "loss": 2.2607, "step": 8190 }, { "epoch": 0.63, "learning_rate": 3.748570557292064e-06, "loss": 2.2596, "step": 8200 }, { "epoch": 0.63, "learning_rate": 3.7409468628497375e-06, "loss": 2.2547, "step": 8210 }, { "epoch": 0.63, "learning_rate": 3.7333231684074107e-06, "loss": 2.2695, "step": 8220 }, { "epoch": 0.63, "learning_rate": 3.7256994739650838e-06, "loss": 2.2631, "step": 8230 }, { "epoch": 0.63, "learning_rate": 3.718075779522757e-06, "loss": 2.2543, "step": 8240 }, { "epoch": 0.63, "learning_rate": 3.7104520850804305e-06, "loss": 2.2664, "step": 8250 }, { "epoch": 0.63, "learning_rate": 3.7028283906381036e-06, "loss": 2.2568, "step": 8260 }, { "epoch": 0.63, "learning_rate": 3.6952046961957767e-06, "loss": 2.2645, "step": 8270 }, { "epoch": 0.63, "learning_rate": 3.6875810017534503e-06, "loss": 2.2658, "step": 8280 }, { "epoch": 0.63, "learning_rate": 3.6799573073111234e-06, "loss": 2.2654, "step": 8290 }, { "epoch": 0.63, "learning_rate": 3.6723336128687965e-06, "loss": 2.2682, "step": 8300 }, { "epoch": 0.63, "learning_rate": 3.6647099184264697e-06, "loss": 2.2715, "step": 8310 }, { "epoch": 0.63, "learning_rate": 3.6570862239841432e-06, "loss": 2.2707, "step": 8320 }, { "epoch": 0.64, "learning_rate": 3.6494625295418164e-06, "loss": 2.2648, "step": 8330 }, { "epoch": 0.64, "learning_rate": 3.6418388350994895e-06, "loss": 2.2693, "step": 8340 }, { "epoch": 0.64, "learning_rate": 3.634215140657163e-06, "loss": 2.2752, "step": 8350 }, { "epoch": 0.64, "learning_rate": 3.626591446214836e-06, "loss": 2.2711, "step": 8360 }, { "epoch": 0.64, "learning_rate": 3.6189677517725093e-06, "loss": 2.2746, "step": 8370 }, { "epoch": 0.64, "learning_rate": 3.6113440573301824e-06, "loss": 2.2654, "step": 8380 }, { "epoch": 0.64, "learning_rate": 3.603720362887856e-06, "loss": 2.2721, "step": 8390 }, { "epoch": 0.64, "learning_rate": 3.596096668445529e-06, "loss": 2.2746, "step": 8400 }, { "epoch": 0.64, "learning_rate": 3.5884729740032022e-06, "loss": 2.2684, "step": 8410 }, { "epoch": 0.64, "learning_rate": 3.580849279560876e-06, "loss": 2.2781, "step": 8420 }, { "epoch": 0.64, "learning_rate": 3.573225585118549e-06, "loss": 2.2775, "step": 8430 }, { "epoch": 0.64, "learning_rate": 3.565601890676222e-06, "loss": 2.2754, "step": 8440 }, { "epoch": 0.64, "learning_rate": 3.5579781962338956e-06, "loss": 2.2756, "step": 8450 }, { "epoch": 0.64, "learning_rate": 3.5503545017915687e-06, "loss": 2.2834, "step": 8460 }, { "epoch": 0.65, "learning_rate": 3.542730807349242e-06, "loss": 2.282, "step": 8470 }, { "epoch": 0.65, "learning_rate": 3.5351071129069146e-06, "loss": 2.2748, "step": 8480 }, { "epoch": 0.65, "learning_rate": 3.5274834184645886e-06, "loss": 2.2865, "step": 8490 }, { "epoch": 0.65, "learning_rate": 3.5198597240222617e-06, "loss": 2.2859, "step": 8500 }, { "epoch": 0.65, "learning_rate": 3.5122360295799344e-06, "loss": 2.2908, "step": 8510 }, { "epoch": 0.65, "learning_rate": 3.5046123351376084e-06, "loss": 2.2895, "step": 8520 }, { "epoch": 0.65, "learning_rate": 3.4969886406952815e-06, "loss": 2.2873, "step": 8530 }, { "epoch": 0.65, "learning_rate": 3.4893649462529542e-06, "loss": 2.2807, "step": 8540 }, { "epoch": 0.65, "learning_rate": 3.4817412518106273e-06, "loss": 2.2889, "step": 8550 }, { "epoch": 0.65, "learning_rate": 3.4741175573683013e-06, "loss": 2.2893, "step": 8560 }, { "epoch": 0.65, "learning_rate": 3.466493862925974e-06, "loss": 2.2877, "step": 8570 }, { "epoch": 0.65, "learning_rate": 3.458870168483647e-06, "loss": 2.2939, "step": 8580 }, { "epoch": 0.65, "learning_rate": 3.451246474041321e-06, "loss": 2.2947, "step": 8590 }, { "epoch": 0.66, "learning_rate": 3.443622779598994e-06, "loss": 2.2932, "step": 8600 }, { "epoch": 0.66, "learning_rate": 3.435999085156667e-06, "loss": 2.2941, "step": 8610 }, { "epoch": 0.66, "learning_rate": 3.42837539071434e-06, "loss": 2.291, "step": 8620 }, { "epoch": 0.66, "learning_rate": 3.4207516962720137e-06, "loss": 2.2855, "step": 8630 }, { "epoch": 0.66, "learning_rate": 3.413128001829687e-06, "loss": 2.2846, "step": 8640 }, { "epoch": 0.66, "learning_rate": 3.40550430738736e-06, "loss": 2.283, "step": 8650 }, { "epoch": 0.66, "learning_rate": 3.3978806129450335e-06, "loss": 2.2908, "step": 8660 }, { "epoch": 0.66, "learning_rate": 3.3902569185027066e-06, "loss": 2.2951, "step": 8670 }, { "epoch": 0.66, "learning_rate": 3.3826332240603797e-06, "loss": 2.2889, "step": 8680 }, { "epoch": 0.66, "learning_rate": 3.375009529618053e-06, "loss": 2.2871, "step": 8690 }, { "epoch": 0.66, "learning_rate": 3.3673858351757264e-06, "loss": 2.2875, "step": 8700 }, { "epoch": 0.66, "learning_rate": 3.3597621407333996e-06, "loss": 2.2971, "step": 8710 }, { "epoch": 0.66, "learning_rate": 3.3521384462910727e-06, "loss": 2.2992, "step": 8720 }, { "epoch": 0.67, "learning_rate": 3.3445147518487462e-06, "loss": 2.2961, "step": 8730 }, { "epoch": 0.67, "learning_rate": 3.3368910574064194e-06, "loss": 2.3055, "step": 8740 }, { "epoch": 0.67, "learning_rate": 3.3292673629640925e-06, "loss": 2.3043, "step": 8750 }, { "epoch": 0.67, "learning_rate": 3.3216436685217656e-06, "loss": 2.3014, "step": 8760 }, { "epoch": 0.67, "learning_rate": 3.314019974079439e-06, "loss": 2.3037, "step": 8770 }, { "epoch": 0.67, "learning_rate": 3.3063962796371123e-06, "loss": 2.2908, "step": 8780 }, { "epoch": 0.67, "learning_rate": 3.2987725851947854e-06, "loss": 2.3041, "step": 8790 }, { "epoch": 0.67, "learning_rate": 3.291148890752459e-06, "loss": 2.3021, "step": 8800 }, { "epoch": 0.67, "learning_rate": 3.283525196310132e-06, "loss": 2.3039, "step": 8810 }, { "epoch": 0.67, "learning_rate": 3.2759015018678053e-06, "loss": 2.3041, "step": 8820 }, { "epoch": 0.67, "learning_rate": 3.2682778074254784e-06, "loss": 2.3012, "step": 8830 }, { "epoch": 0.67, "learning_rate": 3.260654112983152e-06, "loss": 2.2986, "step": 8840 }, { "epoch": 0.67, "learning_rate": 3.253030418540825e-06, "loss": 2.3004, "step": 8850 }, { "epoch": 0.68, "learning_rate": 3.245406724098498e-06, "loss": 2.301, "step": 8860 }, { "epoch": 0.68, "learning_rate": 3.2377830296561718e-06, "loss": 2.3121, "step": 8870 }, { "epoch": 0.68, "learning_rate": 3.230159335213845e-06, "loss": 2.3102, "step": 8880 }, { "epoch": 0.68, "learning_rate": 3.222535640771518e-06, "loss": 2.3115, "step": 8890 }, { "epoch": 0.68, "learning_rate": 3.214911946329191e-06, "loss": 2.31, "step": 8900 }, { "epoch": 0.68, "learning_rate": 3.2072882518868647e-06, "loss": 2.3121, "step": 8910 }, { "epoch": 0.68, "learning_rate": 3.199664557444538e-06, "loss": 2.3137, "step": 8920 }, { "epoch": 0.68, "learning_rate": 3.192040863002211e-06, "loss": 2.309, "step": 8930 }, { "epoch": 0.68, "learning_rate": 3.1844171685598845e-06, "loss": 2.3104, "step": 8940 }, { "epoch": 0.68, "learning_rate": 3.1767934741175576e-06, "loss": 2.308, "step": 8950 }, { "epoch": 0.68, "learning_rate": 3.1691697796752308e-06, "loss": 2.3199, "step": 8960 }, { "epoch": 0.68, "learning_rate": 3.161546085232904e-06, "loss": 2.3139, "step": 8970 }, { "epoch": 0.68, "learning_rate": 3.1539223907905775e-06, "loss": 2.3105, "step": 8980 }, { "epoch": 0.69, "learning_rate": 3.1462986963482506e-06, "loss": 2.3207, "step": 8990 }, { "epoch": 0.69, "learning_rate": 3.1386750019059237e-06, "loss": 2.3186, "step": 9000 }, { "epoch": 0.69, "learning_rate": 3.1310513074635973e-06, "loss": 2.3189, "step": 9010 }, { "epoch": 0.69, "learning_rate": 3.1234276130212704e-06, "loss": 2.3186, "step": 9020 }, { "epoch": 0.69, "learning_rate": 3.1158039185789435e-06, "loss": 2.3158, "step": 9030 }, { "epoch": 0.69, "learning_rate": 3.108180224136617e-06, "loss": 2.3297, "step": 9040 }, { "epoch": 0.69, "learning_rate": 3.1005565296942902e-06, "loss": 2.3268, "step": 9050 }, { "epoch": 0.69, "learning_rate": 3.0929328352519634e-06, "loss": 2.3172, "step": 9060 }, { "epoch": 0.69, "learning_rate": 3.0853091408096365e-06, "loss": 2.3182, "step": 9070 }, { "epoch": 0.69, "learning_rate": 3.07768544636731e-06, "loss": 2.3219, "step": 9080 }, { "epoch": 0.69, "learning_rate": 3.070061751924983e-06, "loss": 2.3189, "step": 9090 }, { "epoch": 0.69, "learning_rate": 3.0624380574826563e-06, "loss": 2.3193, "step": 9100 }, { "epoch": 0.69, "learning_rate": 3.05481436304033e-06, "loss": 2.3289, "step": 9110 }, { "epoch": 0.7, "learning_rate": 3.047190668598003e-06, "loss": 2.3322, "step": 9120 }, { "epoch": 0.7, "learning_rate": 3.039566974155676e-06, "loss": 2.3242, "step": 9130 }, { "epoch": 0.7, "learning_rate": 3.0319432797133492e-06, "loss": 2.3229, "step": 9140 }, { "epoch": 0.7, "learning_rate": 3.024319585271023e-06, "loss": 2.334, "step": 9150 }, { "epoch": 0.7, "learning_rate": 3.016695890828696e-06, "loss": 2.3248, "step": 9160 }, { "epoch": 0.7, "learning_rate": 3.009072196386369e-06, "loss": 2.326, "step": 9170 }, { "epoch": 0.7, "learning_rate": 3.0014485019440426e-06, "loss": 2.3285, "step": 9180 }, { "epoch": 0.7, "learning_rate": 2.9938248075017157e-06, "loss": 2.3281, "step": 9190 }, { "epoch": 0.7, "learning_rate": 2.986201113059389e-06, "loss": 2.3379, "step": 9200 }, { "epoch": 0.7, "learning_rate": 2.978577418617062e-06, "loss": 2.3277, "step": 9210 }, { "epoch": 0.7, "learning_rate": 2.9709537241747356e-06, "loss": 2.3326, "step": 9220 }, { "epoch": 0.7, "learning_rate": 2.9633300297324087e-06, "loss": 2.3279, "step": 9230 }, { "epoch": 0.7, "learning_rate": 2.955706335290082e-06, "loss": 2.3307, "step": 9240 }, { "epoch": 0.71, "learning_rate": 2.9480826408477554e-06, "loss": 2.3354, "step": 9250 }, { "epoch": 0.71, "learning_rate": 2.9404589464054285e-06, "loss": 2.3357, "step": 9260 }, { "epoch": 0.71, "learning_rate": 2.9328352519631016e-06, "loss": 2.3281, "step": 9270 }, { "epoch": 0.71, "learning_rate": 2.9252115575207748e-06, "loss": 2.3297, "step": 9280 }, { "epoch": 0.71, "learning_rate": 2.9175878630784483e-06, "loss": 2.3375, "step": 9290 }, { "epoch": 0.71, "learning_rate": 2.9099641686361214e-06, "loss": 2.342, "step": 9300 }, { "epoch": 0.71, "learning_rate": 2.9023404741937946e-06, "loss": 2.3305, "step": 9310 }, { "epoch": 0.71, "learning_rate": 2.894716779751468e-06, "loss": 2.3375, "step": 9320 }, { "epoch": 0.71, "learning_rate": 2.8870930853091413e-06, "loss": 2.3422, "step": 9330 }, { "epoch": 0.71, "learning_rate": 2.8794693908668144e-06, "loss": 2.3529, "step": 9340 }, { "epoch": 0.71, "learning_rate": 2.871845696424487e-06, "loss": 2.3475, "step": 9350 }, { "epoch": 0.71, "learning_rate": 2.864222001982161e-06, "loss": 2.3412, "step": 9360 }, { "epoch": 0.71, "learning_rate": 2.856598307539834e-06, "loss": 2.3396, "step": 9370 }, { "epoch": 0.72, "learning_rate": 2.848974613097507e-06, "loss": 2.3504, "step": 9380 }, { "epoch": 0.72, "learning_rate": 2.841350918655181e-06, "loss": 2.3422, "step": 9390 }, { "epoch": 0.72, "learning_rate": 2.833727224212854e-06, "loss": 2.3461, "step": 9400 }, { "epoch": 0.72, "learning_rate": 2.8261035297705267e-06, "loss": 2.3482, "step": 9410 }, { "epoch": 0.72, "learning_rate": 2.8184798353282e-06, "loss": 2.3441, "step": 9420 }, { "epoch": 0.72, "learning_rate": 2.810856140885874e-06, "loss": 2.3504, "step": 9430 }, { "epoch": 0.72, "learning_rate": 2.8032324464435465e-06, "loss": 2.3469, "step": 9440 }, { "epoch": 0.72, "learning_rate": 2.7956087520012197e-06, "loss": 2.349, "step": 9450 }, { "epoch": 0.72, "learning_rate": 2.7879850575588937e-06, "loss": 2.3463, "step": 9460 }, { "epoch": 0.72, "learning_rate": 2.7803613631165664e-06, "loss": 2.3475, "step": 9470 }, { "epoch": 0.72, "learning_rate": 2.7727376686742395e-06, "loss": 2.3492, "step": 9480 }, { "epoch": 0.72, "learning_rate": 2.7651139742319126e-06, "loss": 2.3531, "step": 9490 }, { "epoch": 0.72, "learning_rate": 2.757490279789586e-06, "loss": 2.343, "step": 9500 }, { "epoch": 0.73, "learning_rate": 2.7498665853472593e-06, "loss": 2.3504, "step": 9510 }, { "epoch": 0.73, "learning_rate": 2.7422428909049324e-06, "loss": 2.3529, "step": 9520 }, { "epoch": 0.73, "learning_rate": 2.7346191964626064e-06, "loss": 2.3527, "step": 9530 }, { "epoch": 0.73, "learning_rate": 2.726995502020279e-06, "loss": 2.3572, "step": 9540 }, { "epoch": 0.73, "learning_rate": 2.7193718075779523e-06, "loss": 2.3451, "step": 9550 }, { "epoch": 0.73, "learning_rate": 2.7117481131356254e-06, "loss": 2.3551, "step": 9560 }, { "epoch": 0.73, "learning_rate": 2.704124418693299e-06, "loss": 2.359, "step": 9570 }, { "epoch": 0.73, "learning_rate": 2.696500724250972e-06, "loss": 2.352, "step": 9580 }, { "epoch": 0.73, "learning_rate": 2.688877029808645e-06, "loss": 2.3549, "step": 9590 }, { "epoch": 0.73, "learning_rate": 2.6812533353663188e-06, "loss": 2.3525, "step": 9600 }, { "epoch": 0.73, "learning_rate": 2.673629640923992e-06, "loss": 2.3625, "step": 9610 }, { "epoch": 0.73, "learning_rate": 2.666005946481665e-06, "loss": 2.3549, "step": 9620 }, { "epoch": 0.73, "learning_rate": 2.6583822520393386e-06, "loss": 2.365, "step": 9630 }, { "epoch": 0.73, "learning_rate": 2.6507585575970117e-06, "loss": 2.3639, "step": 9640 }, { "epoch": 0.74, "learning_rate": 2.643134863154685e-06, "loss": 2.3607, "step": 9650 }, { "epoch": 0.74, "learning_rate": 2.635511168712358e-06, "loss": 2.3613, "step": 9660 }, { "epoch": 0.74, "learning_rate": 2.6278874742700315e-06, "loss": 2.3684, "step": 9670 }, { "epoch": 0.74, "learning_rate": 2.6202637798277046e-06, "loss": 2.3645, "step": 9680 }, { "epoch": 0.74, "learning_rate": 2.6126400853853778e-06, "loss": 2.3689, "step": 9690 }, { "epoch": 0.74, "learning_rate": 2.6050163909430513e-06, "loss": 2.3691, "step": 9700 }, { "epoch": 0.74, "learning_rate": 2.5973926965007245e-06, "loss": 2.3695, "step": 9710 }, { "epoch": 0.74, "learning_rate": 2.5897690020583976e-06, "loss": 2.3758, "step": 9720 }, { "epoch": 0.74, "learning_rate": 2.5821453076160707e-06, "loss": 2.3701, "step": 9730 }, { "epoch": 0.74, "learning_rate": 2.5745216131737443e-06, "loss": 2.3643, "step": 9740 }, { "epoch": 0.74, "learning_rate": 2.5668979187314174e-06, "loss": 2.3766, "step": 9750 }, { "epoch": 0.74, "learning_rate": 2.5592742242890905e-06, "loss": 2.374, "step": 9760 }, { "epoch": 0.74, "learning_rate": 2.551650529846764e-06, "loss": 2.3727, "step": 9770 }, { "epoch": 0.75, "learning_rate": 2.5440268354044372e-06, "loss": 2.3783, "step": 9780 }, { "epoch": 0.75, "learning_rate": 2.5364031409621103e-06, "loss": 2.3645, "step": 9790 }, { "epoch": 0.75, "learning_rate": 2.5287794465197835e-06, "loss": 2.3775, "step": 9800 }, { "epoch": 0.75, "learning_rate": 2.521155752077457e-06, "loss": 2.3707, "step": 9810 }, { "epoch": 0.75, "learning_rate": 2.51353205763513e-06, "loss": 2.3828, "step": 9820 }, { "epoch": 0.75, "learning_rate": 2.5059083631928033e-06, "loss": 2.3697, "step": 9830 }, { "epoch": 0.75, "learning_rate": 2.498284668750477e-06, "loss": 2.3814, "step": 9840 }, { "epoch": 0.75, "learning_rate": 2.49066097430815e-06, "loss": 2.3709, "step": 9850 }, { "epoch": 0.75, "learning_rate": 2.483037279865823e-06, "loss": 2.3766, "step": 9860 }, { "epoch": 0.75, "learning_rate": 2.4754135854234967e-06, "loss": 2.3768, "step": 9870 }, { "epoch": 0.75, "learning_rate": 2.4677898909811694e-06, "loss": 2.3793, "step": 9880 }, { "epoch": 0.75, "learning_rate": 2.460166196538843e-06, "loss": 2.3777, "step": 9890 }, { "epoch": 0.75, "learning_rate": 2.4525425020965165e-06, "loss": 2.3809, "step": 9900 }, { "epoch": 0.76, "learning_rate": 2.444918807654189e-06, "loss": 2.3795, "step": 9910 }, { "epoch": 0.76, "learning_rate": 2.4372951132118627e-06, "loss": 2.3756, "step": 9920 }, { "epoch": 0.76, "learning_rate": 2.429671418769536e-06, "loss": 2.3805, "step": 9930 }, { "epoch": 0.76, "learning_rate": 2.422047724327209e-06, "loss": 2.3861, "step": 9940 }, { "epoch": 0.76, "learning_rate": 2.414424029884882e-06, "loss": 2.3816, "step": 9950 }, { "epoch": 0.76, "learning_rate": 2.4068003354425557e-06, "loss": 2.3879, "step": 9960 }, { "epoch": 0.76, "learning_rate": 2.399176641000229e-06, "loss": 2.3818, "step": 9970 }, { "epoch": 0.76, "learning_rate": 2.391552946557902e-06, "loss": 2.3775, "step": 9980 }, { "epoch": 0.76, "learning_rate": 2.3839292521155755e-06, "loss": 2.3809, "step": 9990 }, { "epoch": 0.76, "learning_rate": 2.3763055576732486e-06, "loss": 2.3775, "step": 10000 }, { "epoch": 0.76, "learning_rate": 2.3686818632309218e-06, "loss": 2.3885, "step": 10010 }, { "epoch": 0.76, "learning_rate": 2.3610581687885953e-06, "loss": 2.3879, "step": 10020 }, { "epoch": 0.76, "learning_rate": 2.3534344743462684e-06, "loss": 2.385, "step": 10030 }, { "epoch": 0.77, "learning_rate": 2.3458107799039416e-06, "loss": 2.3793, "step": 10040 }, { "epoch": 0.77, "learning_rate": 2.3381870854616147e-06, "loss": 2.3898, "step": 10050 }, { "epoch": 0.77, "learning_rate": 2.3305633910192883e-06, "loss": 2.382, "step": 10060 }, { "epoch": 0.77, "learning_rate": 2.3229396965769614e-06, "loss": 2.39, "step": 10070 }, { "epoch": 0.77, "learning_rate": 2.3153160021346345e-06, "loss": 2.3848, "step": 10080 }, { "epoch": 0.77, "learning_rate": 2.307692307692308e-06, "loss": 2.3936, "step": 10090 }, { "epoch": 0.77, "learning_rate": 2.300068613249981e-06, "loss": 2.3937, "step": 10100 }, { "epoch": 0.77, "learning_rate": 2.2924449188076543e-06, "loss": 2.3939, "step": 10110 }, { "epoch": 0.77, "learning_rate": 2.2848212243653275e-06, "loss": 2.3889, "step": 10120 }, { "epoch": 0.77, "learning_rate": 2.277197529923001e-06, "loss": 2.382, "step": 10130 }, { "epoch": 0.77, "learning_rate": 2.269573835480674e-06, "loss": 2.3936, "step": 10140 }, { "epoch": 0.77, "learning_rate": 2.2619501410383473e-06, "loss": 2.3896, "step": 10150 }, { "epoch": 0.77, "learning_rate": 2.254326446596021e-06, "loss": 2.3908, "step": 10160 }, { "epoch": 0.78, "learning_rate": 2.246702752153694e-06, "loss": 2.3871, "step": 10170 }, { "epoch": 0.78, "learning_rate": 2.239079057711367e-06, "loss": 2.3945, "step": 10180 }, { "epoch": 0.78, "learning_rate": 2.2314553632690402e-06, "loss": 2.4012, "step": 10190 }, { "epoch": 0.78, "learning_rate": 2.2238316688267138e-06, "loss": 2.3996, "step": 10200 }, { "epoch": 0.78, "learning_rate": 2.216207974384387e-06, "loss": 2.3977, "step": 10210 }, { "epoch": 0.78, "learning_rate": 2.20858427994206e-06, "loss": 2.4, "step": 10220 }, { "epoch": 0.78, "learning_rate": 2.2009605854997336e-06, "loss": 2.3959, "step": 10230 }, { "epoch": 0.78, "learning_rate": 2.1933368910574067e-06, "loss": 2.3996, "step": 10240 }, { "epoch": 0.78, "learning_rate": 2.18571319661508e-06, "loss": 2.4045, "step": 10250 }, { "epoch": 0.78, "learning_rate": 2.178089502172753e-06, "loss": 2.3979, "step": 10260 }, { "epoch": 0.78, "learning_rate": 2.1704658077304265e-06, "loss": 2.4029, "step": 10270 }, { "epoch": 0.78, "learning_rate": 2.1628421132880993e-06, "loss": 2.4057, "step": 10280 }, { "epoch": 0.78, "learning_rate": 2.155218418845773e-06, "loss": 2.4002, "step": 10290 }, { "epoch": 0.79, "learning_rate": 2.1475947244034464e-06, "loss": 2.3965, "step": 10300 }, { "epoch": 0.79, "learning_rate": 2.139971029961119e-06, "loss": 2.4021, "step": 10310 }, { "epoch": 0.79, "learning_rate": 2.1323473355187926e-06, "loss": 2.4031, "step": 10320 } ], "max_steps": 13117, "num_train_epochs": 1, "total_flos": 6400995950592000.0, "trial_name": null, "trial_params": null }