bhuvanmdev's picture
Upload folder using huggingface_hub
39fd1db verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7867652664481207,
"global_step": 10320,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.992376305557673e-06,
"loss": 30.7359,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 9.984752611115347e-06,
"loss": 31.0328,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 9.97712891667302e-06,
"loss": 30.2281,
"step": 30
},
{
"epoch": 0.0,
"learning_rate": 9.969505222230695e-06,
"loss": 30.8781,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 9.961881527788367e-06,
"loss": 29.9109,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 9.954257833346041e-06,
"loss": 29.7312,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 9.946634138903714e-06,
"loss": 28.8719,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 9.939010444461386e-06,
"loss": 28.7094,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 9.93138675001906e-06,
"loss": 28.0828,
"step": 90
},
{
"epoch": 0.01,
"learning_rate": 9.923763055576732e-06,
"loss": 27.6328,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 9.916139361134408e-06,
"loss": 26.9016,
"step": 110
},
{
"epoch": 0.01,
"learning_rate": 9.90851566669208e-06,
"loss": 26.3891,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 9.900891972249752e-06,
"loss": 25.8813,
"step": 130
},
{
"epoch": 0.01,
"learning_rate": 9.893268277807426e-06,
"loss": 25.7141,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 9.885644583365099e-06,
"loss": 25.2875,
"step": 150
},
{
"epoch": 0.01,
"learning_rate": 9.878020888922773e-06,
"loss": 24.6219,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 9.870397194480445e-06,
"loss": 24.1156,
"step": 170
},
{
"epoch": 0.01,
"learning_rate": 9.86277350003812e-06,
"loss": 24.0234,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 9.855149805595793e-06,
"loss": 23.6562,
"step": 190
},
{
"epoch": 0.02,
"learning_rate": 9.847526111153465e-06,
"loss": 23.2922,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 9.839902416711139e-06,
"loss": 22.7734,
"step": 210
},
{
"epoch": 0.02,
"learning_rate": 9.832278722268811e-06,
"loss": 22.5281,
"step": 220
},
{
"epoch": 0.02,
"learning_rate": 9.824655027826485e-06,
"loss": 22.0547,
"step": 230
},
{
"epoch": 0.02,
"learning_rate": 9.817031333384158e-06,
"loss": 22.1594,
"step": 240
},
{
"epoch": 0.02,
"learning_rate": 9.809407638941832e-06,
"loss": 21.7297,
"step": 250
},
{
"epoch": 0.02,
"learning_rate": 9.801783944499506e-06,
"loss": 21.2375,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 9.794160250057178e-06,
"loss": 20.6297,
"step": 270
},
{
"epoch": 0.02,
"learning_rate": 9.786536555614852e-06,
"loss": 20.3438,
"step": 280
},
{
"epoch": 0.02,
"learning_rate": 9.778912861172524e-06,
"loss": 20.0625,
"step": 290
},
{
"epoch": 0.02,
"learning_rate": 9.771289166730198e-06,
"loss": 19.825,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 9.76366547228787e-06,
"loss": 19.6578,
"step": 310
},
{
"epoch": 0.02,
"learning_rate": 9.756041777845544e-06,
"loss": 19.4641,
"step": 320
},
{
"epoch": 0.03,
"learning_rate": 9.748418083403218e-06,
"loss": 18.9703,
"step": 330
},
{
"epoch": 0.03,
"learning_rate": 9.74079438896089e-06,
"loss": 18.5938,
"step": 340
},
{
"epoch": 0.03,
"learning_rate": 9.733170694518565e-06,
"loss": 18.2047,
"step": 350
},
{
"epoch": 0.03,
"learning_rate": 9.725547000076237e-06,
"loss": 17.3922,
"step": 360
},
{
"epoch": 0.03,
"learning_rate": 9.717923305633911e-06,
"loss": 17.1758,
"step": 370
},
{
"epoch": 0.03,
"learning_rate": 9.710299611191583e-06,
"loss": 16.975,
"step": 380
},
{
"epoch": 0.03,
"learning_rate": 9.702675916749257e-06,
"loss": 15.7492,
"step": 390
},
{
"epoch": 0.03,
"learning_rate": 9.695052222306931e-06,
"loss": 15.2898,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 9.687428527864603e-06,
"loss": 14.4289,
"step": 410
},
{
"epoch": 0.03,
"learning_rate": 9.679804833422277e-06,
"loss": 14.3398,
"step": 420
},
{
"epoch": 0.03,
"learning_rate": 9.67218113897995e-06,
"loss": 13.6992,
"step": 430
},
{
"epoch": 0.03,
"learning_rate": 9.664557444537624e-06,
"loss": 12.75,
"step": 440
},
{
"epoch": 0.03,
"learning_rate": 9.656933750095296e-06,
"loss": 12.8219,
"step": 450
},
{
"epoch": 0.04,
"learning_rate": 9.64931005565297e-06,
"loss": 12.1195,
"step": 460
},
{
"epoch": 0.04,
"learning_rate": 9.641686361210644e-06,
"loss": 10.5531,
"step": 470
},
{
"epoch": 0.04,
"learning_rate": 9.634062666768316e-06,
"loss": 9.9711,
"step": 480
},
{
"epoch": 0.04,
"learning_rate": 9.62643897232599e-06,
"loss": 10.1711,
"step": 490
},
{
"epoch": 0.04,
"learning_rate": 9.618815277883662e-06,
"loss": 9.2937,
"step": 500
},
{
"epoch": 0.04,
"learning_rate": 9.611191583441336e-06,
"loss": 8.8313,
"step": 510
},
{
"epoch": 0.04,
"learning_rate": 9.603567888999009e-06,
"loss": 8.4027,
"step": 520
},
{
"epoch": 0.04,
"learning_rate": 9.595944194556683e-06,
"loss": 8.5461,
"step": 530
},
{
"epoch": 0.04,
"learning_rate": 9.588320500114357e-06,
"loss": 8.4668,
"step": 540
},
{
"epoch": 0.04,
"learning_rate": 9.580696805672029e-06,
"loss": 7.9266,
"step": 550
},
{
"epoch": 0.04,
"learning_rate": 9.573073111229703e-06,
"loss": 7.718,
"step": 560
},
{
"epoch": 0.04,
"learning_rate": 9.565449416787375e-06,
"loss": 7.6984,
"step": 570
},
{
"epoch": 0.04,
"learning_rate": 9.55782572234505e-06,
"loss": 7.4594,
"step": 580
},
{
"epoch": 0.04,
"learning_rate": 9.550202027902723e-06,
"loss": 7.302,
"step": 590
},
{
"epoch": 0.05,
"learning_rate": 9.542578333460395e-06,
"loss": 7.2758,
"step": 600
},
{
"epoch": 0.05,
"learning_rate": 9.53495463901807e-06,
"loss": 6.9484,
"step": 610
},
{
"epoch": 0.05,
"learning_rate": 9.527330944575742e-06,
"loss": 6.5934,
"step": 620
},
{
"epoch": 0.05,
"learning_rate": 9.519707250133416e-06,
"loss": 6.4156,
"step": 630
},
{
"epoch": 0.05,
"learning_rate": 9.512083555691088e-06,
"loss": 6.3332,
"step": 640
},
{
"epoch": 0.05,
"learning_rate": 9.504459861248762e-06,
"loss": 6.3387,
"step": 650
},
{
"epoch": 0.05,
"learning_rate": 9.496836166806436e-06,
"loss": 6.7496,
"step": 660
},
{
"epoch": 0.05,
"learning_rate": 9.489212472364108e-06,
"loss": 6.2605,
"step": 670
},
{
"epoch": 0.05,
"learning_rate": 9.481588777921782e-06,
"loss": 6.1672,
"step": 680
},
{
"epoch": 0.05,
"learning_rate": 9.473965083479454e-06,
"loss": 6.448,
"step": 690
},
{
"epoch": 0.05,
"learning_rate": 9.466341389037128e-06,
"loss": 6.2863,
"step": 700
},
{
"epoch": 0.05,
"learning_rate": 9.4587176945948e-06,
"loss": 5.8332,
"step": 710
},
{
"epoch": 0.05,
"learning_rate": 9.451094000152475e-06,
"loss": 5.7637,
"step": 720
},
{
"epoch": 0.06,
"learning_rate": 9.443470305710149e-06,
"loss": 5.8871,
"step": 730
},
{
"epoch": 0.06,
"learning_rate": 9.435846611267821e-06,
"loss": 5.6824,
"step": 740
},
{
"epoch": 0.06,
"learning_rate": 9.428222916825495e-06,
"loss": 5.6824,
"step": 750
},
{
"epoch": 0.06,
"learning_rate": 9.420599222383167e-06,
"loss": 5.623,
"step": 760
},
{
"epoch": 0.06,
"learning_rate": 9.412975527940841e-06,
"loss": 5.793,
"step": 770
},
{
"epoch": 0.06,
"learning_rate": 9.405351833498513e-06,
"loss": 5.6578,
"step": 780
},
{
"epoch": 0.06,
"learning_rate": 9.397728139056187e-06,
"loss": 5.4418,
"step": 790
},
{
"epoch": 0.06,
"learning_rate": 9.390104444613861e-06,
"loss": 5.6582,
"step": 800
},
{
"epoch": 0.06,
"learning_rate": 9.382480750171534e-06,
"loss": 5.4758,
"step": 810
},
{
"epoch": 0.06,
"learning_rate": 9.374857055729208e-06,
"loss": 5.3496,
"step": 820
},
{
"epoch": 0.06,
"learning_rate": 9.36723336128688e-06,
"loss": 5.809,
"step": 830
},
{
"epoch": 0.06,
"learning_rate": 9.359609666844554e-06,
"loss": 5.3367,
"step": 840
},
{
"epoch": 0.06,
"learning_rate": 9.351985972402226e-06,
"loss": 5.3164,
"step": 850
},
{
"epoch": 0.07,
"learning_rate": 9.3443622779599e-06,
"loss": 5.2313,
"step": 860
},
{
"epoch": 0.07,
"learning_rate": 9.336738583517574e-06,
"loss": 5.3535,
"step": 870
},
{
"epoch": 0.07,
"learning_rate": 9.329114889075246e-06,
"loss": 5.477,
"step": 880
},
{
"epoch": 0.07,
"learning_rate": 9.32149119463292e-06,
"loss": 5.5727,
"step": 890
},
{
"epoch": 0.07,
"learning_rate": 9.313867500190593e-06,
"loss": 5.1645,
"step": 900
},
{
"epoch": 0.07,
"learning_rate": 9.306243805748267e-06,
"loss": 5.0938,
"step": 910
},
{
"epoch": 0.07,
"learning_rate": 9.298620111305939e-06,
"loss": 5.1703,
"step": 920
},
{
"epoch": 0.07,
"learning_rate": 9.290996416863613e-06,
"loss": 5.0945,
"step": 930
},
{
"epoch": 0.07,
"learning_rate": 9.283372722421287e-06,
"loss": 5.1152,
"step": 940
},
{
"epoch": 0.07,
"learning_rate": 9.27574902797896e-06,
"loss": 5.0703,
"step": 950
},
{
"epoch": 0.07,
"learning_rate": 9.268125333536633e-06,
"loss": 5.0184,
"step": 960
},
{
"epoch": 0.07,
"learning_rate": 9.260501639094305e-06,
"loss": 5.0059,
"step": 970
},
{
"epoch": 0.07,
"learning_rate": 9.25287794465198e-06,
"loss": 4.9516,
"step": 980
},
{
"epoch": 0.08,
"learning_rate": 9.245254250209652e-06,
"loss": 4.9363,
"step": 990
},
{
"epoch": 0.08,
"learning_rate": 9.237630555767326e-06,
"loss": 5.1004,
"step": 1000
},
{
"epoch": 0.08,
"learning_rate": 9.230006861325e-06,
"loss": 4.8988,
"step": 1010
},
{
"epoch": 0.08,
"learning_rate": 9.222383166882672e-06,
"loss": 5.0332,
"step": 1020
},
{
"epoch": 0.08,
"learning_rate": 9.214759472440346e-06,
"loss": 5.0336,
"step": 1030
},
{
"epoch": 0.08,
"learning_rate": 9.207135777998018e-06,
"loss": 4.7898,
"step": 1040
},
{
"epoch": 0.08,
"learning_rate": 9.199512083555692e-06,
"loss": 4.8375,
"step": 1050
},
{
"epoch": 0.08,
"learning_rate": 9.191888389113365e-06,
"loss": 4.7547,
"step": 1060
},
{
"epoch": 0.08,
"learning_rate": 9.184264694671038e-06,
"loss": 4.7156,
"step": 1070
},
{
"epoch": 0.08,
"learning_rate": 9.176641000228712e-06,
"loss": 4.7141,
"step": 1080
},
{
"epoch": 0.08,
"learning_rate": 9.169017305786385e-06,
"loss": 4.7141,
"step": 1090
},
{
"epoch": 0.08,
"learning_rate": 9.161393611344059e-06,
"loss": 4.718,
"step": 1100
},
{
"epoch": 0.08,
"learning_rate": 9.153769916901731e-06,
"loss": 4.6184,
"step": 1110
},
{
"epoch": 0.09,
"learning_rate": 9.146146222459405e-06,
"loss": 4.7738,
"step": 1120
},
{
"epoch": 0.09,
"learning_rate": 9.138522528017077e-06,
"loss": 4.552,
"step": 1130
},
{
"epoch": 0.09,
"learning_rate": 9.130898833574751e-06,
"loss": 4.5945,
"step": 1140
},
{
"epoch": 0.09,
"learning_rate": 9.123275139132425e-06,
"loss": 4.5016,
"step": 1150
},
{
"epoch": 0.09,
"learning_rate": 9.115651444690098e-06,
"loss": 4.516,
"step": 1160
},
{
"epoch": 0.09,
"learning_rate": 9.108027750247771e-06,
"loss": 4.557,
"step": 1170
},
{
"epoch": 0.09,
"learning_rate": 9.100404055805444e-06,
"loss": 4.4656,
"step": 1180
},
{
"epoch": 0.09,
"learning_rate": 9.092780361363118e-06,
"loss": 4.4598,
"step": 1190
},
{
"epoch": 0.09,
"learning_rate": 9.08515666692079e-06,
"loss": 4.425,
"step": 1200
},
{
"epoch": 0.09,
"learning_rate": 9.077532972478464e-06,
"loss": 4.5328,
"step": 1210
},
{
"epoch": 0.09,
"learning_rate": 9.069909278036138e-06,
"loss": 4.4902,
"step": 1220
},
{
"epoch": 0.09,
"learning_rate": 9.06228558359381e-06,
"loss": 4.4094,
"step": 1230
},
{
"epoch": 0.09,
"learning_rate": 9.054661889151484e-06,
"loss": 4.4609,
"step": 1240
},
{
"epoch": 0.1,
"learning_rate": 9.047038194709157e-06,
"loss": 4.2906,
"step": 1250
},
{
"epoch": 0.1,
"learning_rate": 9.03941450026683e-06,
"loss": 4.2863,
"step": 1260
},
{
"epoch": 0.1,
"learning_rate": 9.031790805824503e-06,
"loss": 4.3141,
"step": 1270
},
{
"epoch": 0.1,
"learning_rate": 9.024167111382177e-06,
"loss": 4.2844,
"step": 1280
},
{
"epoch": 0.1,
"learning_rate": 9.01654341693985e-06,
"loss": 4.323,
"step": 1290
},
{
"epoch": 0.1,
"learning_rate": 9.008919722497523e-06,
"loss": 4.2539,
"step": 1300
},
{
"epoch": 0.1,
"learning_rate": 9.001296028055197e-06,
"loss": 4.2047,
"step": 1310
},
{
"epoch": 0.1,
"learning_rate": 8.99367233361287e-06,
"loss": 4.2406,
"step": 1320
},
{
"epoch": 0.1,
"learning_rate": 8.986048639170543e-06,
"loss": 4.1656,
"step": 1330
},
{
"epoch": 0.1,
"learning_rate": 8.978424944728216e-06,
"loss": 4.1324,
"step": 1340
},
{
"epoch": 0.1,
"learning_rate": 8.97080125028589e-06,
"loss": 4.2102,
"step": 1350
},
{
"epoch": 0.1,
"learning_rate": 8.963177555843564e-06,
"loss": 4.1785,
"step": 1360
},
{
"epoch": 0.1,
"learning_rate": 8.955553861401236e-06,
"loss": 4.1496,
"step": 1370
},
{
"epoch": 0.11,
"learning_rate": 8.94793016695891e-06,
"loss": 4.148,
"step": 1380
},
{
"epoch": 0.11,
"learning_rate": 8.940306472516582e-06,
"loss": 4.0875,
"step": 1390
},
{
"epoch": 0.11,
"learning_rate": 8.932682778074256e-06,
"loss": 4.0488,
"step": 1400
},
{
"epoch": 0.11,
"learning_rate": 8.925059083631928e-06,
"loss": 4.0926,
"step": 1410
},
{
"epoch": 0.11,
"learning_rate": 8.917435389189602e-06,
"loss": 4.0521,
"step": 1420
},
{
"epoch": 0.11,
"learning_rate": 8.909811694747276e-06,
"loss": 4.0557,
"step": 1430
},
{
"epoch": 0.11,
"learning_rate": 8.902188000304949e-06,
"loss": 4.057,
"step": 1440
},
{
"epoch": 0.11,
"learning_rate": 8.894564305862623e-06,
"loss": 3.9955,
"step": 1450
},
{
"epoch": 0.11,
"learning_rate": 8.886940611420295e-06,
"loss": 3.9541,
"step": 1460
},
{
"epoch": 0.11,
"learning_rate": 8.879316916977969e-06,
"loss": 4.0002,
"step": 1470
},
{
"epoch": 0.11,
"learning_rate": 8.871693222535641e-06,
"loss": 3.8512,
"step": 1480
},
{
"epoch": 0.11,
"learning_rate": 8.864069528093315e-06,
"loss": 3.9119,
"step": 1490
},
{
"epoch": 0.11,
"learning_rate": 8.856445833650989e-06,
"loss": 4.0641,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 8.848822139208661e-06,
"loss": 3.8793,
"step": 1510
},
{
"epoch": 0.12,
"learning_rate": 8.841198444766335e-06,
"loss": 3.8955,
"step": 1520
},
{
"epoch": 0.12,
"learning_rate": 8.833574750324008e-06,
"loss": 3.8289,
"step": 1530
},
{
"epoch": 0.12,
"learning_rate": 8.825951055881682e-06,
"loss": 3.8039,
"step": 1540
},
{
"epoch": 0.12,
"learning_rate": 8.818327361439354e-06,
"loss": 3.7967,
"step": 1550
},
{
"epoch": 0.12,
"learning_rate": 8.810703666997026e-06,
"loss": 3.8004,
"step": 1560
},
{
"epoch": 0.12,
"learning_rate": 8.803079972554702e-06,
"loss": 3.7502,
"step": 1570
},
{
"epoch": 0.12,
"learning_rate": 8.795456278112374e-06,
"loss": 3.7746,
"step": 1580
},
{
"epoch": 0.12,
"learning_rate": 8.787832583670048e-06,
"loss": 3.74,
"step": 1590
},
{
"epoch": 0.12,
"learning_rate": 8.78020888922772e-06,
"loss": 3.7273,
"step": 1600
},
{
"epoch": 0.12,
"learning_rate": 8.772585194785394e-06,
"loss": 3.7084,
"step": 1610
},
{
"epoch": 0.12,
"learning_rate": 8.764961500343067e-06,
"loss": 3.6695,
"step": 1620
},
{
"epoch": 0.12,
"learning_rate": 8.757337805900739e-06,
"loss": 3.6781,
"step": 1630
},
{
"epoch": 0.13,
"learning_rate": 8.749714111458415e-06,
"loss": 3.6531,
"step": 1640
},
{
"epoch": 0.13,
"learning_rate": 8.742090417016087e-06,
"loss": 3.6125,
"step": 1650
},
{
"epoch": 0.13,
"learning_rate": 8.73446672257376e-06,
"loss": 3.6434,
"step": 1660
},
{
"epoch": 0.13,
"learning_rate": 8.726843028131433e-06,
"loss": 3.5969,
"step": 1670
},
{
"epoch": 0.13,
"learning_rate": 8.719219333689107e-06,
"loss": 3.6078,
"step": 1680
},
{
"epoch": 0.13,
"learning_rate": 8.71159563924678e-06,
"loss": 3.6463,
"step": 1690
},
{
"epoch": 0.13,
"learning_rate": 8.703971944804452e-06,
"loss": 3.5914,
"step": 1700
},
{
"epoch": 0.13,
"learning_rate": 8.696348250362127e-06,
"loss": 3.5229,
"step": 1710
},
{
"epoch": 0.13,
"learning_rate": 8.6887245559198e-06,
"loss": 3.5178,
"step": 1720
},
{
"epoch": 0.13,
"learning_rate": 8.681100861477474e-06,
"loss": 3.5465,
"step": 1730
},
{
"epoch": 0.13,
"learning_rate": 8.673477167035146e-06,
"loss": 3.4473,
"step": 1740
},
{
"epoch": 0.13,
"learning_rate": 8.665853472592818e-06,
"loss": 3.4814,
"step": 1750
},
{
"epoch": 0.13,
"learning_rate": 8.658229778150492e-06,
"loss": 3.459,
"step": 1760
},
{
"epoch": 0.13,
"learning_rate": 8.650606083708166e-06,
"loss": 3.4336,
"step": 1770
},
{
"epoch": 0.14,
"learning_rate": 8.64298238926584e-06,
"loss": 3.3758,
"step": 1780
},
{
"epoch": 0.14,
"learning_rate": 8.635358694823512e-06,
"loss": 3.3777,
"step": 1790
},
{
"epoch": 0.14,
"learning_rate": 8.627735000381186e-06,
"loss": 3.3633,
"step": 1800
},
{
"epoch": 0.14,
"learning_rate": 8.620111305938859e-06,
"loss": 3.3711,
"step": 1810
},
{
"epoch": 0.14,
"learning_rate": 8.612487611496531e-06,
"loss": 3.3563,
"step": 1820
},
{
"epoch": 0.14,
"learning_rate": 8.604863917054205e-06,
"loss": 3.3432,
"step": 1830
},
{
"epoch": 0.14,
"learning_rate": 8.597240222611879e-06,
"loss": 3.4258,
"step": 1840
},
{
"epoch": 0.14,
"learning_rate": 8.589616528169553e-06,
"loss": 3.3188,
"step": 1850
},
{
"epoch": 0.14,
"learning_rate": 8.581992833727225e-06,
"loss": 3.2518,
"step": 1860
},
{
"epoch": 0.14,
"learning_rate": 8.574369139284897e-06,
"loss": 3.283,
"step": 1870
},
{
"epoch": 0.14,
"learning_rate": 8.566745444842571e-06,
"loss": 3.2523,
"step": 1880
},
{
"epoch": 0.14,
"learning_rate": 8.559121750400244e-06,
"loss": 3.2496,
"step": 1890
},
{
"epoch": 0.14,
"learning_rate": 8.551498055957918e-06,
"loss": 3.2254,
"step": 1900
},
{
"epoch": 0.15,
"learning_rate": 8.543874361515592e-06,
"loss": 3.1781,
"step": 1910
},
{
"epoch": 0.15,
"learning_rate": 8.536250667073266e-06,
"loss": 3.1686,
"step": 1920
},
{
"epoch": 0.15,
"learning_rate": 8.528626972630938e-06,
"loss": 3.2557,
"step": 1930
},
{
"epoch": 0.15,
"learning_rate": 8.52100327818861e-06,
"loss": 3.1295,
"step": 1940
},
{
"epoch": 0.15,
"learning_rate": 8.513379583746284e-06,
"loss": 3.1766,
"step": 1950
},
{
"epoch": 0.15,
"learning_rate": 8.505755889303956e-06,
"loss": 3.1641,
"step": 1960
},
{
"epoch": 0.15,
"learning_rate": 8.49813219486163e-06,
"loss": 3.1301,
"step": 1970
},
{
"epoch": 0.15,
"learning_rate": 8.490508500419304e-06,
"loss": 3.1424,
"step": 1980
},
{
"epoch": 0.15,
"learning_rate": 8.482884805976977e-06,
"loss": 3.0842,
"step": 1990
},
{
"epoch": 0.15,
"learning_rate": 8.47526111153465e-06,
"loss": 3.0824,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 8.467637417092323e-06,
"loss": 3.0498,
"step": 2010
},
{
"epoch": 0.15,
"learning_rate": 8.460013722649997e-06,
"loss": 3.1193,
"step": 2020
},
{
"epoch": 0.15,
"learning_rate": 8.45239002820767e-06,
"loss": 3.0367,
"step": 2030
},
{
"epoch": 0.16,
"learning_rate": 8.444766333765343e-06,
"loss": 3.051,
"step": 2040
},
{
"epoch": 0.16,
"learning_rate": 8.437142639323017e-06,
"loss": 3.0029,
"step": 2050
},
{
"epoch": 0.16,
"learning_rate": 8.42951894488069e-06,
"loss": 2.9674,
"step": 2060
},
{
"epoch": 0.16,
"learning_rate": 8.421895250438363e-06,
"loss": 2.9797,
"step": 2070
},
{
"epoch": 0.16,
"learning_rate": 8.414271555996036e-06,
"loss": 3.0271,
"step": 2080
},
{
"epoch": 0.16,
"learning_rate": 8.40664786155371e-06,
"loss": 2.9619,
"step": 2090
},
{
"epoch": 0.16,
"learning_rate": 8.399024167111382e-06,
"loss": 3.0301,
"step": 2100
},
{
"epoch": 0.16,
"learning_rate": 8.391400472669056e-06,
"loss": 2.9232,
"step": 2110
},
{
"epoch": 0.16,
"learning_rate": 8.38377677822673e-06,
"loss": 2.935,
"step": 2120
},
{
"epoch": 0.16,
"learning_rate": 8.376153083784402e-06,
"loss": 2.924,
"step": 2130
},
{
"epoch": 0.16,
"learning_rate": 8.368529389342076e-06,
"loss": 2.8533,
"step": 2140
},
{
"epoch": 0.16,
"learning_rate": 8.360905694899748e-06,
"loss": 2.859,
"step": 2150
},
{
"epoch": 0.16,
"learning_rate": 8.353282000457422e-06,
"loss": 2.8752,
"step": 2160
},
{
"epoch": 0.17,
"learning_rate": 8.345658306015095e-06,
"loss": 2.8092,
"step": 2170
},
{
"epoch": 0.17,
"learning_rate": 8.338034611572769e-06,
"loss": 2.9111,
"step": 2180
},
{
"epoch": 0.17,
"learning_rate": 8.330410917130443e-06,
"loss": 2.8344,
"step": 2190
},
{
"epoch": 0.17,
"learning_rate": 8.322787222688115e-06,
"loss": 2.8535,
"step": 2200
},
{
"epoch": 0.17,
"learning_rate": 8.315163528245789e-06,
"loss": 2.8996,
"step": 2210
},
{
"epoch": 0.17,
"learning_rate": 8.307539833803461e-06,
"loss": 2.8619,
"step": 2220
},
{
"epoch": 0.17,
"learning_rate": 8.299916139361135e-06,
"loss": 2.8908,
"step": 2230
},
{
"epoch": 0.17,
"learning_rate": 8.292292444918807e-06,
"loss": 2.8221,
"step": 2240
},
{
"epoch": 0.17,
"learning_rate": 8.284668750476481e-06,
"loss": 2.8062,
"step": 2250
},
{
"epoch": 0.17,
"learning_rate": 8.277045056034155e-06,
"loss": 2.7785,
"step": 2260
},
{
"epoch": 0.17,
"learning_rate": 8.269421361591828e-06,
"loss": 2.8027,
"step": 2270
},
{
"epoch": 0.17,
"learning_rate": 8.261797667149502e-06,
"loss": 2.7518,
"step": 2280
},
{
"epoch": 0.17,
"learning_rate": 8.254173972707174e-06,
"loss": 2.7961,
"step": 2290
},
{
"epoch": 0.18,
"learning_rate": 8.246550278264848e-06,
"loss": 2.7605,
"step": 2300
},
{
"epoch": 0.18,
"learning_rate": 8.23892658382252e-06,
"loss": 2.7055,
"step": 2310
},
{
"epoch": 0.18,
"learning_rate": 8.231302889380194e-06,
"loss": 2.6449,
"step": 2320
},
{
"epoch": 0.18,
"learning_rate": 8.223679194937868e-06,
"loss": 2.6789,
"step": 2330
},
{
"epoch": 0.18,
"learning_rate": 8.21605550049554e-06,
"loss": 2.6936,
"step": 2340
},
{
"epoch": 0.18,
"learning_rate": 8.208431806053214e-06,
"loss": 2.6859,
"step": 2350
},
{
"epoch": 0.18,
"learning_rate": 8.200808111610887e-06,
"loss": 2.7135,
"step": 2360
},
{
"epoch": 0.18,
"learning_rate": 8.19318441716856e-06,
"loss": 2.634,
"step": 2370
},
{
"epoch": 0.18,
"learning_rate": 8.185560722726233e-06,
"loss": 2.643,
"step": 2380
},
{
"epoch": 0.18,
"learning_rate": 8.177937028283907e-06,
"loss": 2.6611,
"step": 2390
},
{
"epoch": 0.18,
"learning_rate": 8.170313333841581e-06,
"loss": 2.6721,
"step": 2400
},
{
"epoch": 0.18,
"learning_rate": 8.162689639399253e-06,
"loss": 2.6314,
"step": 2410
},
{
"epoch": 0.18,
"learning_rate": 8.155065944956927e-06,
"loss": 2.666,
"step": 2420
},
{
"epoch": 0.19,
"learning_rate": 8.1474422505146e-06,
"loss": 2.6109,
"step": 2430
},
{
"epoch": 0.19,
"learning_rate": 8.139818556072273e-06,
"loss": 2.6479,
"step": 2440
},
{
"epoch": 0.19,
"learning_rate": 8.132194861629946e-06,
"loss": 2.6289,
"step": 2450
},
{
"epoch": 0.19,
"learning_rate": 8.12457116718762e-06,
"loss": 2.5789,
"step": 2460
},
{
"epoch": 0.19,
"learning_rate": 8.116947472745294e-06,
"loss": 2.5514,
"step": 2470
},
{
"epoch": 0.19,
"learning_rate": 8.109323778302966e-06,
"loss": 2.5531,
"step": 2480
},
{
"epoch": 0.19,
"learning_rate": 8.10170008386064e-06,
"loss": 2.5922,
"step": 2490
},
{
"epoch": 0.19,
"learning_rate": 8.094076389418312e-06,
"loss": 2.5785,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 8.086452694975986e-06,
"loss": 2.5496,
"step": 2510
},
{
"epoch": 0.19,
"learning_rate": 8.078829000533659e-06,
"loss": 2.5227,
"step": 2520
},
{
"epoch": 0.19,
"learning_rate": 8.071205306091332e-06,
"loss": 2.6195,
"step": 2530
},
{
"epoch": 0.19,
"learning_rate": 8.063581611649006e-06,
"loss": 2.5441,
"step": 2540
},
{
"epoch": 0.19,
"learning_rate": 8.055957917206679e-06,
"loss": 2.5348,
"step": 2550
},
{
"epoch": 0.2,
"learning_rate": 8.048334222764353e-06,
"loss": 2.5326,
"step": 2560
},
{
"epoch": 0.2,
"learning_rate": 8.040710528322025e-06,
"loss": 2.5195,
"step": 2570
},
{
"epoch": 0.2,
"learning_rate": 8.033086833879699e-06,
"loss": 2.5826,
"step": 2580
},
{
"epoch": 0.2,
"learning_rate": 8.025463139437371e-06,
"loss": 2.5199,
"step": 2590
},
{
"epoch": 0.2,
"learning_rate": 8.017839444995045e-06,
"loss": 2.4783,
"step": 2600
},
{
"epoch": 0.2,
"learning_rate": 8.01021575055272e-06,
"loss": 2.5186,
"step": 2610
},
{
"epoch": 0.2,
"learning_rate": 8.002592056110392e-06,
"loss": 2.5002,
"step": 2620
},
{
"epoch": 0.2,
"learning_rate": 7.994968361668065e-06,
"loss": 2.5014,
"step": 2630
},
{
"epoch": 0.2,
"learning_rate": 7.987344667225738e-06,
"loss": 2.4631,
"step": 2640
},
{
"epoch": 0.2,
"learning_rate": 7.979720972783412e-06,
"loss": 2.4621,
"step": 2650
},
{
"epoch": 0.2,
"learning_rate": 7.972097278341084e-06,
"loss": 2.4621,
"step": 2660
},
{
"epoch": 0.2,
"learning_rate": 7.964473583898758e-06,
"loss": 2.524,
"step": 2670
},
{
"epoch": 0.2,
"learning_rate": 7.956849889456432e-06,
"loss": 2.4607,
"step": 2680
},
{
"epoch": 0.21,
"learning_rate": 7.949226195014104e-06,
"loss": 2.4621,
"step": 2690
},
{
"epoch": 0.21,
"learning_rate": 7.941602500571778e-06,
"loss": 2.4975,
"step": 2700
},
{
"epoch": 0.21,
"learning_rate": 7.93397880612945e-06,
"loss": 2.4941,
"step": 2710
},
{
"epoch": 0.21,
"learning_rate": 7.926355111687125e-06,
"loss": 2.3941,
"step": 2720
},
{
"epoch": 0.21,
"learning_rate": 7.918731417244797e-06,
"loss": 2.457,
"step": 2730
},
{
"epoch": 0.21,
"learning_rate": 7.91110772280247e-06,
"loss": 2.4133,
"step": 2740
},
{
"epoch": 0.21,
"learning_rate": 7.903484028360145e-06,
"loss": 2.41,
"step": 2750
},
{
"epoch": 0.21,
"learning_rate": 7.895860333917817e-06,
"loss": 2.4154,
"step": 2760
},
{
"epoch": 0.21,
"learning_rate": 7.888236639475491e-06,
"loss": 2.3951,
"step": 2770
},
{
"epoch": 0.21,
"learning_rate": 7.880612945033163e-06,
"loss": 2.3861,
"step": 2780
},
{
"epoch": 0.21,
"learning_rate": 7.872989250590837e-06,
"loss": 2.3781,
"step": 2790
},
{
"epoch": 0.21,
"learning_rate": 7.86536555614851e-06,
"loss": 2.3859,
"step": 2800
},
{
"epoch": 0.21,
"learning_rate": 7.857741861706184e-06,
"loss": 2.3857,
"step": 2810
},
{
"epoch": 0.21,
"learning_rate": 7.850118167263858e-06,
"loss": 2.367,
"step": 2820
},
{
"epoch": 0.22,
"learning_rate": 7.84249447282153e-06,
"loss": 2.3711,
"step": 2830
},
{
"epoch": 0.22,
"learning_rate": 7.834870778379204e-06,
"loss": 2.342,
"step": 2840
},
{
"epoch": 0.22,
"learning_rate": 7.827247083936876e-06,
"loss": 2.3771,
"step": 2850
},
{
"epoch": 0.22,
"learning_rate": 7.81962338949455e-06,
"loss": 2.3484,
"step": 2860
},
{
"epoch": 0.22,
"learning_rate": 7.811999695052222e-06,
"loss": 2.3561,
"step": 2870
},
{
"epoch": 0.22,
"learning_rate": 7.804376000609896e-06,
"loss": 2.3777,
"step": 2880
},
{
"epoch": 0.22,
"learning_rate": 7.79675230616757e-06,
"loss": 2.3965,
"step": 2890
},
{
"epoch": 0.22,
"learning_rate": 7.789128611725243e-06,
"loss": 2.3568,
"step": 2900
},
{
"epoch": 0.22,
"learning_rate": 7.781504917282917e-06,
"loss": 2.3586,
"step": 2910
},
{
"epoch": 0.22,
"learning_rate": 7.773881222840589e-06,
"loss": 2.3389,
"step": 2920
},
{
"epoch": 0.22,
"learning_rate": 7.766257528398263e-06,
"loss": 2.3141,
"step": 2930
},
{
"epoch": 0.22,
"learning_rate": 7.758633833955935e-06,
"loss": 2.3244,
"step": 2940
},
{
"epoch": 0.22,
"learning_rate": 7.751010139513609e-06,
"loss": 2.323,
"step": 2950
},
{
"epoch": 0.23,
"learning_rate": 7.743386445071283e-06,
"loss": 2.3035,
"step": 2960
},
{
"epoch": 0.23,
"learning_rate": 7.735762750628955e-06,
"loss": 2.2945,
"step": 2970
},
{
"epoch": 0.23,
"learning_rate": 7.72813905618663e-06,
"loss": 2.2807,
"step": 2980
},
{
"epoch": 0.23,
"learning_rate": 7.720515361744302e-06,
"loss": 2.3168,
"step": 2990
},
{
"epoch": 0.23,
"learning_rate": 7.712891667301976e-06,
"loss": 2.291,
"step": 3000
},
{
"epoch": 0.23,
"learning_rate": 7.705267972859648e-06,
"loss": 2.2896,
"step": 3010
},
{
"epoch": 0.23,
"learning_rate": 7.697644278417322e-06,
"loss": 2.285,
"step": 3020
},
{
"epoch": 0.23,
"learning_rate": 7.690020583974996e-06,
"loss": 2.2836,
"step": 3030
},
{
"epoch": 0.23,
"learning_rate": 7.682396889532668e-06,
"loss": 2.2945,
"step": 3040
},
{
"epoch": 0.23,
"learning_rate": 7.674773195090342e-06,
"loss": 2.299,
"step": 3050
},
{
"epoch": 0.23,
"learning_rate": 7.667149500648014e-06,
"loss": 2.3023,
"step": 3060
},
{
"epoch": 0.23,
"learning_rate": 7.659525806205688e-06,
"loss": 2.2889,
"step": 3070
},
{
"epoch": 0.23,
"learning_rate": 7.65190211176336e-06,
"loss": 2.265,
"step": 3080
},
{
"epoch": 0.24,
"learning_rate": 7.644278417321035e-06,
"loss": 2.276,
"step": 3090
},
{
"epoch": 0.24,
"learning_rate": 7.636654722878709e-06,
"loss": 2.2551,
"step": 3100
},
{
"epoch": 0.24,
"learning_rate": 7.629031028436381e-06,
"loss": 2.2389,
"step": 3110
},
{
"epoch": 0.24,
"learning_rate": 7.621407333994054e-06,
"loss": 2.252,
"step": 3120
},
{
"epoch": 0.24,
"learning_rate": 7.613783639551727e-06,
"loss": 2.2695,
"step": 3130
},
{
"epoch": 0.24,
"learning_rate": 7.6061599451094e-06,
"loss": 2.2432,
"step": 3140
},
{
"epoch": 0.24,
"learning_rate": 7.598536250667073e-06,
"loss": 2.2299,
"step": 3150
},
{
"epoch": 0.24,
"learning_rate": 7.590912556224747e-06,
"loss": 2.2291,
"step": 3160
},
{
"epoch": 0.24,
"learning_rate": 7.5832888617824205e-06,
"loss": 2.2617,
"step": 3170
},
{
"epoch": 0.24,
"learning_rate": 7.575665167340094e-06,
"loss": 2.268,
"step": 3180
},
{
"epoch": 0.24,
"learning_rate": 7.568041472897767e-06,
"loss": 2.2533,
"step": 3190
},
{
"epoch": 0.24,
"learning_rate": 7.56041777845544e-06,
"loss": 2.2246,
"step": 3200
},
{
"epoch": 0.24,
"learning_rate": 7.552794084013113e-06,
"loss": 2.2418,
"step": 3210
},
{
"epoch": 0.25,
"learning_rate": 7.545170389570786e-06,
"loss": 2.2529,
"step": 3220
},
{
"epoch": 0.25,
"learning_rate": 7.53754669512846e-06,
"loss": 2.2363,
"step": 3230
},
{
"epoch": 0.25,
"learning_rate": 7.529923000686133e-06,
"loss": 2.2176,
"step": 3240
},
{
"epoch": 0.25,
"learning_rate": 7.522299306243806e-06,
"loss": 2.2248,
"step": 3250
},
{
"epoch": 0.25,
"learning_rate": 7.5146756118014795e-06,
"loss": 2.1896,
"step": 3260
},
{
"epoch": 0.25,
"learning_rate": 7.507051917359153e-06,
"loss": 2.1836,
"step": 3270
},
{
"epoch": 0.25,
"learning_rate": 7.499428222916826e-06,
"loss": 2.2072,
"step": 3280
},
{
"epoch": 0.25,
"learning_rate": 7.491804528474499e-06,
"loss": 2.182,
"step": 3290
},
{
"epoch": 0.25,
"learning_rate": 7.484180834032173e-06,
"loss": 2.2078,
"step": 3300
},
{
"epoch": 0.25,
"learning_rate": 7.476557139589846e-06,
"loss": 2.202,
"step": 3310
},
{
"epoch": 0.25,
"learning_rate": 7.468933445147519e-06,
"loss": 2.2119,
"step": 3320
},
{
"epoch": 0.25,
"learning_rate": 7.461309750705192e-06,
"loss": 2.1961,
"step": 3330
},
{
"epoch": 0.25,
"learning_rate": 7.453686056262865e-06,
"loss": 2.1996,
"step": 3340
},
{
"epoch": 0.26,
"learning_rate": 7.4460623618205385e-06,
"loss": 2.1904,
"step": 3350
},
{
"epoch": 0.26,
"learning_rate": 7.438438667378212e-06,
"loss": 2.1912,
"step": 3360
},
{
"epoch": 0.26,
"learning_rate": 7.430814972935886e-06,
"loss": 2.2055,
"step": 3370
},
{
"epoch": 0.26,
"learning_rate": 7.423191278493559e-06,
"loss": 2.176,
"step": 3380
},
{
"epoch": 0.26,
"learning_rate": 7.415567584051232e-06,
"loss": 2.1834,
"step": 3390
},
{
"epoch": 0.26,
"learning_rate": 7.407943889608905e-06,
"loss": 2.1508,
"step": 3400
},
{
"epoch": 0.26,
"learning_rate": 7.400320195166578e-06,
"loss": 2.1447,
"step": 3410
},
{
"epoch": 0.26,
"learning_rate": 7.392696500724251e-06,
"loss": 2.1648,
"step": 3420
},
{
"epoch": 0.26,
"learning_rate": 7.385072806281924e-06,
"loss": 2.1432,
"step": 3430
},
{
"epoch": 0.26,
"learning_rate": 7.377449111839598e-06,
"loss": 2.1818,
"step": 3440
},
{
"epoch": 0.26,
"learning_rate": 7.3698254173972715e-06,
"loss": 2.1557,
"step": 3450
},
{
"epoch": 0.26,
"learning_rate": 7.362201722954945e-06,
"loss": 2.1549,
"step": 3460
},
{
"epoch": 0.26,
"learning_rate": 7.354578028512618e-06,
"loss": 2.1576,
"step": 3470
},
{
"epoch": 0.27,
"learning_rate": 7.346954334070291e-06,
"loss": 2.1533,
"step": 3480
},
{
"epoch": 0.27,
"learning_rate": 7.339330639627964e-06,
"loss": 2.1576,
"step": 3490
},
{
"epoch": 0.27,
"learning_rate": 7.331706945185637e-06,
"loss": 2.1648,
"step": 3500
},
{
"epoch": 0.27,
"learning_rate": 7.324083250743311e-06,
"loss": 2.1676,
"step": 3510
},
{
"epoch": 0.27,
"learning_rate": 7.316459556300984e-06,
"loss": 2.1617,
"step": 3520
},
{
"epoch": 0.27,
"learning_rate": 7.308835861858657e-06,
"loss": 2.1432,
"step": 3530
},
{
"epoch": 0.27,
"learning_rate": 7.3012121674163305e-06,
"loss": 2.1645,
"step": 3540
},
{
"epoch": 0.27,
"learning_rate": 7.293588472974004e-06,
"loss": 2.174,
"step": 3550
},
{
"epoch": 0.27,
"learning_rate": 7.285964778531677e-06,
"loss": 2.1467,
"step": 3560
},
{
"epoch": 0.27,
"learning_rate": 7.27834108408935e-06,
"loss": 2.1385,
"step": 3570
},
{
"epoch": 0.27,
"learning_rate": 7.270717389647024e-06,
"loss": 2.1344,
"step": 3580
},
{
"epoch": 0.27,
"learning_rate": 7.263093695204697e-06,
"loss": 2.1182,
"step": 3590
},
{
"epoch": 0.27,
"learning_rate": 7.25547000076237e-06,
"loss": 2.1393,
"step": 3600
},
{
"epoch": 0.28,
"learning_rate": 7.247846306320043e-06,
"loss": 2.1361,
"step": 3610
},
{
"epoch": 0.28,
"learning_rate": 7.240222611877716e-06,
"loss": 2.1328,
"step": 3620
},
{
"epoch": 0.28,
"learning_rate": 7.2325989174353895e-06,
"loss": 2.1322,
"step": 3630
},
{
"epoch": 0.28,
"learning_rate": 7.224975222993063e-06,
"loss": 2.1455,
"step": 3640
},
{
"epoch": 0.28,
"learning_rate": 7.217351528550737e-06,
"loss": 2.1078,
"step": 3650
},
{
"epoch": 0.28,
"learning_rate": 7.20972783410841e-06,
"loss": 2.1408,
"step": 3660
},
{
"epoch": 0.28,
"learning_rate": 7.202104139666083e-06,
"loss": 2.1225,
"step": 3670
},
{
"epoch": 0.28,
"learning_rate": 7.194480445223756e-06,
"loss": 2.1324,
"step": 3680
},
{
"epoch": 0.28,
"learning_rate": 7.186856750781429e-06,
"loss": 2.107,
"step": 3690
},
{
"epoch": 0.28,
"learning_rate": 7.179233056339102e-06,
"loss": 2.1271,
"step": 3700
},
{
"epoch": 0.28,
"learning_rate": 7.1716093618967754e-06,
"loss": 2.1141,
"step": 3710
},
{
"epoch": 0.28,
"learning_rate": 7.163985667454449e-06,
"loss": 2.1141,
"step": 3720
},
{
"epoch": 0.28,
"learning_rate": 7.1563619730121225e-06,
"loss": 2.0951,
"step": 3730
},
{
"epoch": 0.29,
"learning_rate": 7.148738278569796e-06,
"loss": 2.0971,
"step": 3740
},
{
"epoch": 0.29,
"learning_rate": 7.141114584127469e-06,
"loss": 2.1066,
"step": 3750
},
{
"epoch": 0.29,
"learning_rate": 7.133490889685142e-06,
"loss": 2.0941,
"step": 3760
},
{
"epoch": 0.29,
"learning_rate": 7.125867195242815e-06,
"loss": 2.1232,
"step": 3770
},
{
"epoch": 0.29,
"learning_rate": 7.118243500800488e-06,
"loss": 2.1021,
"step": 3780
},
{
"epoch": 0.29,
"learning_rate": 7.110619806358162e-06,
"loss": 2.0938,
"step": 3790
},
{
"epoch": 0.29,
"learning_rate": 7.102996111915835e-06,
"loss": 2.1164,
"step": 3800
},
{
"epoch": 0.29,
"learning_rate": 7.0953724174735084e-06,
"loss": 2.0836,
"step": 3810
},
{
"epoch": 0.29,
"learning_rate": 7.0877487230311816e-06,
"loss": 2.0916,
"step": 3820
},
{
"epoch": 0.29,
"learning_rate": 7.080125028588855e-06,
"loss": 2.1068,
"step": 3830
},
{
"epoch": 0.29,
"learning_rate": 7.072501334146528e-06,
"loss": 2.0977,
"step": 3840
},
{
"epoch": 0.29,
"learning_rate": 7.064877639704201e-06,
"loss": 2.0994,
"step": 3850
},
{
"epoch": 0.29,
"learning_rate": 7.057253945261875e-06,
"loss": 2.0932,
"step": 3860
},
{
"epoch": 0.3,
"learning_rate": 7.049630250819548e-06,
"loss": 2.099,
"step": 3870
},
{
"epoch": 0.3,
"learning_rate": 7.042006556377221e-06,
"loss": 2.1025,
"step": 3880
},
{
"epoch": 0.3,
"learning_rate": 7.034382861934894e-06,
"loss": 2.0963,
"step": 3890
},
{
"epoch": 0.3,
"learning_rate": 7.0267591674925675e-06,
"loss": 2.0982,
"step": 3900
},
{
"epoch": 0.3,
"learning_rate": 7.019135473050241e-06,
"loss": 2.0689,
"step": 3910
},
{
"epoch": 0.3,
"learning_rate": 7.011511778607914e-06,
"loss": 2.0893,
"step": 3920
},
{
"epoch": 0.3,
"learning_rate": 7.003888084165588e-06,
"loss": 2.0986,
"step": 3930
},
{
"epoch": 0.3,
"learning_rate": 6.996264389723261e-06,
"loss": 2.0852,
"step": 3940
},
{
"epoch": 0.3,
"learning_rate": 6.988640695280934e-06,
"loss": 2.1145,
"step": 3950
},
{
"epoch": 0.3,
"learning_rate": 6.981017000838607e-06,
"loss": 2.0885,
"step": 3960
},
{
"epoch": 0.3,
"learning_rate": 6.97339330639628e-06,
"loss": 2.0957,
"step": 3970
},
{
"epoch": 0.3,
"learning_rate": 6.965769611953953e-06,
"loss": 2.0619,
"step": 3980
},
{
"epoch": 0.3,
"learning_rate": 6.9581459175116265e-06,
"loss": 2.074,
"step": 3990
},
{
"epoch": 0.3,
"learning_rate": 6.9505222230693005e-06,
"loss": 2.0637,
"step": 4000
},
{
"epoch": 0.31,
"learning_rate": 6.942898528626974e-06,
"loss": 2.0822,
"step": 4010
},
{
"epoch": 0.31,
"learning_rate": 6.935274834184647e-06,
"loss": 2.0922,
"step": 4020
},
{
"epoch": 0.31,
"learning_rate": 6.92765113974232e-06,
"loss": 2.0863,
"step": 4030
},
{
"epoch": 0.31,
"learning_rate": 6.920027445299993e-06,
"loss": 2.0684,
"step": 4040
},
{
"epoch": 0.31,
"learning_rate": 6.912403750857666e-06,
"loss": 2.0896,
"step": 4050
},
{
"epoch": 0.31,
"learning_rate": 6.904780056415339e-06,
"loss": 2.0615,
"step": 4060
},
{
"epoch": 0.31,
"learning_rate": 6.897156361973013e-06,
"loss": 2.0467,
"step": 4070
},
{
"epoch": 0.31,
"learning_rate": 6.889532667530686e-06,
"loss": 2.0713,
"step": 4080
},
{
"epoch": 0.31,
"learning_rate": 6.8819089730883595e-06,
"loss": 2.0795,
"step": 4090
},
{
"epoch": 0.31,
"learning_rate": 6.874285278646033e-06,
"loss": 2.0762,
"step": 4100
},
{
"epoch": 0.31,
"learning_rate": 6.866661584203706e-06,
"loss": 2.0539,
"step": 4110
},
{
"epoch": 0.31,
"learning_rate": 6.859037889761379e-06,
"loss": 2.066,
"step": 4120
},
{
"epoch": 0.31,
"learning_rate": 6.851414195319053e-06,
"loss": 2.082,
"step": 4130
},
{
"epoch": 0.32,
"learning_rate": 6.843790500876726e-06,
"loss": 2.076,
"step": 4140
},
{
"epoch": 0.32,
"learning_rate": 6.836166806434399e-06,
"loss": 2.084,
"step": 4150
},
{
"epoch": 0.32,
"learning_rate": 6.828543111992072e-06,
"loss": 2.0691,
"step": 4160
},
{
"epoch": 0.32,
"learning_rate": 6.820919417549745e-06,
"loss": 2.0727,
"step": 4170
},
{
"epoch": 0.32,
"learning_rate": 6.8132957231074185e-06,
"loss": 2.0645,
"step": 4180
},
{
"epoch": 0.32,
"learning_rate": 6.805672028665092e-06,
"loss": 2.0625,
"step": 4190
},
{
"epoch": 0.32,
"learning_rate": 6.798048334222766e-06,
"loss": 2.0729,
"step": 4200
},
{
"epoch": 0.32,
"learning_rate": 6.790424639780439e-06,
"loss": 2.0545,
"step": 4210
},
{
"epoch": 0.32,
"learning_rate": 6.782800945338112e-06,
"loss": 2.0551,
"step": 4220
},
{
"epoch": 0.32,
"learning_rate": 6.775177250895785e-06,
"loss": 2.0553,
"step": 4230
},
{
"epoch": 0.32,
"learning_rate": 6.767553556453458e-06,
"loss": 2.0568,
"step": 4240
},
{
"epoch": 0.32,
"learning_rate": 6.759929862011131e-06,
"loss": 2.0494,
"step": 4250
},
{
"epoch": 0.32,
"learning_rate": 6.752306167568804e-06,
"loss": 2.052,
"step": 4260
},
{
"epoch": 0.33,
"learning_rate": 6.744682473126478e-06,
"loss": 2.0559,
"step": 4270
},
{
"epoch": 0.33,
"learning_rate": 6.7370587786841515e-06,
"loss": 2.0336,
"step": 4280
},
{
"epoch": 0.33,
"learning_rate": 6.729435084241825e-06,
"loss": 2.0472,
"step": 4290
},
{
"epoch": 0.33,
"learning_rate": 6.721811389799498e-06,
"loss": 2.0562,
"step": 4300
},
{
"epoch": 0.33,
"learning_rate": 6.714187695357171e-06,
"loss": 2.0545,
"step": 4310
},
{
"epoch": 0.33,
"learning_rate": 6.706564000914844e-06,
"loss": 2.0535,
"step": 4320
},
{
"epoch": 0.33,
"learning_rate": 6.698940306472516e-06,
"loss": 2.0263,
"step": 4330
},
{
"epoch": 0.33,
"learning_rate": 6.691316612030191e-06,
"loss": 2.058,
"step": 4340
},
{
"epoch": 0.33,
"learning_rate": 6.683692917587864e-06,
"loss": 2.0533,
"step": 4350
},
{
"epoch": 0.33,
"learning_rate": 6.676069223145537e-06,
"loss": 2.0605,
"step": 4360
},
{
"epoch": 0.33,
"learning_rate": 6.6684455287032105e-06,
"loss": 2.0426,
"step": 4370
},
{
"epoch": 0.33,
"learning_rate": 6.660821834260884e-06,
"loss": 2.05,
"step": 4380
},
{
"epoch": 0.33,
"learning_rate": 6.653198139818556e-06,
"loss": 2.0334,
"step": 4390
},
{
"epoch": 0.34,
"learning_rate": 6.645574445376229e-06,
"loss": 2.0363,
"step": 4400
},
{
"epoch": 0.34,
"learning_rate": 6.637950750933904e-06,
"loss": 2.0394,
"step": 4410
},
{
"epoch": 0.34,
"learning_rate": 6.630327056491577e-06,
"loss": 2.0482,
"step": 4420
},
{
"epoch": 0.34,
"learning_rate": 6.62270336204925e-06,
"loss": 2.0266,
"step": 4430
},
{
"epoch": 0.34,
"learning_rate": 6.615079667606923e-06,
"loss": 2.0434,
"step": 4440
},
{
"epoch": 0.34,
"learning_rate": 6.6074559731645956e-06,
"loss": 2.0577,
"step": 4450
},
{
"epoch": 0.34,
"learning_rate": 6.599832278722269e-06,
"loss": 2.0351,
"step": 4460
},
{
"epoch": 0.34,
"learning_rate": 6.592208584279942e-06,
"loss": 2.0363,
"step": 4470
},
{
"epoch": 0.34,
"learning_rate": 6.584584889837617e-06,
"loss": 2.0621,
"step": 4480
},
{
"epoch": 0.34,
"learning_rate": 6.57696119539529e-06,
"loss": 2.0369,
"step": 4490
},
{
"epoch": 0.34,
"learning_rate": 6.569337500952963e-06,
"loss": 2.0353,
"step": 4500
},
{
"epoch": 0.34,
"learning_rate": 6.561713806510635e-06,
"loss": 2.0434,
"step": 4510
},
{
"epoch": 0.34,
"learning_rate": 6.554090112068308e-06,
"loss": 2.0469,
"step": 4520
},
{
"epoch": 0.35,
"learning_rate": 6.5464664176259815e-06,
"loss": 2.0525,
"step": 4530
},
{
"epoch": 0.35,
"learning_rate": 6.538842723183655e-06,
"loss": 2.048,
"step": 4540
},
{
"epoch": 0.35,
"learning_rate": 6.531219028741329e-06,
"loss": 2.041,
"step": 4550
},
{
"epoch": 0.35,
"learning_rate": 6.5235953342990025e-06,
"loss": 2.0564,
"step": 4560
},
{
"epoch": 0.35,
"learning_rate": 6.515971639856676e-06,
"loss": 2.0571,
"step": 4570
},
{
"epoch": 0.35,
"learning_rate": 6.508347945414348e-06,
"loss": 2.0462,
"step": 4580
},
{
"epoch": 0.35,
"learning_rate": 6.500724250972021e-06,
"loss": 2.0322,
"step": 4590
},
{
"epoch": 0.35,
"learning_rate": 6.493100556529694e-06,
"loss": 2.0574,
"step": 4600
},
{
"epoch": 0.35,
"learning_rate": 6.485476862087367e-06,
"loss": 2.0396,
"step": 4610
},
{
"epoch": 0.35,
"learning_rate": 6.477853167645042e-06,
"loss": 2.0214,
"step": 4620
},
{
"epoch": 0.35,
"learning_rate": 6.470229473202715e-06,
"loss": 2.0372,
"step": 4630
},
{
"epoch": 0.35,
"learning_rate": 6.462605778760388e-06,
"loss": 2.0311,
"step": 4640
},
{
"epoch": 0.35,
"learning_rate": 6.454982084318061e-06,
"loss": 2.049,
"step": 4650
},
{
"epoch": 0.36,
"learning_rate": 6.447358389875734e-06,
"loss": 2.0438,
"step": 4660
},
{
"epoch": 0.36,
"learning_rate": 6.439734695433407e-06,
"loss": 2.0396,
"step": 4670
},
{
"epoch": 0.36,
"learning_rate": 6.43211100099108e-06,
"loss": 2.0375,
"step": 4680
},
{
"epoch": 0.36,
"learning_rate": 6.424487306548755e-06,
"loss": 2.0537,
"step": 4690
},
{
"epoch": 0.36,
"learning_rate": 6.416863612106427e-06,
"loss": 2.0272,
"step": 4700
},
{
"epoch": 0.36,
"learning_rate": 6.4092399176641e-06,
"loss": 2.0449,
"step": 4710
},
{
"epoch": 0.36,
"learning_rate": 6.4016162232217735e-06,
"loss": 2.0355,
"step": 4720
},
{
"epoch": 0.36,
"learning_rate": 6.393992528779447e-06,
"loss": 2.0334,
"step": 4730
},
{
"epoch": 0.36,
"learning_rate": 6.38636883433712e-06,
"loss": 2.0424,
"step": 4740
},
{
"epoch": 0.36,
"learning_rate": 6.378745139894793e-06,
"loss": 2.0303,
"step": 4750
},
{
"epoch": 0.36,
"learning_rate": 6.371121445452467e-06,
"loss": 2.0482,
"step": 4760
},
{
"epoch": 0.36,
"learning_rate": 6.36349775101014e-06,
"loss": 2.0299,
"step": 4770
},
{
"epoch": 0.36,
"learning_rate": 6.355874056567813e-06,
"loss": 2.0357,
"step": 4780
},
{
"epoch": 0.37,
"learning_rate": 6.348250362125486e-06,
"loss": 2.032,
"step": 4790
},
{
"epoch": 0.37,
"learning_rate": 6.340626667683159e-06,
"loss": 2.0398,
"step": 4800
},
{
"epoch": 0.37,
"learning_rate": 6.3330029732408325e-06,
"loss": 2.027,
"step": 4810
},
{
"epoch": 0.37,
"learning_rate": 6.325379278798506e-06,
"loss": 2.0309,
"step": 4820
},
{
"epoch": 0.37,
"learning_rate": 6.31775558435618e-06,
"loss": 2.024,
"step": 4830
},
{
"epoch": 0.37,
"learning_rate": 6.310131889913853e-06,
"loss": 2.0318,
"step": 4840
},
{
"epoch": 0.37,
"learning_rate": 6.302508195471526e-06,
"loss": 2.0342,
"step": 4850
},
{
"epoch": 0.37,
"learning_rate": 6.294884501029199e-06,
"loss": 2.0428,
"step": 4860
},
{
"epoch": 0.37,
"learning_rate": 6.287260806586872e-06,
"loss": 2.0347,
"step": 4870
},
{
"epoch": 0.37,
"learning_rate": 6.279637112144545e-06,
"loss": 2.0327,
"step": 4880
},
{
"epoch": 0.37,
"learning_rate": 6.272013417702218e-06,
"loss": 2.0364,
"step": 4890
},
{
"epoch": 0.37,
"learning_rate": 6.264389723259892e-06,
"loss": 2.0394,
"step": 4900
},
{
"epoch": 0.37,
"learning_rate": 6.2567660288175655e-06,
"loss": 2.0394,
"step": 4910
},
{
"epoch": 0.38,
"learning_rate": 6.249142334375239e-06,
"loss": 2.0299,
"step": 4920
},
{
"epoch": 0.38,
"learning_rate": 6.241518639932912e-06,
"loss": 2.0387,
"step": 4930
},
{
"epoch": 0.38,
"learning_rate": 6.233894945490585e-06,
"loss": 2.0266,
"step": 4940
},
{
"epoch": 0.38,
"learning_rate": 6.226271251048258e-06,
"loss": 2.0578,
"step": 4950
},
{
"epoch": 0.38,
"learning_rate": 6.218647556605931e-06,
"loss": 2.0301,
"step": 4960
},
{
"epoch": 0.38,
"learning_rate": 6.211023862163605e-06,
"loss": 2.0484,
"step": 4970
},
{
"epoch": 0.38,
"learning_rate": 6.203400167721278e-06,
"loss": 2.0398,
"step": 4980
},
{
"epoch": 0.38,
"learning_rate": 6.195776473278951e-06,
"loss": 2.0304,
"step": 4990
},
{
"epoch": 0.38,
"learning_rate": 6.1881527788366245e-06,
"loss": 2.0255,
"step": 5000
},
{
"epoch": 0.38,
"learning_rate": 6.180529084394298e-06,
"loss": 2.0346,
"step": 5010
},
{
"epoch": 0.38,
"learning_rate": 6.172905389951971e-06,
"loss": 2.0393,
"step": 5020
},
{
"epoch": 0.38,
"learning_rate": 6.165281695509644e-06,
"loss": 2.0346,
"step": 5030
},
{
"epoch": 0.38,
"learning_rate": 6.157658001067318e-06,
"loss": 2.0217,
"step": 5040
},
{
"epoch": 0.38,
"learning_rate": 6.150034306624991e-06,
"loss": 2.0355,
"step": 5050
},
{
"epoch": 0.39,
"learning_rate": 6.142410612182664e-06,
"loss": 2.035,
"step": 5060
},
{
"epoch": 0.39,
"learning_rate": 6.134786917740337e-06,
"loss": 2.0451,
"step": 5070
},
{
"epoch": 0.39,
"learning_rate": 6.12716322329801e-06,
"loss": 2.0346,
"step": 5080
},
{
"epoch": 0.39,
"learning_rate": 6.1195395288556835e-06,
"loss": 2.0338,
"step": 5090
},
{
"epoch": 0.39,
"learning_rate": 6.111915834413357e-06,
"loss": 2.0347,
"step": 5100
},
{
"epoch": 0.39,
"learning_rate": 6.104292139971031e-06,
"loss": 2.0337,
"step": 5110
},
{
"epoch": 0.39,
"learning_rate": 6.096668445528704e-06,
"loss": 2.0334,
"step": 5120
},
{
"epoch": 0.39,
"learning_rate": 6.089044751086377e-06,
"loss": 2.0412,
"step": 5130
},
{
"epoch": 0.39,
"learning_rate": 6.08142105664405e-06,
"loss": 2.0336,
"step": 5140
},
{
"epoch": 0.39,
"learning_rate": 6.073797362201723e-06,
"loss": 2.0422,
"step": 5150
},
{
"epoch": 0.39,
"learning_rate": 6.066173667759396e-06,
"loss": 2.0469,
"step": 5160
},
{
"epoch": 0.39,
"learning_rate": 6.0585499733170694e-06,
"loss": 2.0363,
"step": 5170
},
{
"epoch": 0.39,
"learning_rate": 6.050926278874743e-06,
"loss": 2.0424,
"step": 5180
},
{
"epoch": 0.4,
"learning_rate": 6.0433025844324165e-06,
"loss": 2.03,
"step": 5190
},
{
"epoch": 0.4,
"learning_rate": 6.03567888999009e-06,
"loss": 2.0365,
"step": 5200
},
{
"epoch": 0.4,
"learning_rate": 6.028055195547763e-06,
"loss": 2.0434,
"step": 5210
},
{
"epoch": 0.4,
"learning_rate": 6.020431501105436e-06,
"loss": 2.0355,
"step": 5220
},
{
"epoch": 0.4,
"learning_rate": 6.012807806663109e-06,
"loss": 2.0566,
"step": 5230
},
{
"epoch": 0.4,
"learning_rate": 6.005184112220783e-06,
"loss": 2.0328,
"step": 5240
},
{
"epoch": 0.4,
"learning_rate": 5.997560417778456e-06,
"loss": 2.0492,
"step": 5250
},
{
"epoch": 0.4,
"learning_rate": 5.989936723336129e-06,
"loss": 2.0398,
"step": 5260
},
{
"epoch": 0.4,
"learning_rate": 5.9823130288938024e-06,
"loss": 2.0492,
"step": 5270
},
{
"epoch": 0.4,
"learning_rate": 5.9746893344514756e-06,
"loss": 2.0533,
"step": 5280
},
{
"epoch": 0.4,
"learning_rate": 5.967065640009149e-06,
"loss": 2.041,
"step": 5290
},
{
"epoch": 0.4,
"learning_rate": 5.959441945566822e-06,
"loss": 2.05,
"step": 5300
},
{
"epoch": 0.4,
"learning_rate": 5.951818251124496e-06,
"loss": 2.048,
"step": 5310
},
{
"epoch": 0.41,
"learning_rate": 5.944194556682169e-06,
"loss": 2.0496,
"step": 5320
},
{
"epoch": 0.41,
"learning_rate": 5.936570862239842e-06,
"loss": 2.0385,
"step": 5330
},
{
"epoch": 0.41,
"learning_rate": 5.928947167797515e-06,
"loss": 2.0449,
"step": 5340
},
{
"epoch": 0.41,
"learning_rate": 5.921323473355188e-06,
"loss": 2.0557,
"step": 5350
},
{
"epoch": 0.41,
"learning_rate": 5.9136997789128614e-06,
"loss": 2.0461,
"step": 5360
},
{
"epoch": 0.41,
"learning_rate": 5.906076084470535e-06,
"loss": 2.0613,
"step": 5370
},
{
"epoch": 0.41,
"learning_rate": 5.8984523900282086e-06,
"loss": 2.0502,
"step": 5380
},
{
"epoch": 0.41,
"learning_rate": 5.890828695585882e-06,
"loss": 2.0527,
"step": 5390
},
{
"epoch": 0.41,
"learning_rate": 5.883205001143555e-06,
"loss": 2.0432,
"step": 5400
},
{
"epoch": 0.41,
"learning_rate": 5.875581306701228e-06,
"loss": 2.0625,
"step": 5410
},
{
"epoch": 0.41,
"learning_rate": 5.867957612258901e-06,
"loss": 2.0451,
"step": 5420
},
{
"epoch": 0.41,
"learning_rate": 5.860333917816574e-06,
"loss": 2.0502,
"step": 5430
},
{
"epoch": 0.41,
"learning_rate": 5.852710223374247e-06,
"loss": 2.0471,
"step": 5440
},
{
"epoch": 0.42,
"learning_rate": 5.845086528931921e-06,
"loss": 2.0594,
"step": 5450
},
{
"epoch": 0.42,
"learning_rate": 5.8374628344895944e-06,
"loss": 2.0691,
"step": 5460
},
{
"epoch": 0.42,
"learning_rate": 5.8298391400472676e-06,
"loss": 2.0635,
"step": 5470
},
{
"epoch": 0.42,
"learning_rate": 5.822215445604941e-06,
"loss": 2.0578,
"step": 5480
},
{
"epoch": 0.42,
"learning_rate": 5.814591751162614e-06,
"loss": 2.0492,
"step": 5490
},
{
"epoch": 0.42,
"learning_rate": 5.806968056720287e-06,
"loss": 2.0477,
"step": 5500
},
{
"epoch": 0.42,
"learning_rate": 5.79934436227796e-06,
"loss": 2.0588,
"step": 5510
},
{
"epoch": 0.42,
"learning_rate": 5.791720667835634e-06,
"loss": 2.0828,
"step": 5520
},
{
"epoch": 0.42,
"learning_rate": 5.784096973393307e-06,
"loss": 2.0654,
"step": 5530
},
{
"epoch": 0.42,
"learning_rate": 5.77647327895098e-06,
"loss": 2.0557,
"step": 5540
},
{
"epoch": 0.42,
"learning_rate": 5.7688495845086535e-06,
"loss": 2.0631,
"step": 5550
},
{
"epoch": 0.42,
"learning_rate": 5.761225890066327e-06,
"loss": 2.0498,
"step": 5560
},
{
"epoch": 0.42,
"learning_rate": 5.753602195624e-06,
"loss": 2.0621,
"step": 5570
},
{
"epoch": 0.43,
"learning_rate": 5.745978501181673e-06,
"loss": 2.0678,
"step": 5580
},
{
"epoch": 0.43,
"learning_rate": 5.738354806739347e-06,
"loss": 2.0576,
"step": 5590
},
{
"epoch": 0.43,
"learning_rate": 5.73073111229702e-06,
"loss": 2.0664,
"step": 5600
},
{
"epoch": 0.43,
"learning_rate": 5.723107417854693e-06,
"loss": 2.0656,
"step": 5610
},
{
"epoch": 0.43,
"learning_rate": 5.715483723412366e-06,
"loss": 2.0602,
"step": 5620
},
{
"epoch": 0.43,
"learning_rate": 5.707860028970039e-06,
"loss": 2.0629,
"step": 5630
},
{
"epoch": 0.43,
"learning_rate": 5.7002363345277125e-06,
"loss": 2.0559,
"step": 5640
},
{
"epoch": 0.43,
"learning_rate": 5.692612640085386e-06,
"loss": 2.0746,
"step": 5650
},
{
"epoch": 0.43,
"learning_rate": 5.68498894564306e-06,
"loss": 2.0811,
"step": 5660
},
{
"epoch": 0.43,
"learning_rate": 5.677365251200733e-06,
"loss": 2.066,
"step": 5670
},
{
"epoch": 0.43,
"learning_rate": 5.669741556758406e-06,
"loss": 2.067,
"step": 5680
},
{
"epoch": 0.43,
"learning_rate": 5.662117862316079e-06,
"loss": 2.0666,
"step": 5690
},
{
"epoch": 0.43,
"learning_rate": 5.654494167873752e-06,
"loss": 2.0752,
"step": 5700
},
{
"epoch": 0.44,
"learning_rate": 5.646870473431425e-06,
"loss": 2.0674,
"step": 5710
},
{
"epoch": 0.44,
"learning_rate": 5.639246778989098e-06,
"loss": 2.0709,
"step": 5720
},
{
"epoch": 0.44,
"learning_rate": 5.631623084546772e-06,
"loss": 2.0773,
"step": 5730
},
{
"epoch": 0.44,
"learning_rate": 5.6239993901044455e-06,
"loss": 2.0711,
"step": 5740
},
{
"epoch": 0.44,
"learning_rate": 5.616375695662119e-06,
"loss": 2.0736,
"step": 5750
},
{
"epoch": 0.44,
"learning_rate": 5.608752001219792e-06,
"loss": 2.0818,
"step": 5760
},
{
"epoch": 0.44,
"learning_rate": 5.601128306777465e-06,
"loss": 2.0799,
"step": 5770
},
{
"epoch": 0.44,
"learning_rate": 5.593504612335138e-06,
"loss": 2.0695,
"step": 5780
},
{
"epoch": 0.44,
"learning_rate": 5.585880917892811e-06,
"loss": 2.0869,
"step": 5790
},
{
"epoch": 0.44,
"learning_rate": 5.578257223450485e-06,
"loss": 2.0715,
"step": 5800
},
{
"epoch": 0.44,
"learning_rate": 5.570633529008158e-06,
"loss": 2.0811,
"step": 5810
},
{
"epoch": 0.44,
"learning_rate": 5.563009834565831e-06,
"loss": 2.0723,
"step": 5820
},
{
"epoch": 0.44,
"learning_rate": 5.5553861401235045e-06,
"loss": 2.076,
"step": 5830
},
{
"epoch": 0.45,
"learning_rate": 5.547762445681178e-06,
"loss": 2.0818,
"step": 5840
},
{
"epoch": 0.45,
"learning_rate": 5.540138751238851e-06,
"loss": 2.0766,
"step": 5850
},
{
"epoch": 0.45,
"learning_rate": 5.532515056796524e-06,
"loss": 2.0785,
"step": 5860
},
{
"epoch": 0.45,
"learning_rate": 5.524891362354198e-06,
"loss": 2.0932,
"step": 5870
},
{
"epoch": 0.45,
"learning_rate": 5.517267667911871e-06,
"loss": 2.0781,
"step": 5880
},
{
"epoch": 0.45,
"learning_rate": 5.509643973469544e-06,
"loss": 2.0846,
"step": 5890
},
{
"epoch": 0.45,
"learning_rate": 5.502020279027217e-06,
"loss": 2.0805,
"step": 5900
},
{
"epoch": 0.45,
"learning_rate": 5.49439658458489e-06,
"loss": 2.0871,
"step": 5910
},
{
"epoch": 0.45,
"learning_rate": 5.4867728901425635e-06,
"loss": 2.076,
"step": 5920
},
{
"epoch": 0.45,
"learning_rate": 5.479149195700237e-06,
"loss": 2.0955,
"step": 5930
},
{
"epoch": 0.45,
"learning_rate": 5.471525501257911e-06,
"loss": 2.0824,
"step": 5940
},
{
"epoch": 0.45,
"learning_rate": 5.463901806815584e-06,
"loss": 2.0908,
"step": 5950
},
{
"epoch": 0.45,
"learning_rate": 5.456278112373257e-06,
"loss": 2.0912,
"step": 5960
},
{
"epoch": 0.46,
"learning_rate": 5.44865441793093e-06,
"loss": 2.0803,
"step": 5970
},
{
"epoch": 0.46,
"learning_rate": 5.441030723488603e-06,
"loss": 2.0879,
"step": 5980
},
{
"epoch": 0.46,
"learning_rate": 5.433407029046276e-06,
"loss": 2.0838,
"step": 5990
},
{
"epoch": 0.46,
"learning_rate": 5.425783334603949e-06,
"loss": 2.0855,
"step": 6000
},
{
"epoch": 0.46,
"learning_rate": 5.418159640161623e-06,
"loss": 2.0756,
"step": 6010
},
{
"epoch": 0.46,
"learning_rate": 5.4105359457192965e-06,
"loss": 2.0959,
"step": 6020
},
{
"epoch": 0.46,
"learning_rate": 5.40291225127697e-06,
"loss": 2.1043,
"step": 6030
},
{
"epoch": 0.46,
"learning_rate": 5.395288556834643e-06,
"loss": 2.0967,
"step": 6040
},
{
"epoch": 0.46,
"learning_rate": 5.387664862392316e-06,
"loss": 2.0955,
"step": 6050
},
{
"epoch": 0.46,
"learning_rate": 5.380041167949989e-06,
"loss": 2.109,
"step": 6060
},
{
"epoch": 0.46,
"learning_rate": 5.372417473507661e-06,
"loss": 2.0945,
"step": 6070
},
{
"epoch": 0.46,
"learning_rate": 5.364793779065336e-06,
"loss": 2.0941,
"step": 6080
},
{
"epoch": 0.46,
"learning_rate": 5.357170084623009e-06,
"loss": 2.0947,
"step": 6090
},
{
"epoch": 0.47,
"learning_rate": 5.349546390180682e-06,
"loss": 2.1063,
"step": 6100
},
{
"epoch": 0.47,
"learning_rate": 5.3419226957383556e-06,
"loss": 2.0994,
"step": 6110
},
{
"epoch": 0.47,
"learning_rate": 5.334299001296029e-06,
"loss": 2.0977,
"step": 6120
},
{
"epoch": 0.47,
"learning_rate": 5.326675306853701e-06,
"loss": 2.1,
"step": 6130
},
{
"epoch": 0.47,
"learning_rate": 5.319051612411374e-06,
"loss": 2.1055,
"step": 6140
},
{
"epoch": 0.47,
"learning_rate": 5.311427917969049e-06,
"loss": 2.1027,
"step": 6150
},
{
"epoch": 0.47,
"learning_rate": 5.303804223526722e-06,
"loss": 2.0982,
"step": 6160
},
{
"epoch": 0.47,
"learning_rate": 5.296180529084395e-06,
"loss": 2.1033,
"step": 6170
},
{
"epoch": 0.47,
"learning_rate": 5.288556834642068e-06,
"loss": 2.1006,
"step": 6180
},
{
"epoch": 0.47,
"learning_rate": 5.280933140199741e-06,
"loss": 2.1041,
"step": 6190
},
{
"epoch": 0.47,
"learning_rate": 5.273309445757414e-06,
"loss": 2.1023,
"step": 6200
},
{
"epoch": 0.47,
"learning_rate": 5.265685751315087e-06,
"loss": 2.101,
"step": 6210
},
{
"epoch": 0.47,
"learning_rate": 5.258062056872762e-06,
"loss": 2.1107,
"step": 6220
},
{
"epoch": 0.47,
"learning_rate": 5.250438362430435e-06,
"loss": 2.1088,
"step": 6230
},
{
"epoch": 0.48,
"learning_rate": 5.242814667988108e-06,
"loss": 2.1156,
"step": 6240
},
{
"epoch": 0.48,
"learning_rate": 5.235190973545781e-06,
"loss": 2.1107,
"step": 6250
},
{
"epoch": 0.48,
"learning_rate": 5.227567279103453e-06,
"loss": 2.1043,
"step": 6260
},
{
"epoch": 0.48,
"learning_rate": 5.2199435846611265e-06,
"loss": 2.1189,
"step": 6270
},
{
"epoch": 0.48,
"learning_rate": 5.2123198902188e-06,
"loss": 2.1127,
"step": 6280
},
{
"epoch": 0.48,
"learning_rate": 5.2046961957764744e-06,
"loss": 2.1166,
"step": 6290
},
{
"epoch": 0.48,
"learning_rate": 5.1970725013341476e-06,
"loss": 2.1162,
"step": 6300
},
{
"epoch": 0.48,
"learning_rate": 5.189448806891821e-06,
"loss": 2.1133,
"step": 6310
},
{
"epoch": 0.48,
"learning_rate": 5.181825112449493e-06,
"loss": 2.1123,
"step": 6320
},
{
"epoch": 0.48,
"learning_rate": 5.174201418007166e-06,
"loss": 2.1129,
"step": 6330
},
{
"epoch": 0.48,
"learning_rate": 5.166577723564839e-06,
"loss": 2.1121,
"step": 6340
},
{
"epoch": 0.48,
"learning_rate": 5.158954029122512e-06,
"loss": 2.1117,
"step": 6350
},
{
"epoch": 0.48,
"learning_rate": 5.151330334680187e-06,
"loss": 2.1299,
"step": 6360
},
{
"epoch": 0.49,
"learning_rate": 5.14370664023786e-06,
"loss": 2.1189,
"step": 6370
},
{
"epoch": 0.49,
"learning_rate": 5.136082945795533e-06,
"loss": 2.115,
"step": 6380
},
{
"epoch": 0.49,
"learning_rate": 5.128459251353206e-06,
"loss": 2.1154,
"step": 6390
},
{
"epoch": 0.49,
"learning_rate": 5.120835556910879e-06,
"loss": 2.1184,
"step": 6400
},
{
"epoch": 0.49,
"learning_rate": 5.113211862468552e-06,
"loss": 2.1283,
"step": 6410
},
{
"epoch": 0.49,
"learning_rate": 5.105588168026227e-06,
"loss": 2.1189,
"step": 6420
},
{
"epoch": 0.49,
"learning_rate": 5.0979644735839e-06,
"loss": 2.1111,
"step": 6430
},
{
"epoch": 0.49,
"learning_rate": 5.090340779141572e-06,
"loss": 2.1221,
"step": 6440
},
{
"epoch": 0.49,
"learning_rate": 5.082717084699245e-06,
"loss": 2.1205,
"step": 6450
},
{
"epoch": 0.49,
"learning_rate": 5.0750933902569185e-06,
"loss": 2.1203,
"step": 6460
},
{
"epoch": 0.49,
"learning_rate": 5.067469695814592e-06,
"loss": 2.1207,
"step": 6470
},
{
"epoch": 0.49,
"learning_rate": 5.059846001372265e-06,
"loss": 2.1307,
"step": 6480
},
{
"epoch": 0.49,
"learning_rate": 5.05222230692994e-06,
"loss": 2.1279,
"step": 6490
},
{
"epoch": 0.5,
"learning_rate": 5.044598612487612e-06,
"loss": 2.1328,
"step": 6500
},
{
"epoch": 0.5,
"learning_rate": 5.036974918045285e-06,
"loss": 2.1258,
"step": 6510
},
{
"epoch": 0.5,
"learning_rate": 5.029351223602958e-06,
"loss": 2.1244,
"step": 6520
},
{
"epoch": 0.5,
"learning_rate": 5.021727529160631e-06,
"loss": 2.1348,
"step": 6530
},
{
"epoch": 0.5,
"learning_rate": 5.014103834718304e-06,
"loss": 2.127,
"step": 6540
},
{
"epoch": 0.5,
"learning_rate": 5.0064801402759775e-06,
"loss": 2.1283,
"step": 6550
},
{
"epoch": 0.5,
"learning_rate": 4.9988564458336515e-06,
"loss": 2.1297,
"step": 6560
},
{
"epoch": 0.5,
"learning_rate": 4.991232751391325e-06,
"loss": 2.1398,
"step": 6570
},
{
"epoch": 0.5,
"learning_rate": 4.983609056948998e-06,
"loss": 2.1322,
"step": 6580
},
{
"epoch": 0.5,
"learning_rate": 4.975985362506671e-06,
"loss": 2.1365,
"step": 6590
},
{
"epoch": 0.5,
"learning_rate": 4.968361668064344e-06,
"loss": 2.1221,
"step": 6600
},
{
"epoch": 0.5,
"learning_rate": 4.960737973622017e-06,
"loss": 2.1385,
"step": 6610
},
{
"epoch": 0.5,
"learning_rate": 4.953114279179691e-06,
"loss": 2.1387,
"step": 6620
},
{
"epoch": 0.51,
"learning_rate": 4.945490584737364e-06,
"loss": 2.1324,
"step": 6630
},
{
"epoch": 0.51,
"learning_rate": 4.937866890295037e-06,
"loss": 2.1234,
"step": 6640
},
{
"epoch": 0.51,
"learning_rate": 4.9302431958527105e-06,
"loss": 2.1393,
"step": 6650
},
{
"epoch": 0.51,
"learning_rate": 4.922619501410384e-06,
"loss": 2.1354,
"step": 6660
},
{
"epoch": 0.51,
"learning_rate": 4.914995806968057e-06,
"loss": 2.1361,
"step": 6670
},
{
"epoch": 0.51,
"learning_rate": 4.907372112525731e-06,
"loss": 2.1305,
"step": 6680
},
{
"epoch": 0.51,
"learning_rate": 4.899748418083404e-06,
"loss": 2.1346,
"step": 6690
},
{
"epoch": 0.51,
"learning_rate": 4.892124723641077e-06,
"loss": 2.1426,
"step": 6700
},
{
"epoch": 0.51,
"learning_rate": 4.88450102919875e-06,
"loss": 2.1357,
"step": 6710
},
{
"epoch": 0.51,
"learning_rate": 4.876877334756423e-06,
"loss": 2.1373,
"step": 6720
},
{
"epoch": 0.51,
"learning_rate": 4.869253640314096e-06,
"loss": 2.1367,
"step": 6730
},
{
"epoch": 0.51,
"learning_rate": 4.8616299458717695e-06,
"loss": 2.1273,
"step": 6740
},
{
"epoch": 0.51,
"learning_rate": 4.8540062514294435e-06,
"loss": 2.1436,
"step": 6750
},
{
"epoch": 0.52,
"learning_rate": 4.846382556987117e-06,
"loss": 2.1396,
"step": 6760
},
{
"epoch": 0.52,
"learning_rate": 4.83875886254479e-06,
"loss": 2.1492,
"step": 6770
},
{
"epoch": 0.52,
"learning_rate": 4.831135168102463e-06,
"loss": 2.1594,
"step": 6780
},
{
"epoch": 0.52,
"learning_rate": 4.823511473660136e-06,
"loss": 2.1484,
"step": 6790
},
{
"epoch": 0.52,
"learning_rate": 4.815887779217809e-06,
"loss": 2.1502,
"step": 6800
},
{
"epoch": 0.52,
"learning_rate": 4.808264084775482e-06,
"loss": 2.1383,
"step": 6810
},
{
"epoch": 0.52,
"learning_rate": 4.800640390333156e-06,
"loss": 2.1465,
"step": 6820
},
{
"epoch": 0.52,
"learning_rate": 4.793016695890829e-06,
"loss": 2.1533,
"step": 6830
},
{
"epoch": 0.52,
"learning_rate": 4.7853930014485025e-06,
"loss": 2.1541,
"step": 6840
},
{
"epoch": 0.52,
"learning_rate": 4.777769307006176e-06,
"loss": 2.149,
"step": 6850
},
{
"epoch": 0.52,
"learning_rate": 4.770145612563849e-06,
"loss": 2.1529,
"step": 6860
},
{
"epoch": 0.52,
"learning_rate": 4.762521918121522e-06,
"loss": 2.1562,
"step": 6870
},
{
"epoch": 0.52,
"learning_rate": 4.754898223679195e-06,
"loss": 2.1553,
"step": 6880
},
{
"epoch": 0.53,
"learning_rate": 4.747274529236869e-06,
"loss": 2.1527,
"step": 6890
},
{
"epoch": 0.53,
"learning_rate": 4.739650834794542e-06,
"loss": 2.1514,
"step": 6900
},
{
"epoch": 0.53,
"learning_rate": 4.732027140352215e-06,
"loss": 2.1623,
"step": 6910
},
{
"epoch": 0.53,
"learning_rate": 4.7244034459098884e-06,
"loss": 2.1709,
"step": 6920
},
{
"epoch": 0.53,
"learning_rate": 4.7167797514675616e-06,
"loss": 2.1619,
"step": 6930
},
{
"epoch": 0.53,
"learning_rate": 4.709156057025235e-06,
"loss": 2.1502,
"step": 6940
},
{
"epoch": 0.53,
"learning_rate": 4.701532362582908e-06,
"loss": 2.1461,
"step": 6950
},
{
"epoch": 0.53,
"learning_rate": 4.693908668140582e-06,
"loss": 2.1596,
"step": 6960
},
{
"epoch": 0.53,
"learning_rate": 4.686284973698255e-06,
"loss": 2.1648,
"step": 6970
},
{
"epoch": 0.53,
"learning_rate": 4.678661279255928e-06,
"loss": 2.1654,
"step": 6980
},
{
"epoch": 0.53,
"learning_rate": 4.671037584813601e-06,
"loss": 2.1627,
"step": 6990
},
{
"epoch": 0.53,
"learning_rate": 4.663413890371274e-06,
"loss": 2.1688,
"step": 7000
},
{
"epoch": 0.53,
"learning_rate": 4.6557901959289475e-06,
"loss": 2.1623,
"step": 7010
},
{
"epoch": 0.54,
"learning_rate": 4.648166501486621e-06,
"loss": 2.1662,
"step": 7020
},
{
"epoch": 0.54,
"learning_rate": 4.6405428070442946e-06,
"loss": 2.1654,
"step": 7030
},
{
"epoch": 0.54,
"learning_rate": 4.632919112601968e-06,
"loss": 2.1582,
"step": 7040
},
{
"epoch": 0.54,
"learning_rate": 4.625295418159641e-06,
"loss": 2.1562,
"step": 7050
},
{
"epoch": 0.54,
"learning_rate": 4.617671723717314e-06,
"loss": 2.1633,
"step": 7060
},
{
"epoch": 0.54,
"learning_rate": 4.610048029274987e-06,
"loss": 2.1588,
"step": 7070
},
{
"epoch": 0.54,
"learning_rate": 4.60242433483266e-06,
"loss": 2.1645,
"step": 7080
},
{
"epoch": 0.54,
"learning_rate": 4.594800640390333e-06,
"loss": 2.1621,
"step": 7090
},
{
"epoch": 0.54,
"learning_rate": 4.587176945948007e-06,
"loss": 2.1658,
"step": 7100
},
{
"epoch": 0.54,
"learning_rate": 4.5795532515056805e-06,
"loss": 2.1678,
"step": 7110
},
{
"epoch": 0.54,
"learning_rate": 4.571929557063354e-06,
"loss": 2.1645,
"step": 7120
},
{
"epoch": 0.54,
"learning_rate": 4.564305862621026e-06,
"loss": 2.1699,
"step": 7130
},
{
"epoch": 0.54,
"learning_rate": 4.5566821681787e-06,
"loss": 2.1736,
"step": 7140
},
{
"epoch": 0.55,
"learning_rate": 4.549058473736373e-06,
"loss": 2.167,
"step": 7150
},
{
"epoch": 0.55,
"learning_rate": 4.541434779294046e-06,
"loss": 2.177,
"step": 7160
},
{
"epoch": 0.55,
"learning_rate": 4.53381108485172e-06,
"loss": 2.1752,
"step": 7170
},
{
"epoch": 0.55,
"learning_rate": 4.526187390409393e-06,
"loss": 2.1721,
"step": 7180
},
{
"epoch": 0.55,
"learning_rate": 4.5185636959670655e-06,
"loss": 2.1844,
"step": 7190
},
{
"epoch": 0.55,
"learning_rate": 4.510940001524739e-06,
"loss": 2.1762,
"step": 7200
},
{
"epoch": 0.55,
"learning_rate": 4.503316307082413e-06,
"loss": 2.182,
"step": 7210
},
{
"epoch": 0.55,
"learning_rate": 4.495692612640086e-06,
"loss": 2.1824,
"step": 7220
},
{
"epoch": 0.55,
"learning_rate": 4.488068918197759e-06,
"loss": 2.175,
"step": 7230
},
{
"epoch": 0.55,
"learning_rate": 4.480445223755433e-06,
"loss": 2.1848,
"step": 7240
},
{
"epoch": 0.55,
"learning_rate": 4.472821529313105e-06,
"loss": 2.1758,
"step": 7250
},
{
"epoch": 0.55,
"learning_rate": 4.465197834870778e-06,
"loss": 2.1885,
"step": 7260
},
{
"epoch": 0.55,
"learning_rate": 4.457574140428452e-06,
"loss": 2.1809,
"step": 7270
},
{
"epoch": 0.56,
"learning_rate": 4.449950445986125e-06,
"loss": 2.1908,
"step": 7280
},
{
"epoch": 0.56,
"learning_rate": 4.4423267515437985e-06,
"loss": 2.183,
"step": 7290
},
{
"epoch": 0.56,
"learning_rate": 4.434703057101472e-06,
"loss": 2.1826,
"step": 7300
},
{
"epoch": 0.56,
"learning_rate": 4.427079362659145e-06,
"loss": 2.1908,
"step": 7310
},
{
"epoch": 0.56,
"learning_rate": 4.419455668216818e-06,
"loss": 2.1854,
"step": 7320
},
{
"epoch": 0.56,
"learning_rate": 4.411831973774491e-06,
"loss": 2.1916,
"step": 7330
},
{
"epoch": 0.56,
"learning_rate": 4.404208279332165e-06,
"loss": 2.1836,
"step": 7340
},
{
"epoch": 0.56,
"learning_rate": 4.396584584889838e-06,
"loss": 2.1855,
"step": 7350
},
{
"epoch": 0.56,
"learning_rate": 4.388960890447511e-06,
"loss": 2.1895,
"step": 7360
},
{
"epoch": 0.56,
"learning_rate": 4.381337196005184e-06,
"loss": 2.1855,
"step": 7370
},
{
"epoch": 0.56,
"learning_rate": 4.3737135015628575e-06,
"loss": 2.1826,
"step": 7380
},
{
"epoch": 0.56,
"learning_rate": 4.366089807120531e-06,
"loss": 2.1926,
"step": 7390
},
{
"epoch": 0.56,
"learning_rate": 4.358466112678204e-06,
"loss": 2.1895,
"step": 7400
},
{
"epoch": 0.56,
"learning_rate": 4.350842418235878e-06,
"loss": 2.1973,
"step": 7410
},
{
"epoch": 0.57,
"learning_rate": 4.343218723793551e-06,
"loss": 2.1936,
"step": 7420
},
{
"epoch": 0.57,
"learning_rate": 4.335595029351224e-06,
"loss": 2.1982,
"step": 7430
},
{
"epoch": 0.57,
"learning_rate": 4.327971334908897e-06,
"loss": 2.1973,
"step": 7440
},
{
"epoch": 0.57,
"learning_rate": 4.32034764046657e-06,
"loss": 2.1963,
"step": 7450
},
{
"epoch": 0.57,
"learning_rate": 4.312723946024243e-06,
"loss": 2.193,
"step": 7460
},
{
"epoch": 0.57,
"learning_rate": 4.3051002515819165e-06,
"loss": 2.2084,
"step": 7470
},
{
"epoch": 0.57,
"learning_rate": 4.2974765571395905e-06,
"loss": 2.2049,
"step": 7480
},
{
"epoch": 0.57,
"learning_rate": 4.289852862697264e-06,
"loss": 2.1889,
"step": 7490
},
{
"epoch": 0.57,
"learning_rate": 4.282229168254937e-06,
"loss": 2.1986,
"step": 7500
},
{
"epoch": 0.57,
"learning_rate": 4.27460547381261e-06,
"loss": 2.1945,
"step": 7510
},
{
"epoch": 0.57,
"learning_rate": 4.266981779370283e-06,
"loss": 2.1998,
"step": 7520
},
{
"epoch": 0.57,
"learning_rate": 4.259358084927956e-06,
"loss": 2.1986,
"step": 7530
},
{
"epoch": 0.57,
"learning_rate": 4.251734390485629e-06,
"loss": 2.201,
"step": 7540
},
{
"epoch": 0.58,
"learning_rate": 4.244110696043303e-06,
"loss": 2.2078,
"step": 7550
},
{
"epoch": 0.58,
"learning_rate": 4.236487001600976e-06,
"loss": 2.2041,
"step": 7560
},
{
"epoch": 0.58,
"learning_rate": 4.2288633071586495e-06,
"loss": 2.2051,
"step": 7570
},
{
"epoch": 0.58,
"learning_rate": 4.221239612716323e-06,
"loss": 2.207,
"step": 7580
},
{
"epoch": 0.58,
"learning_rate": 4.213615918273996e-06,
"loss": 2.2041,
"step": 7590
},
{
"epoch": 0.58,
"learning_rate": 4.205992223831669e-06,
"loss": 2.2074,
"step": 7600
},
{
"epoch": 0.58,
"learning_rate": 4.198368529389342e-06,
"loss": 2.2004,
"step": 7610
},
{
"epoch": 0.58,
"learning_rate": 4.190744834947016e-06,
"loss": 2.2121,
"step": 7620
},
{
"epoch": 0.58,
"learning_rate": 4.183121140504689e-06,
"loss": 2.2131,
"step": 7630
},
{
"epoch": 0.58,
"learning_rate": 4.175497446062362e-06,
"loss": 2.2131,
"step": 7640
},
{
"epoch": 0.58,
"learning_rate": 4.1678737516200354e-06,
"loss": 2.2043,
"step": 7650
},
{
"epoch": 0.58,
"learning_rate": 4.1602500571777086e-06,
"loss": 2.2057,
"step": 7660
},
{
"epoch": 0.58,
"learning_rate": 4.152626362735382e-06,
"loss": 2.2066,
"step": 7670
},
{
"epoch": 0.59,
"learning_rate": 4.145002668293055e-06,
"loss": 2.2213,
"step": 7680
},
{
"epoch": 0.59,
"learning_rate": 4.137378973850729e-06,
"loss": 2.2234,
"step": 7690
},
{
"epoch": 0.59,
"learning_rate": 4.129755279408402e-06,
"loss": 2.2174,
"step": 7700
},
{
"epoch": 0.59,
"learning_rate": 4.122131584966075e-06,
"loss": 2.2186,
"step": 7710
},
{
"epoch": 0.59,
"learning_rate": 4.114507890523748e-06,
"loss": 2.2115,
"step": 7720
},
{
"epoch": 0.59,
"learning_rate": 4.106884196081421e-06,
"loss": 2.2293,
"step": 7730
},
{
"epoch": 0.59,
"learning_rate": 4.0992605016390945e-06,
"loss": 2.2129,
"step": 7740
},
{
"epoch": 0.59,
"learning_rate": 4.091636807196768e-06,
"loss": 2.2178,
"step": 7750
},
{
"epoch": 0.59,
"learning_rate": 4.0840131127544416e-06,
"loss": 2.2182,
"step": 7760
},
{
"epoch": 0.59,
"learning_rate": 4.076389418312115e-06,
"loss": 2.227,
"step": 7770
},
{
"epoch": 0.59,
"learning_rate": 4.068765723869788e-06,
"loss": 2.2273,
"step": 7780
},
{
"epoch": 0.59,
"learning_rate": 4.061142029427461e-06,
"loss": 2.2225,
"step": 7790
},
{
"epoch": 0.59,
"learning_rate": 4.053518334985134e-06,
"loss": 2.2307,
"step": 7800
},
{
"epoch": 0.6,
"learning_rate": 4.045894640542807e-06,
"loss": 2.2209,
"step": 7810
},
{
"epoch": 0.6,
"learning_rate": 4.03827094610048e-06,
"loss": 2.2314,
"step": 7820
},
{
"epoch": 0.6,
"learning_rate": 4.030647251658154e-06,
"loss": 2.2258,
"step": 7830
},
{
"epoch": 0.6,
"learning_rate": 4.0230235572158275e-06,
"loss": 2.233,
"step": 7840
},
{
"epoch": 0.6,
"learning_rate": 4.015399862773501e-06,
"loss": 2.2275,
"step": 7850
},
{
"epoch": 0.6,
"learning_rate": 4.007776168331174e-06,
"loss": 2.2271,
"step": 7860
},
{
"epoch": 0.6,
"learning_rate": 4.000152473888847e-06,
"loss": 2.2324,
"step": 7870
},
{
"epoch": 0.6,
"learning_rate": 3.99252877944652e-06,
"loss": 2.2418,
"step": 7880
},
{
"epoch": 0.6,
"learning_rate": 3.984905085004193e-06,
"loss": 2.2348,
"step": 7890
},
{
"epoch": 0.6,
"learning_rate": 3.977281390561867e-06,
"loss": 2.2416,
"step": 7900
},
{
"epoch": 0.6,
"learning_rate": 3.96965769611954e-06,
"loss": 2.2408,
"step": 7910
},
{
"epoch": 0.6,
"learning_rate": 3.962034001677213e-06,
"loss": 2.2395,
"step": 7920
},
{
"epoch": 0.6,
"learning_rate": 3.9544103072348865e-06,
"loss": 2.2328,
"step": 7930
},
{
"epoch": 0.61,
"learning_rate": 3.94678661279256e-06,
"loss": 2.2471,
"step": 7940
},
{
"epoch": 0.61,
"learning_rate": 3.939162918350233e-06,
"loss": 2.2352,
"step": 7950
},
{
"epoch": 0.61,
"learning_rate": 3.931539223907906e-06,
"loss": 2.2406,
"step": 7960
},
{
"epoch": 0.61,
"learning_rate": 3.92391552946558e-06,
"loss": 2.2322,
"step": 7970
},
{
"epoch": 0.61,
"learning_rate": 3.916291835023253e-06,
"loss": 2.2447,
"step": 7980
},
{
"epoch": 0.61,
"learning_rate": 3.908668140580926e-06,
"loss": 2.2412,
"step": 7990
},
{
"epoch": 0.61,
"learning_rate": 3.901044446138599e-06,
"loss": 2.2412,
"step": 8000
},
{
"epoch": 0.61,
"learning_rate": 3.893420751696272e-06,
"loss": 2.2361,
"step": 8010
},
{
"epoch": 0.61,
"learning_rate": 3.8857970572539455e-06,
"loss": 2.2393,
"step": 8020
},
{
"epoch": 0.61,
"learning_rate": 3.878173362811619e-06,
"loss": 2.2414,
"step": 8030
},
{
"epoch": 0.61,
"learning_rate": 3.870549668369293e-06,
"loss": 2.2477,
"step": 8040
},
{
"epoch": 0.61,
"learning_rate": 3.862925973926966e-06,
"loss": 2.2484,
"step": 8050
},
{
"epoch": 0.61,
"learning_rate": 3.855302279484638e-06,
"loss": 2.2463,
"step": 8060
},
{
"epoch": 0.62,
"learning_rate": 3.847678585042312e-06,
"loss": 2.2463,
"step": 8070
},
{
"epoch": 0.62,
"learning_rate": 3.840054890599985e-06,
"loss": 2.2387,
"step": 8080
},
{
"epoch": 0.62,
"learning_rate": 3.832431196157658e-06,
"loss": 2.2482,
"step": 8090
},
{
"epoch": 0.62,
"learning_rate": 3.824807501715331e-06,
"loss": 2.249,
"step": 8100
},
{
"epoch": 0.62,
"learning_rate": 3.817183807273005e-06,
"loss": 2.251,
"step": 8110
},
{
"epoch": 0.62,
"learning_rate": 3.809560112830678e-06,
"loss": 2.2467,
"step": 8120
},
{
"epoch": 0.62,
"learning_rate": 3.801936418388351e-06,
"loss": 2.2529,
"step": 8130
},
{
"epoch": 0.62,
"learning_rate": 3.7943127239460248e-06,
"loss": 2.25,
"step": 8140
},
{
"epoch": 0.62,
"learning_rate": 3.786689029503698e-06,
"loss": 2.2533,
"step": 8150
},
{
"epoch": 0.62,
"learning_rate": 3.779065335061371e-06,
"loss": 2.2539,
"step": 8160
},
{
"epoch": 0.62,
"learning_rate": 3.771441640619044e-06,
"loss": 2.2549,
"step": 8170
},
{
"epoch": 0.62,
"learning_rate": 3.7638179461767177e-06,
"loss": 2.2439,
"step": 8180
},
{
"epoch": 0.62,
"learning_rate": 3.756194251734391e-06,
"loss": 2.2607,
"step": 8190
},
{
"epoch": 0.63,
"learning_rate": 3.748570557292064e-06,
"loss": 2.2596,
"step": 8200
},
{
"epoch": 0.63,
"learning_rate": 3.7409468628497375e-06,
"loss": 2.2547,
"step": 8210
},
{
"epoch": 0.63,
"learning_rate": 3.7333231684074107e-06,
"loss": 2.2695,
"step": 8220
},
{
"epoch": 0.63,
"learning_rate": 3.7256994739650838e-06,
"loss": 2.2631,
"step": 8230
},
{
"epoch": 0.63,
"learning_rate": 3.718075779522757e-06,
"loss": 2.2543,
"step": 8240
},
{
"epoch": 0.63,
"learning_rate": 3.7104520850804305e-06,
"loss": 2.2664,
"step": 8250
},
{
"epoch": 0.63,
"learning_rate": 3.7028283906381036e-06,
"loss": 2.2568,
"step": 8260
},
{
"epoch": 0.63,
"learning_rate": 3.6952046961957767e-06,
"loss": 2.2645,
"step": 8270
},
{
"epoch": 0.63,
"learning_rate": 3.6875810017534503e-06,
"loss": 2.2658,
"step": 8280
},
{
"epoch": 0.63,
"learning_rate": 3.6799573073111234e-06,
"loss": 2.2654,
"step": 8290
},
{
"epoch": 0.63,
"learning_rate": 3.6723336128687965e-06,
"loss": 2.2682,
"step": 8300
},
{
"epoch": 0.63,
"learning_rate": 3.6647099184264697e-06,
"loss": 2.2715,
"step": 8310
},
{
"epoch": 0.63,
"learning_rate": 3.6570862239841432e-06,
"loss": 2.2707,
"step": 8320
},
{
"epoch": 0.64,
"learning_rate": 3.6494625295418164e-06,
"loss": 2.2648,
"step": 8330
},
{
"epoch": 0.64,
"learning_rate": 3.6418388350994895e-06,
"loss": 2.2693,
"step": 8340
},
{
"epoch": 0.64,
"learning_rate": 3.634215140657163e-06,
"loss": 2.2752,
"step": 8350
},
{
"epoch": 0.64,
"learning_rate": 3.626591446214836e-06,
"loss": 2.2711,
"step": 8360
},
{
"epoch": 0.64,
"learning_rate": 3.6189677517725093e-06,
"loss": 2.2746,
"step": 8370
},
{
"epoch": 0.64,
"learning_rate": 3.6113440573301824e-06,
"loss": 2.2654,
"step": 8380
},
{
"epoch": 0.64,
"learning_rate": 3.603720362887856e-06,
"loss": 2.2721,
"step": 8390
},
{
"epoch": 0.64,
"learning_rate": 3.596096668445529e-06,
"loss": 2.2746,
"step": 8400
},
{
"epoch": 0.64,
"learning_rate": 3.5884729740032022e-06,
"loss": 2.2684,
"step": 8410
},
{
"epoch": 0.64,
"learning_rate": 3.580849279560876e-06,
"loss": 2.2781,
"step": 8420
},
{
"epoch": 0.64,
"learning_rate": 3.573225585118549e-06,
"loss": 2.2775,
"step": 8430
},
{
"epoch": 0.64,
"learning_rate": 3.565601890676222e-06,
"loss": 2.2754,
"step": 8440
},
{
"epoch": 0.64,
"learning_rate": 3.5579781962338956e-06,
"loss": 2.2756,
"step": 8450
},
{
"epoch": 0.64,
"learning_rate": 3.5503545017915687e-06,
"loss": 2.2834,
"step": 8460
},
{
"epoch": 0.65,
"learning_rate": 3.542730807349242e-06,
"loss": 2.282,
"step": 8470
},
{
"epoch": 0.65,
"learning_rate": 3.5351071129069146e-06,
"loss": 2.2748,
"step": 8480
},
{
"epoch": 0.65,
"learning_rate": 3.5274834184645886e-06,
"loss": 2.2865,
"step": 8490
},
{
"epoch": 0.65,
"learning_rate": 3.5198597240222617e-06,
"loss": 2.2859,
"step": 8500
},
{
"epoch": 0.65,
"learning_rate": 3.5122360295799344e-06,
"loss": 2.2908,
"step": 8510
},
{
"epoch": 0.65,
"learning_rate": 3.5046123351376084e-06,
"loss": 2.2895,
"step": 8520
},
{
"epoch": 0.65,
"learning_rate": 3.4969886406952815e-06,
"loss": 2.2873,
"step": 8530
},
{
"epoch": 0.65,
"learning_rate": 3.4893649462529542e-06,
"loss": 2.2807,
"step": 8540
},
{
"epoch": 0.65,
"learning_rate": 3.4817412518106273e-06,
"loss": 2.2889,
"step": 8550
},
{
"epoch": 0.65,
"learning_rate": 3.4741175573683013e-06,
"loss": 2.2893,
"step": 8560
},
{
"epoch": 0.65,
"learning_rate": 3.466493862925974e-06,
"loss": 2.2877,
"step": 8570
},
{
"epoch": 0.65,
"learning_rate": 3.458870168483647e-06,
"loss": 2.2939,
"step": 8580
},
{
"epoch": 0.65,
"learning_rate": 3.451246474041321e-06,
"loss": 2.2947,
"step": 8590
},
{
"epoch": 0.66,
"learning_rate": 3.443622779598994e-06,
"loss": 2.2932,
"step": 8600
},
{
"epoch": 0.66,
"learning_rate": 3.435999085156667e-06,
"loss": 2.2941,
"step": 8610
},
{
"epoch": 0.66,
"learning_rate": 3.42837539071434e-06,
"loss": 2.291,
"step": 8620
},
{
"epoch": 0.66,
"learning_rate": 3.4207516962720137e-06,
"loss": 2.2855,
"step": 8630
},
{
"epoch": 0.66,
"learning_rate": 3.413128001829687e-06,
"loss": 2.2846,
"step": 8640
},
{
"epoch": 0.66,
"learning_rate": 3.40550430738736e-06,
"loss": 2.283,
"step": 8650
},
{
"epoch": 0.66,
"learning_rate": 3.3978806129450335e-06,
"loss": 2.2908,
"step": 8660
},
{
"epoch": 0.66,
"learning_rate": 3.3902569185027066e-06,
"loss": 2.2951,
"step": 8670
},
{
"epoch": 0.66,
"learning_rate": 3.3826332240603797e-06,
"loss": 2.2889,
"step": 8680
},
{
"epoch": 0.66,
"learning_rate": 3.375009529618053e-06,
"loss": 2.2871,
"step": 8690
},
{
"epoch": 0.66,
"learning_rate": 3.3673858351757264e-06,
"loss": 2.2875,
"step": 8700
},
{
"epoch": 0.66,
"learning_rate": 3.3597621407333996e-06,
"loss": 2.2971,
"step": 8710
},
{
"epoch": 0.66,
"learning_rate": 3.3521384462910727e-06,
"loss": 2.2992,
"step": 8720
},
{
"epoch": 0.67,
"learning_rate": 3.3445147518487462e-06,
"loss": 2.2961,
"step": 8730
},
{
"epoch": 0.67,
"learning_rate": 3.3368910574064194e-06,
"loss": 2.3055,
"step": 8740
},
{
"epoch": 0.67,
"learning_rate": 3.3292673629640925e-06,
"loss": 2.3043,
"step": 8750
},
{
"epoch": 0.67,
"learning_rate": 3.3216436685217656e-06,
"loss": 2.3014,
"step": 8760
},
{
"epoch": 0.67,
"learning_rate": 3.314019974079439e-06,
"loss": 2.3037,
"step": 8770
},
{
"epoch": 0.67,
"learning_rate": 3.3063962796371123e-06,
"loss": 2.2908,
"step": 8780
},
{
"epoch": 0.67,
"learning_rate": 3.2987725851947854e-06,
"loss": 2.3041,
"step": 8790
},
{
"epoch": 0.67,
"learning_rate": 3.291148890752459e-06,
"loss": 2.3021,
"step": 8800
},
{
"epoch": 0.67,
"learning_rate": 3.283525196310132e-06,
"loss": 2.3039,
"step": 8810
},
{
"epoch": 0.67,
"learning_rate": 3.2759015018678053e-06,
"loss": 2.3041,
"step": 8820
},
{
"epoch": 0.67,
"learning_rate": 3.2682778074254784e-06,
"loss": 2.3012,
"step": 8830
},
{
"epoch": 0.67,
"learning_rate": 3.260654112983152e-06,
"loss": 2.2986,
"step": 8840
},
{
"epoch": 0.67,
"learning_rate": 3.253030418540825e-06,
"loss": 2.3004,
"step": 8850
},
{
"epoch": 0.68,
"learning_rate": 3.245406724098498e-06,
"loss": 2.301,
"step": 8860
},
{
"epoch": 0.68,
"learning_rate": 3.2377830296561718e-06,
"loss": 2.3121,
"step": 8870
},
{
"epoch": 0.68,
"learning_rate": 3.230159335213845e-06,
"loss": 2.3102,
"step": 8880
},
{
"epoch": 0.68,
"learning_rate": 3.222535640771518e-06,
"loss": 2.3115,
"step": 8890
},
{
"epoch": 0.68,
"learning_rate": 3.214911946329191e-06,
"loss": 2.31,
"step": 8900
},
{
"epoch": 0.68,
"learning_rate": 3.2072882518868647e-06,
"loss": 2.3121,
"step": 8910
},
{
"epoch": 0.68,
"learning_rate": 3.199664557444538e-06,
"loss": 2.3137,
"step": 8920
},
{
"epoch": 0.68,
"learning_rate": 3.192040863002211e-06,
"loss": 2.309,
"step": 8930
},
{
"epoch": 0.68,
"learning_rate": 3.1844171685598845e-06,
"loss": 2.3104,
"step": 8940
},
{
"epoch": 0.68,
"learning_rate": 3.1767934741175576e-06,
"loss": 2.308,
"step": 8950
},
{
"epoch": 0.68,
"learning_rate": 3.1691697796752308e-06,
"loss": 2.3199,
"step": 8960
},
{
"epoch": 0.68,
"learning_rate": 3.161546085232904e-06,
"loss": 2.3139,
"step": 8970
},
{
"epoch": 0.68,
"learning_rate": 3.1539223907905775e-06,
"loss": 2.3105,
"step": 8980
},
{
"epoch": 0.69,
"learning_rate": 3.1462986963482506e-06,
"loss": 2.3207,
"step": 8990
},
{
"epoch": 0.69,
"learning_rate": 3.1386750019059237e-06,
"loss": 2.3186,
"step": 9000
},
{
"epoch": 0.69,
"learning_rate": 3.1310513074635973e-06,
"loss": 2.3189,
"step": 9010
},
{
"epoch": 0.69,
"learning_rate": 3.1234276130212704e-06,
"loss": 2.3186,
"step": 9020
},
{
"epoch": 0.69,
"learning_rate": 3.1158039185789435e-06,
"loss": 2.3158,
"step": 9030
},
{
"epoch": 0.69,
"learning_rate": 3.108180224136617e-06,
"loss": 2.3297,
"step": 9040
},
{
"epoch": 0.69,
"learning_rate": 3.1005565296942902e-06,
"loss": 2.3268,
"step": 9050
},
{
"epoch": 0.69,
"learning_rate": 3.0929328352519634e-06,
"loss": 2.3172,
"step": 9060
},
{
"epoch": 0.69,
"learning_rate": 3.0853091408096365e-06,
"loss": 2.3182,
"step": 9070
},
{
"epoch": 0.69,
"learning_rate": 3.07768544636731e-06,
"loss": 2.3219,
"step": 9080
},
{
"epoch": 0.69,
"learning_rate": 3.070061751924983e-06,
"loss": 2.3189,
"step": 9090
},
{
"epoch": 0.69,
"learning_rate": 3.0624380574826563e-06,
"loss": 2.3193,
"step": 9100
},
{
"epoch": 0.69,
"learning_rate": 3.05481436304033e-06,
"loss": 2.3289,
"step": 9110
},
{
"epoch": 0.7,
"learning_rate": 3.047190668598003e-06,
"loss": 2.3322,
"step": 9120
},
{
"epoch": 0.7,
"learning_rate": 3.039566974155676e-06,
"loss": 2.3242,
"step": 9130
},
{
"epoch": 0.7,
"learning_rate": 3.0319432797133492e-06,
"loss": 2.3229,
"step": 9140
},
{
"epoch": 0.7,
"learning_rate": 3.024319585271023e-06,
"loss": 2.334,
"step": 9150
},
{
"epoch": 0.7,
"learning_rate": 3.016695890828696e-06,
"loss": 2.3248,
"step": 9160
},
{
"epoch": 0.7,
"learning_rate": 3.009072196386369e-06,
"loss": 2.326,
"step": 9170
},
{
"epoch": 0.7,
"learning_rate": 3.0014485019440426e-06,
"loss": 2.3285,
"step": 9180
},
{
"epoch": 0.7,
"learning_rate": 2.9938248075017157e-06,
"loss": 2.3281,
"step": 9190
},
{
"epoch": 0.7,
"learning_rate": 2.986201113059389e-06,
"loss": 2.3379,
"step": 9200
},
{
"epoch": 0.7,
"learning_rate": 2.978577418617062e-06,
"loss": 2.3277,
"step": 9210
},
{
"epoch": 0.7,
"learning_rate": 2.9709537241747356e-06,
"loss": 2.3326,
"step": 9220
},
{
"epoch": 0.7,
"learning_rate": 2.9633300297324087e-06,
"loss": 2.3279,
"step": 9230
},
{
"epoch": 0.7,
"learning_rate": 2.955706335290082e-06,
"loss": 2.3307,
"step": 9240
},
{
"epoch": 0.71,
"learning_rate": 2.9480826408477554e-06,
"loss": 2.3354,
"step": 9250
},
{
"epoch": 0.71,
"learning_rate": 2.9404589464054285e-06,
"loss": 2.3357,
"step": 9260
},
{
"epoch": 0.71,
"learning_rate": 2.9328352519631016e-06,
"loss": 2.3281,
"step": 9270
},
{
"epoch": 0.71,
"learning_rate": 2.9252115575207748e-06,
"loss": 2.3297,
"step": 9280
},
{
"epoch": 0.71,
"learning_rate": 2.9175878630784483e-06,
"loss": 2.3375,
"step": 9290
},
{
"epoch": 0.71,
"learning_rate": 2.9099641686361214e-06,
"loss": 2.342,
"step": 9300
},
{
"epoch": 0.71,
"learning_rate": 2.9023404741937946e-06,
"loss": 2.3305,
"step": 9310
},
{
"epoch": 0.71,
"learning_rate": 2.894716779751468e-06,
"loss": 2.3375,
"step": 9320
},
{
"epoch": 0.71,
"learning_rate": 2.8870930853091413e-06,
"loss": 2.3422,
"step": 9330
},
{
"epoch": 0.71,
"learning_rate": 2.8794693908668144e-06,
"loss": 2.3529,
"step": 9340
},
{
"epoch": 0.71,
"learning_rate": 2.871845696424487e-06,
"loss": 2.3475,
"step": 9350
},
{
"epoch": 0.71,
"learning_rate": 2.864222001982161e-06,
"loss": 2.3412,
"step": 9360
},
{
"epoch": 0.71,
"learning_rate": 2.856598307539834e-06,
"loss": 2.3396,
"step": 9370
},
{
"epoch": 0.72,
"learning_rate": 2.848974613097507e-06,
"loss": 2.3504,
"step": 9380
},
{
"epoch": 0.72,
"learning_rate": 2.841350918655181e-06,
"loss": 2.3422,
"step": 9390
},
{
"epoch": 0.72,
"learning_rate": 2.833727224212854e-06,
"loss": 2.3461,
"step": 9400
},
{
"epoch": 0.72,
"learning_rate": 2.8261035297705267e-06,
"loss": 2.3482,
"step": 9410
},
{
"epoch": 0.72,
"learning_rate": 2.8184798353282e-06,
"loss": 2.3441,
"step": 9420
},
{
"epoch": 0.72,
"learning_rate": 2.810856140885874e-06,
"loss": 2.3504,
"step": 9430
},
{
"epoch": 0.72,
"learning_rate": 2.8032324464435465e-06,
"loss": 2.3469,
"step": 9440
},
{
"epoch": 0.72,
"learning_rate": 2.7956087520012197e-06,
"loss": 2.349,
"step": 9450
},
{
"epoch": 0.72,
"learning_rate": 2.7879850575588937e-06,
"loss": 2.3463,
"step": 9460
},
{
"epoch": 0.72,
"learning_rate": 2.7803613631165664e-06,
"loss": 2.3475,
"step": 9470
},
{
"epoch": 0.72,
"learning_rate": 2.7727376686742395e-06,
"loss": 2.3492,
"step": 9480
},
{
"epoch": 0.72,
"learning_rate": 2.7651139742319126e-06,
"loss": 2.3531,
"step": 9490
},
{
"epoch": 0.72,
"learning_rate": 2.757490279789586e-06,
"loss": 2.343,
"step": 9500
},
{
"epoch": 0.73,
"learning_rate": 2.7498665853472593e-06,
"loss": 2.3504,
"step": 9510
},
{
"epoch": 0.73,
"learning_rate": 2.7422428909049324e-06,
"loss": 2.3529,
"step": 9520
},
{
"epoch": 0.73,
"learning_rate": 2.7346191964626064e-06,
"loss": 2.3527,
"step": 9530
},
{
"epoch": 0.73,
"learning_rate": 2.726995502020279e-06,
"loss": 2.3572,
"step": 9540
},
{
"epoch": 0.73,
"learning_rate": 2.7193718075779523e-06,
"loss": 2.3451,
"step": 9550
},
{
"epoch": 0.73,
"learning_rate": 2.7117481131356254e-06,
"loss": 2.3551,
"step": 9560
},
{
"epoch": 0.73,
"learning_rate": 2.704124418693299e-06,
"loss": 2.359,
"step": 9570
},
{
"epoch": 0.73,
"learning_rate": 2.696500724250972e-06,
"loss": 2.352,
"step": 9580
},
{
"epoch": 0.73,
"learning_rate": 2.688877029808645e-06,
"loss": 2.3549,
"step": 9590
},
{
"epoch": 0.73,
"learning_rate": 2.6812533353663188e-06,
"loss": 2.3525,
"step": 9600
},
{
"epoch": 0.73,
"learning_rate": 2.673629640923992e-06,
"loss": 2.3625,
"step": 9610
},
{
"epoch": 0.73,
"learning_rate": 2.666005946481665e-06,
"loss": 2.3549,
"step": 9620
},
{
"epoch": 0.73,
"learning_rate": 2.6583822520393386e-06,
"loss": 2.365,
"step": 9630
},
{
"epoch": 0.73,
"learning_rate": 2.6507585575970117e-06,
"loss": 2.3639,
"step": 9640
},
{
"epoch": 0.74,
"learning_rate": 2.643134863154685e-06,
"loss": 2.3607,
"step": 9650
},
{
"epoch": 0.74,
"learning_rate": 2.635511168712358e-06,
"loss": 2.3613,
"step": 9660
},
{
"epoch": 0.74,
"learning_rate": 2.6278874742700315e-06,
"loss": 2.3684,
"step": 9670
},
{
"epoch": 0.74,
"learning_rate": 2.6202637798277046e-06,
"loss": 2.3645,
"step": 9680
},
{
"epoch": 0.74,
"learning_rate": 2.6126400853853778e-06,
"loss": 2.3689,
"step": 9690
},
{
"epoch": 0.74,
"learning_rate": 2.6050163909430513e-06,
"loss": 2.3691,
"step": 9700
},
{
"epoch": 0.74,
"learning_rate": 2.5973926965007245e-06,
"loss": 2.3695,
"step": 9710
},
{
"epoch": 0.74,
"learning_rate": 2.5897690020583976e-06,
"loss": 2.3758,
"step": 9720
},
{
"epoch": 0.74,
"learning_rate": 2.5821453076160707e-06,
"loss": 2.3701,
"step": 9730
},
{
"epoch": 0.74,
"learning_rate": 2.5745216131737443e-06,
"loss": 2.3643,
"step": 9740
},
{
"epoch": 0.74,
"learning_rate": 2.5668979187314174e-06,
"loss": 2.3766,
"step": 9750
},
{
"epoch": 0.74,
"learning_rate": 2.5592742242890905e-06,
"loss": 2.374,
"step": 9760
},
{
"epoch": 0.74,
"learning_rate": 2.551650529846764e-06,
"loss": 2.3727,
"step": 9770
},
{
"epoch": 0.75,
"learning_rate": 2.5440268354044372e-06,
"loss": 2.3783,
"step": 9780
},
{
"epoch": 0.75,
"learning_rate": 2.5364031409621103e-06,
"loss": 2.3645,
"step": 9790
},
{
"epoch": 0.75,
"learning_rate": 2.5287794465197835e-06,
"loss": 2.3775,
"step": 9800
},
{
"epoch": 0.75,
"learning_rate": 2.521155752077457e-06,
"loss": 2.3707,
"step": 9810
},
{
"epoch": 0.75,
"learning_rate": 2.51353205763513e-06,
"loss": 2.3828,
"step": 9820
},
{
"epoch": 0.75,
"learning_rate": 2.5059083631928033e-06,
"loss": 2.3697,
"step": 9830
},
{
"epoch": 0.75,
"learning_rate": 2.498284668750477e-06,
"loss": 2.3814,
"step": 9840
},
{
"epoch": 0.75,
"learning_rate": 2.49066097430815e-06,
"loss": 2.3709,
"step": 9850
},
{
"epoch": 0.75,
"learning_rate": 2.483037279865823e-06,
"loss": 2.3766,
"step": 9860
},
{
"epoch": 0.75,
"learning_rate": 2.4754135854234967e-06,
"loss": 2.3768,
"step": 9870
},
{
"epoch": 0.75,
"learning_rate": 2.4677898909811694e-06,
"loss": 2.3793,
"step": 9880
},
{
"epoch": 0.75,
"learning_rate": 2.460166196538843e-06,
"loss": 2.3777,
"step": 9890
},
{
"epoch": 0.75,
"learning_rate": 2.4525425020965165e-06,
"loss": 2.3809,
"step": 9900
},
{
"epoch": 0.76,
"learning_rate": 2.444918807654189e-06,
"loss": 2.3795,
"step": 9910
},
{
"epoch": 0.76,
"learning_rate": 2.4372951132118627e-06,
"loss": 2.3756,
"step": 9920
},
{
"epoch": 0.76,
"learning_rate": 2.429671418769536e-06,
"loss": 2.3805,
"step": 9930
},
{
"epoch": 0.76,
"learning_rate": 2.422047724327209e-06,
"loss": 2.3861,
"step": 9940
},
{
"epoch": 0.76,
"learning_rate": 2.414424029884882e-06,
"loss": 2.3816,
"step": 9950
},
{
"epoch": 0.76,
"learning_rate": 2.4068003354425557e-06,
"loss": 2.3879,
"step": 9960
},
{
"epoch": 0.76,
"learning_rate": 2.399176641000229e-06,
"loss": 2.3818,
"step": 9970
},
{
"epoch": 0.76,
"learning_rate": 2.391552946557902e-06,
"loss": 2.3775,
"step": 9980
},
{
"epoch": 0.76,
"learning_rate": 2.3839292521155755e-06,
"loss": 2.3809,
"step": 9990
},
{
"epoch": 0.76,
"learning_rate": 2.3763055576732486e-06,
"loss": 2.3775,
"step": 10000
},
{
"epoch": 0.76,
"learning_rate": 2.3686818632309218e-06,
"loss": 2.3885,
"step": 10010
},
{
"epoch": 0.76,
"learning_rate": 2.3610581687885953e-06,
"loss": 2.3879,
"step": 10020
},
{
"epoch": 0.76,
"learning_rate": 2.3534344743462684e-06,
"loss": 2.385,
"step": 10030
},
{
"epoch": 0.77,
"learning_rate": 2.3458107799039416e-06,
"loss": 2.3793,
"step": 10040
},
{
"epoch": 0.77,
"learning_rate": 2.3381870854616147e-06,
"loss": 2.3898,
"step": 10050
},
{
"epoch": 0.77,
"learning_rate": 2.3305633910192883e-06,
"loss": 2.382,
"step": 10060
},
{
"epoch": 0.77,
"learning_rate": 2.3229396965769614e-06,
"loss": 2.39,
"step": 10070
},
{
"epoch": 0.77,
"learning_rate": 2.3153160021346345e-06,
"loss": 2.3848,
"step": 10080
},
{
"epoch": 0.77,
"learning_rate": 2.307692307692308e-06,
"loss": 2.3936,
"step": 10090
},
{
"epoch": 0.77,
"learning_rate": 2.300068613249981e-06,
"loss": 2.3937,
"step": 10100
},
{
"epoch": 0.77,
"learning_rate": 2.2924449188076543e-06,
"loss": 2.3939,
"step": 10110
},
{
"epoch": 0.77,
"learning_rate": 2.2848212243653275e-06,
"loss": 2.3889,
"step": 10120
},
{
"epoch": 0.77,
"learning_rate": 2.277197529923001e-06,
"loss": 2.382,
"step": 10130
},
{
"epoch": 0.77,
"learning_rate": 2.269573835480674e-06,
"loss": 2.3936,
"step": 10140
},
{
"epoch": 0.77,
"learning_rate": 2.2619501410383473e-06,
"loss": 2.3896,
"step": 10150
},
{
"epoch": 0.77,
"learning_rate": 2.254326446596021e-06,
"loss": 2.3908,
"step": 10160
},
{
"epoch": 0.78,
"learning_rate": 2.246702752153694e-06,
"loss": 2.3871,
"step": 10170
},
{
"epoch": 0.78,
"learning_rate": 2.239079057711367e-06,
"loss": 2.3945,
"step": 10180
},
{
"epoch": 0.78,
"learning_rate": 2.2314553632690402e-06,
"loss": 2.4012,
"step": 10190
},
{
"epoch": 0.78,
"learning_rate": 2.2238316688267138e-06,
"loss": 2.3996,
"step": 10200
},
{
"epoch": 0.78,
"learning_rate": 2.216207974384387e-06,
"loss": 2.3977,
"step": 10210
},
{
"epoch": 0.78,
"learning_rate": 2.20858427994206e-06,
"loss": 2.4,
"step": 10220
},
{
"epoch": 0.78,
"learning_rate": 2.2009605854997336e-06,
"loss": 2.3959,
"step": 10230
},
{
"epoch": 0.78,
"learning_rate": 2.1933368910574067e-06,
"loss": 2.3996,
"step": 10240
},
{
"epoch": 0.78,
"learning_rate": 2.18571319661508e-06,
"loss": 2.4045,
"step": 10250
},
{
"epoch": 0.78,
"learning_rate": 2.178089502172753e-06,
"loss": 2.3979,
"step": 10260
},
{
"epoch": 0.78,
"learning_rate": 2.1704658077304265e-06,
"loss": 2.4029,
"step": 10270
},
{
"epoch": 0.78,
"learning_rate": 2.1628421132880993e-06,
"loss": 2.4057,
"step": 10280
},
{
"epoch": 0.78,
"learning_rate": 2.155218418845773e-06,
"loss": 2.4002,
"step": 10290
},
{
"epoch": 0.79,
"learning_rate": 2.1475947244034464e-06,
"loss": 2.3965,
"step": 10300
},
{
"epoch": 0.79,
"learning_rate": 2.139971029961119e-06,
"loss": 2.4021,
"step": 10310
},
{
"epoch": 0.79,
"learning_rate": 2.1323473355187926e-06,
"loss": 2.4031,
"step": 10320
}
],
"max_steps": 13117,
"num_train_epochs": 1,
"total_flos": 6400995950592000.0,
"trial_name": null,
"trial_params": null
}