{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9998511240136966,
"eval_steps": 500,
"global_step": 6716,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.722084367245657e-08,
"loss": 0.2836,
"step": 32
},
{
"epoch": 0.01,
"learning_rate": 7.692307692307692e-08,
"loss": 0.2111,
"step": 64
},
{
"epoch": 0.01,
"learning_rate": 1.1662531017369727e-07,
"loss": 0.2145,
"step": 96
},
{
"epoch": 0.02,
"learning_rate": 1.563275434243176e-07,
"loss": 0.2375,
"step": 128
},
{
"epoch": 0.02,
"learning_rate": 1.9602977667493795e-07,
"loss": 0.1839,
"step": 160
},
{
"epoch": 0.03,
"learning_rate": 2.3573200992555832e-07,
"loss": 0.2864,
"step": 192
},
{
"epoch": 0.03,
"learning_rate": 2.7543424317617863e-07,
"loss": 0.1799,
"step": 224
},
{
"epoch": 0.04,
"learning_rate": 3.1513647642679897e-07,
"loss": 0.1609,
"step": 256
},
{
"epoch": 0.04,
"learning_rate": 3.5483870967741936e-07,
"loss": 0.1434,
"step": 288
},
{
"epoch": 0.05,
"learning_rate": 3.945409429280397e-07,
"loss": 0.1699,
"step": 320
},
{
"epoch": 0.05,
"learning_rate": 4.3424317617866004e-07,
"loss": 0.1898,
"step": 352
},
{
"epoch": 0.06,
"learning_rate": 4.739454094292804e-07,
"loss": 0.1665,
"step": 384
},
{
"epoch": 0.06,
"learning_rate": 5.136476426799007e-07,
"loss": 0.1358,
"step": 416
},
{
"epoch": 0.07,
"learning_rate": 5.533498759305211e-07,
"loss": 0.1713,
"step": 448
},
{
"epoch": 0.07,
"learning_rate": 5.930521091811415e-07,
"loss": 0.1817,
"step": 480
},
{
"epoch": 0.08,
"learning_rate": 6.327543424317618e-07,
"loss": 0.149,
"step": 512
},
{
"epoch": 0.08,
"learning_rate": 6.724565756823821e-07,
"loss": 0.165,
"step": 544
},
{
"epoch": 0.09,
"learning_rate": 7.121588089330024e-07,
"loss": 0.2282,
"step": 576
},
{
"epoch": 0.09,
"learning_rate": 7.518610421836227e-07,
"loss": 0.1666,
"step": 608
},
{
"epoch": 0.1,
"learning_rate": 7.915632754342431e-07,
"loss": 0.1229,
"step": 640
},
{
"epoch": 0.1,
"learning_rate": 8.312655086848634e-07,
"loss": 0.2118,
"step": 672
},
{
"epoch": 0.1,
"learning_rate": 8.709677419354838e-07,
"loss": 0.1869,
"step": 704
},
{
"epoch": 0.11,
"learning_rate": 9.106699751861042e-07,
"loss": 0.2196,
"step": 736
},
{
"epoch": 0.11,
"learning_rate": 9.503722084367245e-07,
"loss": 0.0936,
"step": 768
},
{
"epoch": 0.12,
"learning_rate": 9.90074441687345e-07,
"loss": 0.1839,
"step": 800
},
{
"epoch": 0.12,
"learning_rate": 9.980994615140957e-07,
"loss": 0.1675,
"step": 832
},
{
"epoch": 0.13,
"learning_rate": 9.955654101995564e-07,
"loss": 0.1595,
"step": 864
},
{
"epoch": 0.13,
"learning_rate": 9.930313588850174e-07,
"loss": 0.1556,
"step": 896
},
{
"epoch": 0.14,
"learning_rate": 9.905764966740576e-07,
"loss": 0.1711,
"step": 928
},
{
"epoch": 0.14,
"learning_rate": 9.880424453595185e-07,
"loss": 0.1777,
"step": 960
},
{
"epoch": 0.15,
"learning_rate": 9.855083940449792e-07,
"loss": 0.2031,
"step": 992
},
{
"epoch": 0.15,
"learning_rate": 9.829743427304402e-07,
"loss": 0.1529,
"step": 1024
},
{
"epoch": 0.16,
"learning_rate": 9.804402914159011e-07,
"loss": 0.1365,
"step": 1056
},
{
"epoch": 0.16,
"learning_rate": 9.77906240101362e-07,
"loss": 0.1922,
"step": 1088
},
{
"epoch": 0.17,
"learning_rate": 9.75372188786823e-07,
"loss": 0.133,
"step": 1120
},
{
"epoch": 0.17,
"learning_rate": 9.728381374722837e-07,
"loss": 0.1692,
"step": 1152
},
{
"epoch": 0.18,
"learning_rate": 9.703040861577447e-07,
"loss": 0.1022,
"step": 1184
},
{
"epoch": 0.18,
"learning_rate": 9.677700348432054e-07,
"loss": 0.2052,
"step": 1216
},
{
"epoch": 0.19,
"learning_rate": 9.652359835286664e-07,
"loss": 0.1546,
"step": 1248
},
{
"epoch": 0.19,
"learning_rate": 9.627019322141273e-07,
"loss": 0.149,
"step": 1280
},
{
"epoch": 0.2,
"learning_rate": 9.601678808995883e-07,
"loss": 0.1281,
"step": 1312
},
{
"epoch": 0.2,
"learning_rate": 9.57633829585049e-07,
"loss": 0.1437,
"step": 1344
},
{
"epoch": 0.2,
"learning_rate": 9.5509977827051e-07,
"loss": 0.2097,
"step": 1376
},
{
"epoch": 0.21,
"learning_rate": 9.525657269559708e-07,
"loss": 0.1308,
"step": 1408
},
{
"epoch": 0.21,
"learning_rate": 9.500316756414317e-07,
"loss": 0.1691,
"step": 1440
},
{
"epoch": 0.22,
"learning_rate": 9.474976243268927e-07,
"loss": 0.2319,
"step": 1472
},
{
"epoch": 0.22,
"learning_rate": 9.449635730123534e-07,
"loss": 0.2226,
"step": 1504
},
{
"epoch": 0.23,
"learning_rate": 9.424295216978143e-07,
"loss": 0.1789,
"step": 1536
},
{
"epoch": 0.23,
"learning_rate": 9.398954703832752e-07,
"loss": 0.1932,
"step": 1568
},
{
"epoch": 0.24,
"learning_rate": 9.373614190687361e-07,
"loss": 0.1718,
"step": 1600
},
{
"epoch": 0.24,
"learning_rate": 9.34827367754197e-07,
"loss": 0.156,
"step": 1632
},
{
"epoch": 0.25,
"learning_rate": 9.322933164396578e-07,
"loss": 0.1512,
"step": 1664
},
{
"epoch": 0.25,
"learning_rate": 9.297592651251187e-07,
"loss": 0.0968,
"step": 1696
},
{
"epoch": 0.26,
"learning_rate": 9.272252138105796e-07,
"loss": 0.0932,
"step": 1728
},
{
"epoch": 0.26,
"learning_rate": 9.246911624960405e-07,
"loss": 0.2464,
"step": 1760
},
{
"epoch": 0.27,
"learning_rate": 9.221571111815014e-07,
"loss": 0.2036,
"step": 1792
},
{
"epoch": 0.27,
"learning_rate": 9.196230598669623e-07,
"loss": 0.1245,
"step": 1824
},
{
"epoch": 0.28,
"learning_rate": 9.170890085524232e-07,
"loss": 0.1097,
"step": 1856
},
{
"epoch": 0.28,
"learning_rate": 9.14554957237884e-07,
"loss": 0.1844,
"step": 1888
},
{
"epoch": 0.29,
"learning_rate": 9.120209059233449e-07,
"loss": 0.1114,
"step": 1920
},
{
"epoch": 0.29,
"learning_rate": 9.094868546088058e-07,
"loss": 0.1992,
"step": 1952
},
{
"epoch": 0.3,
"learning_rate": 9.069528032942667e-07,
"loss": 0.1721,
"step": 1984
},
{
"epoch": 0.3,
"learning_rate": 9.044187519797275e-07,
"loss": 0.1473,
"step": 2016
},
{
"epoch": 0.3,
"learning_rate": 9.018847006651884e-07,
"loss": 0.1865,
"step": 2048
},
{
"epoch": 0.31,
"learning_rate": 8.993506493506493e-07,
"loss": 0.1583,
"step": 2080
},
{
"epoch": 0.31,
"learning_rate": 8.968165980361102e-07,
"loss": 0.1866,
"step": 2112
},
{
"epoch": 0.32,
"learning_rate": 8.942825467215711e-07,
"loss": 0.1617,
"step": 2144
},
{
"epoch": 0.32,
"learning_rate": 8.917484954070319e-07,
"loss": 0.1189,
"step": 2176
},
{
"epoch": 0.33,
"learning_rate": 8.892144440924928e-07,
"loss": 0.148,
"step": 2208
},
{
"epoch": 0.33,
"learning_rate": 8.866803927779537e-07,
"loss": 0.131,
"step": 2240
},
{
"epoch": 0.34,
"learning_rate": 8.841463414634146e-07,
"loss": 0.2261,
"step": 2272
},
{
"epoch": 0.34,
"learning_rate": 8.816122901488755e-07,
"loss": 0.1742,
"step": 2304
},
{
"epoch": 0.35,
"learning_rate": 8.790782388343364e-07,
"loss": 0.164,
"step": 2336
},
{
"epoch": 0.35,
"learning_rate": 8.765441875197972e-07,
"loss": 0.1161,
"step": 2368
},
{
"epoch": 0.36,
"learning_rate": 8.74010136205258e-07,
"loss": 0.1636,
"step": 2400
},
{
"epoch": 0.36,
"learning_rate": 8.71476084890719e-07,
"loss": 0.2416,
"step": 2432
},
{
"epoch": 0.37,
"learning_rate": 8.689420335761799e-07,
"loss": 0.1632,
"step": 2464
},
{
"epoch": 0.37,
"learning_rate": 8.664079822616408e-07,
"loss": 0.1477,
"step": 2496
},
{
"epoch": 0.38,
"learning_rate": 8.638739309471016e-07,
"loss": 0.2083,
"step": 2528
},
{
"epoch": 0.38,
"learning_rate": 8.613398796325625e-07,
"loss": 0.1599,
"step": 2560
},
{
"epoch": 0.39,
"learning_rate": 8.588058283180234e-07,
"loss": 0.1817,
"step": 2592
},
{
"epoch": 0.39,
"learning_rate": 8.562717770034843e-07,
"loss": 0.1005,
"step": 2624
},
{
"epoch": 0.4,
"learning_rate": 8.537377256889452e-07,
"loss": 0.168,
"step": 2656
},
{
"epoch": 0.4,
"learning_rate": 8.51203674374406e-07,
"loss": 0.2418,
"step": 2688
},
{
"epoch": 0.4,
"learning_rate": 8.486696230598669e-07,
"loss": 0.1881,
"step": 2720
},
{
"epoch": 0.41,
"learning_rate": 8.461355717453278e-07,
"loss": 0.1829,
"step": 2752
},
{
"epoch": 0.41,
"learning_rate": 8.436015204307887e-07,
"loss": 0.1073,
"step": 2784
},
{
"epoch": 0.42,
"learning_rate": 8.410674691162496e-07,
"loss": 0.1324,
"step": 2816
},
{
"epoch": 0.42,
"learning_rate": 8.385334178017105e-07,
"loss": 0.2077,
"step": 2848
},
{
"epoch": 0.43,
"learning_rate": 8.359993664871713e-07,
"loss": 0.2248,
"step": 2880
},
{
"epoch": 0.43,
"learning_rate": 8.334653151726322e-07,
"loss": 0.1337,
"step": 2912
},
{
"epoch": 0.44,
"learning_rate": 8.30931263858093e-07,
"loss": 0.1906,
"step": 2944
},
{
"epoch": 0.44,
"learning_rate": 8.28397212543554e-07,
"loss": 0.1893,
"step": 2976
},
{
"epoch": 0.45,
"learning_rate": 8.259423503325942e-07,
"loss": 0.2029,
"step": 3008
},
{
"epoch": 0.45,
"learning_rate": 8.234082990180551e-07,
"loss": 0.157,
"step": 3040
},
{
"epoch": 0.46,
"learning_rate": 8.208742477035159e-07,
"loss": 0.1433,
"step": 3072
},
{
"epoch": 0.46,
"learning_rate": 8.183401963889769e-07,
"loss": 0.1689,
"step": 3104
},
{
"epoch": 0.47,
"learning_rate": 8.158061450744377e-07,
"loss": 0.2012,
"step": 3136
},
{
"epoch": 0.47,
"learning_rate": 8.132720937598986e-07,
"loss": 0.175,
"step": 3168
},
{
"epoch": 0.48,
"learning_rate": 8.107380424453595e-07,
"loss": 0.1961,
"step": 3200
},
{
"epoch": 0.48,
"learning_rate": 8.082039911308203e-07,
"loss": 0.2547,
"step": 3232
},
{
"epoch": 0.49,
"learning_rate": 8.056699398162813e-07,
"loss": 0.1935,
"step": 3264
},
{
"epoch": 0.49,
"learning_rate": 8.031358885017421e-07,
"loss": 0.2149,
"step": 3296
},
{
"epoch": 0.5,
"learning_rate": 8.00601837187203e-07,
"loss": 0.1809,
"step": 3328
},
{
"epoch": 0.5,
"learning_rate": 7.980677858726639e-07,
"loss": 0.2072,
"step": 3360
},
{
"epoch": 0.5,
"learning_rate": 7.955337345581247e-07,
"loss": 0.2116,
"step": 3392
},
{
"epoch": 0.51,
"learning_rate": 7.929996832435857e-07,
"loss": 0.1737,
"step": 3424
},
{
"epoch": 0.51,
"learning_rate": 7.904656319290464e-07,
"loss": 0.2219,
"step": 3456
},
{
"epoch": 0.52,
"learning_rate": 7.879315806145074e-07,
"loss": 0.1849,
"step": 3488
},
{
"epoch": 0.52,
"learning_rate": 7.853975292999683e-07,
"loss": 0.1884,
"step": 3520
},
{
"epoch": 0.53,
"learning_rate": 7.828634779854292e-07,
"loss": 0.2192,
"step": 3552
},
{
"epoch": 0.53,
"learning_rate": 7.803294266708901e-07,
"loss": 0.1958,
"step": 3584
},
{
"epoch": 0.54,
"learning_rate": 7.777953753563509e-07,
"loss": 0.1433,
"step": 3616
},
{
"epoch": 0.54,
"learning_rate": 7.752613240418118e-07,
"loss": 0.2151,
"step": 3648
},
{
"epoch": 0.55,
"learning_rate": 7.727272727272727e-07,
"loss": 0.1675,
"step": 3680
},
{
"epoch": 0.55,
"learning_rate": 7.701932214127336e-07,
"loss": 0.1586,
"step": 3712
},
{
"epoch": 0.56,
"learning_rate": 7.676591700981945e-07,
"loss": 0.2881,
"step": 3744
},
{
"epoch": 0.56,
"learning_rate": 7.651251187836553e-07,
"loss": 0.196,
"step": 3776
},
{
"epoch": 0.57,
"learning_rate": 7.625910674691162e-07,
"loss": 0.1285,
"step": 3808
},
{
"epoch": 0.57,
"learning_rate": 7.60057016154577e-07,
"loss": 0.2262,
"step": 3840
},
{
"epoch": 0.58,
"learning_rate": 7.57522964840038e-07,
"loss": 0.2309,
"step": 3872
},
{
"epoch": 0.58,
"learning_rate": 7.549889135254989e-07,
"loss": 0.1533,
"step": 3904
},
{
"epoch": 0.59,
"learning_rate": 7.524548622109597e-07,
"loss": 0.1297,
"step": 3936
},
{
"epoch": 0.59,
"learning_rate": 7.499208108964206e-07,
"loss": 0.1808,
"step": 3968
},
{
"epoch": 0.6,
"learning_rate": 7.473867595818814e-07,
"loss": 0.2401,
"step": 4000
},
{
"epoch": 0.6,
"learning_rate": 7.448527082673424e-07,
"loss": 0.2507,
"step": 4032
},
{
"epoch": 0.61,
"learning_rate": 7.423186569528033e-07,
"loss": 0.1562,
"step": 4064
},
{
"epoch": 0.61,
"learning_rate": 7.397846056382642e-07,
"loss": 0.1912,
"step": 4096
},
{
"epoch": 0.61,
"learning_rate": 7.373297434273043e-07,
"loss": 0.1703,
"step": 4128
},
{
"epoch": 0.62,
"learning_rate": 7.347956921127653e-07,
"loss": 0.1471,
"step": 4160
},
{
"epoch": 0.62,
"learning_rate": 7.322616407982262e-07,
"loss": 0.1539,
"step": 4192
},
{
"epoch": 0.63,
"learning_rate": 7.297275894836869e-07,
"loss": 0.1521,
"step": 4224
},
{
"epoch": 0.63,
"learning_rate": 7.271935381691479e-07,
"loss": 0.2623,
"step": 4256
},
{
"epoch": 0.64,
"learning_rate": 7.246594868546087e-07,
"loss": 0.1753,
"step": 4288
},
{
"epoch": 0.64,
"learning_rate": 7.221254355400697e-07,
"loss": 0.1945,
"step": 4320
},
{
"epoch": 0.65,
"learning_rate": 7.195913842255306e-07,
"loss": 0.2153,
"step": 4352
},
{
"epoch": 0.65,
"learning_rate": 7.170573329109915e-07,
"loss": 0.2841,
"step": 4384
},
{
"epoch": 0.66,
"learning_rate": 7.145232815964523e-07,
"loss": 0.1759,
"step": 4416
},
{
"epoch": 0.66,
"learning_rate": 7.119892302819131e-07,
"loss": 0.2214,
"step": 4448
},
{
"epoch": 0.67,
"learning_rate": 7.094551789673741e-07,
"loss": 0.188,
"step": 4480
},
{
"epoch": 0.67,
"learning_rate": 7.069211276528349e-07,
"loss": 0.1579,
"step": 4512
},
{
"epoch": 0.68,
"learning_rate": 7.043870763382959e-07,
"loss": 0.2213,
"step": 4544
},
{
"epoch": 0.68,
"learning_rate": 7.018530250237567e-07,
"loss": 0.2042,
"step": 4576
},
{
"epoch": 0.69,
"learning_rate": 6.993189737092175e-07,
"loss": 0.1852,
"step": 4608
},
{
"epoch": 0.69,
"learning_rate": 6.967849223946785e-07,
"loss": 0.1716,
"step": 4640
},
{
"epoch": 0.7,
"learning_rate": 6.942508710801393e-07,
"loss": 0.1645,
"step": 4672
},
{
"epoch": 0.7,
"learning_rate": 6.917168197656003e-07,
"loss": 0.1986,
"step": 4704
},
{
"epoch": 0.71,
"learning_rate": 6.89182768451061e-07,
"loss": 0.2531,
"step": 4736
},
{
"epoch": 0.71,
"learning_rate": 6.866487171365219e-07,
"loss": 0.1792,
"step": 4768
},
{
"epoch": 0.71,
"learning_rate": 6.841146658219829e-07,
"loss": 0.1843,
"step": 4800
},
{
"epoch": 0.72,
"learning_rate": 6.815806145074437e-07,
"loss": 0.2175,
"step": 4832
},
{
"epoch": 0.72,
"learning_rate": 6.790465631929047e-07,
"loss": 0.2083,
"step": 4864
},
{
"epoch": 0.73,
"learning_rate": 6.765125118783655e-07,
"loss": 0.1729,
"step": 4896
},
{
"epoch": 0.73,
"learning_rate": 6.739784605638263e-07,
"loss": 0.1849,
"step": 4928
},
{
"epoch": 0.74,
"learning_rate": 6.714444092492873e-07,
"loss": 0.2374,
"step": 4960
},
{
"epoch": 0.74,
"learning_rate": 6.689103579347481e-07,
"loss": 0.241,
"step": 4992
},
{
"epoch": 0.75,
"learning_rate": 6.663763066202091e-07,
"loss": 0.1853,
"step": 5024
},
{
"epoch": 0.75,
"learning_rate": 6.638422553056699e-07,
"loss": 0.1957,
"step": 5056
},
{
"epoch": 0.76,
"learning_rate": 6.613082039911308e-07,
"loss": 0.2052,
"step": 5088
},
{
"epoch": 0.76,
"learning_rate": 6.587741526765917e-07,
"loss": 0.2321,
"step": 5120
},
{
"epoch": 0.77,
"learning_rate": 6.562401013620525e-07,
"loss": 0.1804,
"step": 5152
},
{
"epoch": 0.77,
"learning_rate": 6.537060500475135e-07,
"loss": 0.1842,
"step": 5184
},
{
"epoch": 0.78,
"learning_rate": 6.511719987329743e-07,
"loss": 0.2388,
"step": 5216
},
{
"epoch": 0.78,
"learning_rate": 6.486379474184352e-07,
"loss": 0.2417,
"step": 5248
},
{
"epoch": 0.79,
"learning_rate": 6.46103896103896e-07,
"loss": 0.2224,
"step": 5280
},
{
"epoch": 0.79,
"learning_rate": 6.435698447893569e-07,
"loss": 0.2029,
"step": 5312
},
{
"epoch": 0.8,
"learning_rate": 6.410357934748179e-07,
"loss": 0.2807,
"step": 5344
},
{
"epoch": 0.8,
"learning_rate": 6.385017421602787e-07,
"loss": 0.192,
"step": 5376
},
{
"epoch": 0.81,
"learning_rate": 6.359676908457397e-07,
"loss": 0.1848,
"step": 5408
},
{
"epoch": 0.81,
"learning_rate": 6.334336395312004e-07,
"loss": 0.2143,
"step": 5440
},
{
"epoch": 0.81,
"learning_rate": 6.308995882166613e-07,
"loss": 0.2421,
"step": 5472
},
{
"epoch": 0.82,
"learning_rate": 6.283655369021223e-07,
"loss": 0.1724,
"step": 5504
},
{
"epoch": 0.82,
"learning_rate": 6.258314855875831e-07,
"loss": 0.1207,
"step": 5536
},
{
"epoch": 0.83,
"learning_rate": 6.232974342730441e-07,
"loss": 0.2259,
"step": 5568
},
{
"epoch": 0.83,
"learning_rate": 6.207633829585048e-07,
"loss": 0.2504,
"step": 5600
},
{
"epoch": 0.84,
"learning_rate": 6.182293316439658e-07,
"loss": 0.188,
"step": 5632
},
{
"epoch": 0.84,
"learning_rate": 6.156952803294266e-07,
"loss": 0.1893,
"step": 5664
},
{
"epoch": 0.85,
"learning_rate": 6.131612290148875e-07,
"loss": 0.1905,
"step": 5696
},
{
"epoch": 0.85,
"learning_rate": 6.106271777003485e-07,
"loss": 0.2594,
"step": 5728
},
{
"epoch": 0.86,
"learning_rate": 6.080931263858092e-07,
"loss": 0.3084,
"step": 5760
},
{
"epoch": 0.86,
"learning_rate": 6.055590750712702e-07,
"loss": 0.1925,
"step": 5792
},
{
"epoch": 0.87,
"learning_rate": 6.03025023756731e-07,
"loss": 0.186,
"step": 5824
},
{
"epoch": 0.87,
"learning_rate": 6.004909724421919e-07,
"loss": 0.2302,
"step": 5856
},
{
"epoch": 0.88,
"learning_rate": 5.979569211276529e-07,
"loss": 0.1371,
"step": 5888
},
{
"epoch": 0.88,
"learning_rate": 5.954228698131137e-07,
"loss": 0.231,
"step": 5920
},
{
"epoch": 0.89,
"learning_rate": 5.928888184985746e-07,
"loss": 0.2012,
"step": 5952
},
{
"epoch": 0.89,
"learning_rate": 5.903547671840354e-07,
"loss": 0.2006,
"step": 5984
},
{
"epoch": 0.9,
"learning_rate": 5.878207158694963e-07,
"loss": 0.215,
"step": 6016
},
{
"epoch": 0.9,
"learning_rate": 5.852866645549572e-07,
"loss": 0.1471,
"step": 6048
},
{
"epoch": 0.91,
"learning_rate": 5.827526132404181e-07,
"loss": 0.2364,
"step": 6080
},
{
"epoch": 0.91,
"learning_rate": 5.80218561925879e-07,
"loss": 0.2881,
"step": 6112
},
{
"epoch": 0.91,
"learning_rate": 5.776845106113398e-07,
"loss": 0.1536,
"step": 6144
},
{
"epoch": 0.92,
"learning_rate": 5.751504592968008e-07,
"loss": 0.2317,
"step": 6176
},
{
"epoch": 0.92,
"learning_rate": 5.726164079822616e-07,
"loss": 0.1952,
"step": 6208
},
{
"epoch": 0.93,
"learning_rate": 5.700823566677225e-07,
"loss": 0.1602,
"step": 6240
},
{
"epoch": 0.93,
"learning_rate": 5.675483053531834e-07,
"loss": 0.212,
"step": 6272
},
{
"epoch": 0.94,
"learning_rate": 5.650142540386442e-07,
"loss": 0.2401,
"step": 6304
},
{
"epoch": 0.94,
"learning_rate": 5.624802027241052e-07,
"loss": 0.1992,
"step": 6336
},
{
"epoch": 0.95,
"learning_rate": 5.59946151409566e-07,
"loss": 0.2616,
"step": 6368
},
{
"epoch": 0.95,
"learning_rate": 5.574121000950269e-07,
"loss": 0.146,
"step": 6400
},
{
"epoch": 0.96,
"learning_rate": 5.548780487804878e-07,
"loss": 0.2081,
"step": 6432
},
{
"epoch": 0.96,
"learning_rate": 5.523439974659486e-07,
"loss": 0.207,
"step": 6464
},
{
"epoch": 0.97,
"learning_rate": 5.498099461514096e-07,
"loss": 0.2631,
"step": 6496
},
{
"epoch": 0.97,
"learning_rate": 5.472758948368704e-07,
"loss": 0.1721,
"step": 6528
},
{
"epoch": 0.98,
"learning_rate": 5.447418435223313e-07,
"loss": 0.1908,
"step": 6560
},
{
"epoch": 0.98,
"learning_rate": 5.422869813113715e-07,
"loss": 0.2238,
"step": 6592
},
{
"epoch": 0.99,
"learning_rate": 5.397529299968325e-07,
"loss": 0.2524,
"step": 6624
},
{
"epoch": 0.99,
"learning_rate": 5.372188786822933e-07,
"loss": 0.1968,
"step": 6656
},
{
"epoch": 1.0,
"learning_rate": 5.346848273677542e-07,
"loss": 0.2379,
"step": 6688
}
],
"logging_steps": 32,
"max_steps": 13434,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 3358,
"total_flos": 2.85184331513856e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}