{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4997766860205448, "eval_steps": 500, "global_step": 10074, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.722084367245657e-08, "loss": 0.2836, "step": 32 }, { "epoch": 0.01, "learning_rate": 7.692307692307692e-08, "loss": 0.2111, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.1662531017369727e-07, "loss": 0.2145, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.563275434243176e-07, "loss": 0.2375, "step": 128 }, { "epoch": 0.02, "learning_rate": 1.9602977667493795e-07, "loss": 0.1839, "step": 160 }, { "epoch": 0.03, "learning_rate": 2.3573200992555832e-07, "loss": 0.2864, "step": 192 }, { "epoch": 0.03, "learning_rate": 2.7543424317617863e-07, "loss": 0.1799, "step": 224 }, { "epoch": 0.04, "learning_rate": 3.1513647642679897e-07, "loss": 0.1609, "step": 256 }, { "epoch": 0.04, "learning_rate": 3.5483870967741936e-07, "loss": 0.1434, "step": 288 }, { "epoch": 0.05, "learning_rate": 3.945409429280397e-07, "loss": 0.1699, "step": 320 }, { "epoch": 0.05, "learning_rate": 4.3424317617866004e-07, "loss": 0.1898, "step": 352 }, { "epoch": 0.06, "learning_rate": 4.739454094292804e-07, "loss": 0.1665, "step": 384 }, { "epoch": 0.06, "learning_rate": 5.136476426799007e-07, "loss": 0.1358, "step": 416 }, { "epoch": 0.07, "learning_rate": 5.533498759305211e-07, "loss": 0.1713, "step": 448 }, { "epoch": 0.07, "learning_rate": 5.930521091811415e-07, "loss": 0.1817, "step": 480 }, { "epoch": 0.08, "learning_rate": 6.327543424317618e-07, "loss": 0.149, "step": 512 }, { "epoch": 0.08, "learning_rate": 6.724565756823821e-07, "loss": 0.165, "step": 544 }, { "epoch": 0.09, "learning_rate": 7.121588089330024e-07, "loss": 0.2282, "step": 576 }, { "epoch": 0.09, "learning_rate": 7.518610421836227e-07, "loss": 0.1666, "step": 608 }, { "epoch": 0.1, "learning_rate": 7.915632754342431e-07, "loss": 0.1229, "step": 640 }, { "epoch": 0.1, "learning_rate": 8.312655086848634e-07, "loss": 0.2118, "step": 672 }, { "epoch": 0.1, "learning_rate": 8.709677419354838e-07, "loss": 0.1869, "step": 704 }, { "epoch": 0.11, "learning_rate": 9.106699751861042e-07, "loss": 0.2196, "step": 736 }, { "epoch": 0.11, "learning_rate": 9.503722084367245e-07, "loss": 0.0936, "step": 768 }, { "epoch": 0.12, "learning_rate": 9.90074441687345e-07, "loss": 0.1839, "step": 800 }, { "epoch": 0.12, "learning_rate": 9.980994615140957e-07, "loss": 0.1675, "step": 832 }, { "epoch": 0.13, "learning_rate": 9.955654101995564e-07, "loss": 0.1595, "step": 864 }, { "epoch": 0.13, "learning_rate": 9.930313588850174e-07, "loss": 0.1556, "step": 896 }, { "epoch": 0.14, "learning_rate": 9.905764966740576e-07, "loss": 0.1711, "step": 928 }, { "epoch": 0.14, "learning_rate": 9.880424453595185e-07, "loss": 0.1777, "step": 960 }, { "epoch": 0.15, "learning_rate": 9.855083940449792e-07, "loss": 0.2031, "step": 992 }, { "epoch": 0.15, "learning_rate": 9.829743427304402e-07, "loss": 0.1529, "step": 1024 }, { "epoch": 0.16, "learning_rate": 9.804402914159011e-07, "loss": 0.1365, "step": 1056 }, { "epoch": 0.16, "learning_rate": 9.77906240101362e-07, "loss": 0.1922, "step": 1088 }, { "epoch": 0.17, "learning_rate": 9.75372188786823e-07, "loss": 0.133, "step": 1120 }, { "epoch": 0.17, "learning_rate": 9.728381374722837e-07, "loss": 0.1692, "step": 1152 }, { "epoch": 0.18, "learning_rate": 9.703040861577447e-07, "loss": 0.1022, "step": 1184 }, { "epoch": 0.18, "learning_rate": 9.677700348432054e-07, "loss": 0.2052, "step": 1216 }, { "epoch": 0.19, "learning_rate": 9.652359835286664e-07, "loss": 0.1546, "step": 1248 }, { "epoch": 0.19, "learning_rate": 9.627019322141273e-07, "loss": 0.149, "step": 1280 }, { "epoch": 0.2, "learning_rate": 9.601678808995883e-07, "loss": 0.1281, "step": 1312 }, { "epoch": 0.2, "learning_rate": 9.57633829585049e-07, "loss": 0.1437, "step": 1344 }, { "epoch": 0.2, "learning_rate": 9.5509977827051e-07, "loss": 0.2097, "step": 1376 }, { "epoch": 0.21, "learning_rate": 9.525657269559708e-07, "loss": 0.1308, "step": 1408 }, { "epoch": 0.21, "learning_rate": 9.500316756414317e-07, "loss": 0.1691, "step": 1440 }, { "epoch": 0.22, "learning_rate": 9.474976243268927e-07, "loss": 0.2319, "step": 1472 }, { "epoch": 0.22, "learning_rate": 9.449635730123534e-07, "loss": 0.2226, "step": 1504 }, { "epoch": 0.23, "learning_rate": 9.424295216978143e-07, "loss": 0.1789, "step": 1536 }, { "epoch": 0.23, "learning_rate": 9.398954703832752e-07, "loss": 0.1932, "step": 1568 }, { "epoch": 0.24, "learning_rate": 9.373614190687361e-07, "loss": 0.1718, "step": 1600 }, { "epoch": 0.24, "learning_rate": 9.34827367754197e-07, "loss": 0.156, "step": 1632 }, { "epoch": 0.25, "learning_rate": 9.322933164396578e-07, "loss": 0.1512, "step": 1664 }, { "epoch": 0.25, "learning_rate": 9.297592651251187e-07, "loss": 0.0968, "step": 1696 }, { "epoch": 0.26, "learning_rate": 9.272252138105796e-07, "loss": 0.0932, "step": 1728 }, { "epoch": 0.26, "learning_rate": 9.246911624960405e-07, "loss": 0.2464, "step": 1760 }, { "epoch": 0.27, "learning_rate": 9.221571111815014e-07, "loss": 0.2036, "step": 1792 }, { "epoch": 0.27, "learning_rate": 9.196230598669623e-07, "loss": 0.1245, "step": 1824 }, { "epoch": 0.28, "learning_rate": 9.170890085524232e-07, "loss": 0.1097, "step": 1856 }, { "epoch": 0.28, "learning_rate": 9.14554957237884e-07, "loss": 0.1844, "step": 1888 }, { "epoch": 0.29, "learning_rate": 9.120209059233449e-07, "loss": 0.1114, "step": 1920 }, { "epoch": 0.29, "learning_rate": 9.094868546088058e-07, "loss": 0.1992, "step": 1952 }, { "epoch": 0.3, "learning_rate": 9.069528032942667e-07, "loss": 0.1721, "step": 1984 }, { "epoch": 0.3, "learning_rate": 9.044187519797275e-07, "loss": 0.1473, "step": 2016 }, { "epoch": 0.3, "learning_rate": 9.018847006651884e-07, "loss": 0.1865, "step": 2048 }, { "epoch": 0.31, "learning_rate": 8.993506493506493e-07, "loss": 0.1583, "step": 2080 }, { "epoch": 0.31, "learning_rate": 8.968165980361102e-07, "loss": 0.1866, "step": 2112 }, { "epoch": 0.32, "learning_rate": 8.942825467215711e-07, "loss": 0.1617, "step": 2144 }, { "epoch": 0.32, "learning_rate": 8.917484954070319e-07, "loss": 0.1189, "step": 2176 }, { "epoch": 0.33, "learning_rate": 8.892144440924928e-07, "loss": 0.148, "step": 2208 }, { "epoch": 0.33, "learning_rate": 8.866803927779537e-07, "loss": 0.131, "step": 2240 }, { "epoch": 0.34, "learning_rate": 8.841463414634146e-07, "loss": 0.2261, "step": 2272 }, { "epoch": 0.34, "learning_rate": 8.816122901488755e-07, "loss": 0.1742, "step": 2304 }, { "epoch": 0.35, "learning_rate": 8.790782388343364e-07, "loss": 0.164, "step": 2336 }, { "epoch": 0.35, "learning_rate": 8.765441875197972e-07, "loss": 0.1161, "step": 2368 }, { "epoch": 0.36, "learning_rate": 8.74010136205258e-07, "loss": 0.1636, "step": 2400 }, { "epoch": 0.36, "learning_rate": 8.71476084890719e-07, "loss": 0.2416, "step": 2432 }, { "epoch": 0.37, "learning_rate": 8.689420335761799e-07, "loss": 0.1632, "step": 2464 }, { "epoch": 0.37, "learning_rate": 8.664079822616408e-07, "loss": 0.1477, "step": 2496 }, { "epoch": 0.38, "learning_rate": 8.638739309471016e-07, "loss": 0.2083, "step": 2528 }, { "epoch": 0.38, "learning_rate": 8.613398796325625e-07, "loss": 0.1599, "step": 2560 }, { "epoch": 0.39, "learning_rate": 8.588058283180234e-07, "loss": 0.1817, "step": 2592 }, { "epoch": 0.39, "learning_rate": 8.562717770034843e-07, "loss": 0.1005, "step": 2624 }, { "epoch": 0.4, "learning_rate": 8.537377256889452e-07, "loss": 0.168, "step": 2656 }, { "epoch": 0.4, "learning_rate": 8.51203674374406e-07, "loss": 0.2418, "step": 2688 }, { "epoch": 0.4, "learning_rate": 8.486696230598669e-07, "loss": 0.1881, "step": 2720 }, { "epoch": 0.41, "learning_rate": 8.461355717453278e-07, "loss": 0.1829, "step": 2752 }, { "epoch": 0.41, "learning_rate": 8.436015204307887e-07, "loss": 0.1073, "step": 2784 }, { "epoch": 0.42, "learning_rate": 8.410674691162496e-07, "loss": 0.1324, "step": 2816 }, { "epoch": 0.42, "learning_rate": 8.385334178017105e-07, "loss": 0.2077, "step": 2848 }, { "epoch": 0.43, "learning_rate": 8.359993664871713e-07, "loss": 0.2248, "step": 2880 }, { "epoch": 0.43, "learning_rate": 8.334653151726322e-07, "loss": 0.1337, "step": 2912 }, { "epoch": 0.44, "learning_rate": 8.30931263858093e-07, "loss": 0.1906, "step": 2944 }, { "epoch": 0.44, "learning_rate": 8.28397212543554e-07, "loss": 0.1893, "step": 2976 }, { "epoch": 0.45, "learning_rate": 8.259423503325942e-07, "loss": 0.2029, "step": 3008 }, { "epoch": 0.45, "learning_rate": 8.234082990180551e-07, "loss": 0.157, "step": 3040 }, { "epoch": 0.46, "learning_rate": 8.208742477035159e-07, "loss": 0.1433, "step": 3072 }, { "epoch": 0.46, "learning_rate": 8.183401963889769e-07, "loss": 0.1689, "step": 3104 }, { "epoch": 0.47, "learning_rate": 8.158061450744377e-07, "loss": 0.2012, "step": 3136 }, { "epoch": 0.47, "learning_rate": 8.132720937598986e-07, "loss": 0.175, "step": 3168 }, { "epoch": 0.48, "learning_rate": 8.107380424453595e-07, "loss": 0.1961, "step": 3200 }, { "epoch": 0.48, "learning_rate": 8.082039911308203e-07, "loss": 0.2547, "step": 3232 }, { "epoch": 0.49, "learning_rate": 8.056699398162813e-07, "loss": 0.1935, "step": 3264 }, { "epoch": 0.49, "learning_rate": 8.031358885017421e-07, "loss": 0.2149, "step": 3296 }, { "epoch": 0.5, "learning_rate": 8.00601837187203e-07, "loss": 0.1809, "step": 3328 }, { "epoch": 0.5, "learning_rate": 7.980677858726639e-07, "loss": 0.2072, "step": 3360 }, { "epoch": 0.5, "learning_rate": 7.955337345581247e-07, "loss": 0.2116, "step": 3392 }, { "epoch": 0.51, "learning_rate": 7.929996832435857e-07, "loss": 0.1737, "step": 3424 }, { "epoch": 0.51, "learning_rate": 7.904656319290464e-07, "loss": 0.2219, "step": 3456 }, { "epoch": 0.52, "learning_rate": 7.879315806145074e-07, "loss": 0.1849, "step": 3488 }, { "epoch": 0.52, "learning_rate": 7.853975292999683e-07, "loss": 0.1884, "step": 3520 }, { "epoch": 0.53, "learning_rate": 7.828634779854292e-07, "loss": 0.2192, "step": 3552 }, { "epoch": 0.53, "learning_rate": 7.803294266708901e-07, "loss": 0.1958, "step": 3584 }, { "epoch": 0.54, "learning_rate": 7.777953753563509e-07, "loss": 0.1433, "step": 3616 }, { "epoch": 0.54, "learning_rate": 7.752613240418118e-07, "loss": 0.2151, "step": 3648 }, { "epoch": 0.55, "learning_rate": 7.727272727272727e-07, "loss": 0.1675, "step": 3680 }, { "epoch": 0.55, "learning_rate": 7.701932214127336e-07, "loss": 0.1586, "step": 3712 }, { "epoch": 0.56, "learning_rate": 7.676591700981945e-07, "loss": 0.2881, "step": 3744 }, { "epoch": 0.56, "learning_rate": 7.651251187836553e-07, "loss": 0.196, "step": 3776 }, { "epoch": 0.57, "learning_rate": 7.625910674691162e-07, "loss": 0.1285, "step": 3808 }, { "epoch": 0.57, "learning_rate": 7.60057016154577e-07, "loss": 0.2262, "step": 3840 }, { "epoch": 0.58, "learning_rate": 7.57522964840038e-07, "loss": 0.2309, "step": 3872 }, { "epoch": 0.58, "learning_rate": 7.549889135254989e-07, "loss": 0.1533, "step": 3904 }, { "epoch": 0.59, "learning_rate": 7.524548622109597e-07, "loss": 0.1297, "step": 3936 }, { "epoch": 0.59, "learning_rate": 7.499208108964206e-07, "loss": 0.1808, "step": 3968 }, { "epoch": 0.6, "learning_rate": 7.473867595818814e-07, "loss": 0.2401, "step": 4000 }, { "epoch": 0.6, "learning_rate": 7.448527082673424e-07, "loss": 0.2507, "step": 4032 }, { "epoch": 0.61, "learning_rate": 7.423186569528033e-07, "loss": 0.1562, "step": 4064 }, { "epoch": 0.61, "learning_rate": 7.397846056382642e-07, "loss": 0.1912, "step": 4096 }, { "epoch": 0.61, "learning_rate": 7.373297434273043e-07, "loss": 0.1703, "step": 4128 }, { "epoch": 0.62, "learning_rate": 7.347956921127653e-07, "loss": 0.1471, "step": 4160 }, { "epoch": 0.62, "learning_rate": 7.322616407982262e-07, "loss": 0.1539, "step": 4192 }, { "epoch": 0.63, "learning_rate": 7.297275894836869e-07, "loss": 0.1521, "step": 4224 }, { "epoch": 0.63, "learning_rate": 7.271935381691479e-07, "loss": 0.2623, "step": 4256 }, { "epoch": 0.64, "learning_rate": 7.246594868546087e-07, "loss": 0.1753, "step": 4288 }, { "epoch": 0.64, "learning_rate": 7.221254355400697e-07, "loss": 0.1945, "step": 4320 }, { "epoch": 0.65, "learning_rate": 7.195913842255306e-07, "loss": 0.2153, "step": 4352 }, { "epoch": 0.65, "learning_rate": 7.170573329109915e-07, "loss": 0.2841, "step": 4384 }, { "epoch": 0.66, "learning_rate": 7.145232815964523e-07, "loss": 0.1759, "step": 4416 }, { "epoch": 0.66, "learning_rate": 7.119892302819131e-07, "loss": 0.2214, "step": 4448 }, { "epoch": 0.67, "learning_rate": 7.094551789673741e-07, "loss": 0.188, "step": 4480 }, { "epoch": 0.67, "learning_rate": 7.069211276528349e-07, "loss": 0.1579, "step": 4512 }, { "epoch": 0.68, "learning_rate": 7.043870763382959e-07, "loss": 0.2213, "step": 4544 }, { "epoch": 0.68, "learning_rate": 7.018530250237567e-07, "loss": 0.2042, "step": 4576 }, { "epoch": 0.69, "learning_rate": 6.993189737092175e-07, "loss": 0.1852, "step": 4608 }, { "epoch": 0.69, "learning_rate": 6.967849223946785e-07, "loss": 0.1716, "step": 4640 }, { "epoch": 0.7, "learning_rate": 6.942508710801393e-07, "loss": 0.1645, "step": 4672 }, { "epoch": 0.7, "learning_rate": 6.917168197656003e-07, "loss": 0.1986, "step": 4704 }, { "epoch": 0.71, "learning_rate": 6.89182768451061e-07, "loss": 0.2531, "step": 4736 }, { "epoch": 0.71, "learning_rate": 6.866487171365219e-07, "loss": 0.1792, "step": 4768 }, { "epoch": 0.71, "learning_rate": 6.841146658219829e-07, "loss": 0.1843, "step": 4800 }, { "epoch": 0.72, "learning_rate": 6.815806145074437e-07, "loss": 0.2175, "step": 4832 }, { "epoch": 0.72, "learning_rate": 6.790465631929047e-07, "loss": 0.2083, "step": 4864 }, { "epoch": 0.73, "learning_rate": 6.765125118783655e-07, "loss": 0.1729, "step": 4896 }, { "epoch": 0.73, "learning_rate": 6.739784605638263e-07, "loss": 0.1849, "step": 4928 }, { "epoch": 0.74, "learning_rate": 6.714444092492873e-07, "loss": 0.2374, "step": 4960 }, { "epoch": 0.74, "learning_rate": 6.689103579347481e-07, "loss": 0.241, "step": 4992 }, { "epoch": 0.75, "learning_rate": 6.663763066202091e-07, "loss": 0.1853, "step": 5024 }, { "epoch": 0.75, "learning_rate": 6.638422553056699e-07, "loss": 0.1957, "step": 5056 }, { "epoch": 0.76, "learning_rate": 6.613082039911308e-07, "loss": 0.2052, "step": 5088 }, { "epoch": 0.76, "learning_rate": 6.587741526765917e-07, "loss": 0.2321, "step": 5120 }, { "epoch": 0.77, "learning_rate": 6.562401013620525e-07, "loss": 0.1804, "step": 5152 }, { "epoch": 0.77, "learning_rate": 6.537060500475135e-07, "loss": 0.1842, "step": 5184 }, { "epoch": 0.78, "learning_rate": 6.511719987329743e-07, "loss": 0.2388, "step": 5216 }, { "epoch": 0.78, "learning_rate": 6.486379474184352e-07, "loss": 0.2417, "step": 5248 }, { "epoch": 0.79, "learning_rate": 6.46103896103896e-07, "loss": 0.2224, "step": 5280 }, { "epoch": 0.79, "learning_rate": 6.435698447893569e-07, "loss": 0.2029, "step": 5312 }, { "epoch": 0.8, "learning_rate": 6.410357934748179e-07, "loss": 0.2807, "step": 5344 }, { "epoch": 0.8, "learning_rate": 6.385017421602787e-07, "loss": 0.192, "step": 5376 }, { "epoch": 0.81, "learning_rate": 6.359676908457397e-07, "loss": 0.1848, "step": 5408 }, { "epoch": 0.81, "learning_rate": 6.334336395312004e-07, "loss": 0.2143, "step": 5440 }, { "epoch": 0.81, "learning_rate": 6.308995882166613e-07, "loss": 0.2421, "step": 5472 }, { "epoch": 0.82, "learning_rate": 6.283655369021223e-07, "loss": 0.1724, "step": 5504 }, { "epoch": 0.82, "learning_rate": 6.258314855875831e-07, "loss": 0.1207, "step": 5536 }, { "epoch": 0.83, "learning_rate": 6.232974342730441e-07, "loss": 0.2259, "step": 5568 }, { "epoch": 0.83, "learning_rate": 6.207633829585048e-07, "loss": 0.2504, "step": 5600 }, { "epoch": 0.84, "learning_rate": 6.182293316439658e-07, "loss": 0.188, "step": 5632 }, { "epoch": 0.84, "learning_rate": 6.156952803294266e-07, "loss": 0.1893, "step": 5664 }, { "epoch": 0.85, "learning_rate": 6.131612290148875e-07, "loss": 0.1905, "step": 5696 }, { "epoch": 0.85, "learning_rate": 6.106271777003485e-07, "loss": 0.2594, "step": 5728 }, { "epoch": 0.86, "learning_rate": 6.080931263858092e-07, "loss": 0.3084, "step": 5760 }, { "epoch": 0.86, "learning_rate": 6.055590750712702e-07, "loss": 0.1925, "step": 5792 }, { "epoch": 0.87, "learning_rate": 6.03025023756731e-07, "loss": 0.186, "step": 5824 }, { "epoch": 0.87, "learning_rate": 6.004909724421919e-07, "loss": 0.2302, "step": 5856 }, { "epoch": 0.88, "learning_rate": 5.979569211276529e-07, "loss": 0.1371, "step": 5888 }, { "epoch": 0.88, "learning_rate": 5.954228698131137e-07, "loss": 0.231, "step": 5920 }, { "epoch": 0.89, "learning_rate": 5.928888184985746e-07, "loss": 0.2012, "step": 5952 }, { "epoch": 0.89, "learning_rate": 5.903547671840354e-07, "loss": 0.2006, "step": 5984 }, { "epoch": 0.9, "learning_rate": 5.878207158694963e-07, "loss": 0.215, "step": 6016 }, { "epoch": 0.9, "learning_rate": 5.852866645549572e-07, "loss": 0.1471, "step": 6048 }, { "epoch": 0.91, "learning_rate": 5.827526132404181e-07, "loss": 0.2364, "step": 6080 }, { "epoch": 0.91, "learning_rate": 5.80218561925879e-07, "loss": 0.2881, "step": 6112 }, { "epoch": 0.91, "learning_rate": 5.776845106113398e-07, "loss": 0.1536, "step": 6144 }, { "epoch": 0.92, "learning_rate": 5.751504592968008e-07, "loss": 0.2317, "step": 6176 }, { "epoch": 0.92, "learning_rate": 5.726164079822616e-07, "loss": 0.1952, "step": 6208 }, { "epoch": 0.93, "learning_rate": 5.700823566677225e-07, "loss": 0.1602, "step": 6240 }, { "epoch": 0.93, "learning_rate": 5.675483053531834e-07, "loss": 0.212, "step": 6272 }, { "epoch": 0.94, "learning_rate": 5.650142540386442e-07, "loss": 0.2401, "step": 6304 }, { "epoch": 0.94, "learning_rate": 5.624802027241052e-07, "loss": 0.1992, "step": 6336 }, { "epoch": 0.95, "learning_rate": 5.59946151409566e-07, "loss": 0.2616, "step": 6368 }, { "epoch": 0.95, "learning_rate": 5.574121000950269e-07, "loss": 0.146, "step": 6400 }, { "epoch": 0.96, "learning_rate": 5.548780487804878e-07, "loss": 0.2081, "step": 6432 }, { "epoch": 0.96, "learning_rate": 5.523439974659486e-07, "loss": 0.207, "step": 6464 }, { "epoch": 0.97, "learning_rate": 5.498099461514096e-07, "loss": 0.2631, "step": 6496 }, { "epoch": 0.97, "learning_rate": 5.472758948368704e-07, "loss": 0.1721, "step": 6528 }, { "epoch": 0.98, "learning_rate": 5.447418435223313e-07, "loss": 0.1908, "step": 6560 }, { "epoch": 0.98, "learning_rate": 5.422869813113715e-07, "loss": 0.2238, "step": 6592 }, { "epoch": 0.99, "learning_rate": 5.397529299968325e-07, "loss": 0.2524, "step": 6624 }, { "epoch": 0.99, "learning_rate": 5.372188786822933e-07, "loss": 0.1968, "step": 6656 }, { "epoch": 1.0, "learning_rate": 5.346848273677542e-07, "loss": 0.2379, "step": 6688 }, { "epoch": 1.0, "learning_rate": 5.32150776053215e-07, "loss": 0.246, "step": 6720 }, { "epoch": 1.01, "learning_rate": 5.296167247386759e-07, "loss": 0.104, "step": 6752 }, { "epoch": 1.01, "learning_rate": 5.270826734241369e-07, "loss": 0.102, "step": 6784 }, { "epoch": 1.01, "learning_rate": 5.245486221095977e-07, "loss": 0.1432, "step": 6816 }, { "epoch": 1.02, "learning_rate": 5.220145707950586e-07, "loss": 0.1262, "step": 6848 }, { "epoch": 1.02, "learning_rate": 5.194805194805194e-07, "loss": 0.1299, "step": 6880 }, { "epoch": 1.03, "learning_rate": 5.169464681659803e-07, "loss": 0.1319, "step": 6912 }, { "epoch": 1.03, "learning_rate": 5.144124168514412e-07, "loss": 0.105, "step": 6944 }, { "epoch": 1.04, "learning_rate": 5.118783655369021e-07, "loss": 0.1233, "step": 6976 }, { "epoch": 1.04, "learning_rate": 5.09344314222363e-07, "loss": 0.0922, "step": 7008 }, { "epoch": 1.05, "learning_rate": 5.068102629078239e-07, "loss": 0.144, "step": 7040 }, { "epoch": 1.05, "learning_rate": 5.042762115932847e-07, "loss": 0.1828, "step": 7072 }, { "epoch": 1.06, "learning_rate": 5.017421602787456e-07, "loss": 0.1097, "step": 7104 }, { "epoch": 1.06, "learning_rate": 4.992081089642065e-07, "loss": 0.1867, "step": 7136 }, { "epoch": 1.07, "learning_rate": 4.966740576496675e-07, "loss": 0.1338, "step": 7168 }, { "epoch": 1.07, "learning_rate": 4.941400063351283e-07, "loss": 0.1535, "step": 7200 }, { "epoch": 1.08, "learning_rate": 4.916059550205891e-07, "loss": 0.1861, "step": 7232 }, { "epoch": 1.08, "learning_rate": 4.8907190370605e-07, "loss": 0.1284, "step": 7264 }, { "epoch": 1.09, "learning_rate": 4.865378523915109e-07, "loss": 0.1037, "step": 7296 }, { "epoch": 1.09, "learning_rate": 4.840038010769719e-07, "loss": 0.1217, "step": 7328 }, { "epoch": 1.1, "learning_rate": 4.814697497624327e-07, "loss": 0.1469, "step": 7360 }, { "epoch": 1.1, "learning_rate": 4.789356984478935e-07, "loss": 0.1218, "step": 7392 }, { "epoch": 1.11, "learning_rate": 4.764016471333545e-07, "loss": 0.1486, "step": 7424 }, { "epoch": 1.11, "learning_rate": 4.738675958188153e-07, "loss": 0.0796, "step": 7456 }, { "epoch": 1.11, "learning_rate": 4.7141273360785554e-07, "loss": 0.1163, "step": 7488 }, { "epoch": 1.12, "learning_rate": 4.688786822933164e-07, "loss": 0.0821, "step": 7520 }, { "epoch": 1.12, "learning_rate": 4.663446309787773e-07, "loss": 0.1701, "step": 7552 }, { "epoch": 1.13, "learning_rate": 4.638105796642382e-07, "loss": 0.1002, "step": 7584 }, { "epoch": 1.13, "learning_rate": 4.613557174532784e-07, "loss": 0.0914, "step": 7616 }, { "epoch": 1.14, "learning_rate": 4.5882166613873927e-07, "loss": 0.1374, "step": 7648 }, { "epoch": 1.14, "learning_rate": 4.562876148242002e-07, "loss": 0.1142, "step": 7680 }, { "epoch": 1.15, "learning_rate": 4.5375356350966105e-07, "loss": 0.1351, "step": 7712 }, { "epoch": 1.15, "learning_rate": 4.5121951219512194e-07, "loss": 0.1513, "step": 7744 }, { "epoch": 1.16, "learning_rate": 4.486854608805828e-07, "loss": 0.0998, "step": 7776 }, { "epoch": 1.16, "learning_rate": 4.461514095660437e-07, "loss": 0.1115, "step": 7808 }, { "epoch": 1.17, "learning_rate": 4.4361735825150457e-07, "loss": 0.1373, "step": 7840 }, { "epoch": 1.17, "learning_rate": 4.4108330693696546e-07, "loss": 0.1614, "step": 7872 }, { "epoch": 1.18, "learning_rate": 4.3854925562242635e-07, "loss": 0.0826, "step": 7904 }, { "epoch": 1.18, "learning_rate": 4.3601520430788724e-07, "loss": 0.1129, "step": 7936 }, { "epoch": 1.19, "learning_rate": 4.334811529933481e-07, "loss": 0.0825, "step": 7968 }, { "epoch": 1.19, "learning_rate": 4.3094710167880897e-07, "loss": 0.1497, "step": 8000 }, { "epoch": 1.2, "learning_rate": 4.284130503642698e-07, "loss": 0.1575, "step": 8032 }, { "epoch": 1.2, "learning_rate": 4.2587899904973075e-07, "loss": 0.1377, "step": 8064 }, { "epoch": 1.21, "learning_rate": 4.2334494773519165e-07, "loss": 0.152, "step": 8096 }, { "epoch": 1.21, "learning_rate": 4.208108964206525e-07, "loss": 0.1142, "step": 8128 }, { "epoch": 1.21, "learning_rate": 4.182768451061134e-07, "loss": 0.1458, "step": 8160 }, { "epoch": 1.22, "learning_rate": 4.1574279379157427e-07, "loss": 0.1216, "step": 8192 }, { "epoch": 1.22, "learning_rate": 4.132087424770351e-07, "loss": 0.112, "step": 8224 }, { "epoch": 1.23, "learning_rate": 4.1067469116249605e-07, "loss": 0.1517, "step": 8256 }, { "epoch": 1.23, "learning_rate": 4.081406398479569e-07, "loss": 0.1046, "step": 8288 }, { "epoch": 1.24, "learning_rate": 4.056065885334178e-07, "loss": 0.1538, "step": 8320 }, { "epoch": 1.24, "learning_rate": 4.030725372188787e-07, "loss": 0.1734, "step": 8352 }, { "epoch": 1.25, "learning_rate": 4.005384859043395e-07, "loss": 0.1236, "step": 8384 }, { "epoch": 1.25, "learning_rate": 3.980044345898004e-07, "loss": 0.1129, "step": 8416 }, { "epoch": 1.26, "learning_rate": 3.9547038327526135e-07, "loss": 0.1193, "step": 8448 }, { "epoch": 1.26, "learning_rate": 3.929363319607222e-07, "loss": 0.1124, "step": 8480 }, { "epoch": 1.27, "learning_rate": 3.904022806461831e-07, "loss": 0.103, "step": 8512 }, { "epoch": 1.27, "learning_rate": 3.878682293316439e-07, "loss": 0.1526, "step": 8544 }, { "epoch": 1.28, "learning_rate": 3.853341780171048e-07, "loss": 0.1747, "step": 8576 }, { "epoch": 1.28, "learning_rate": 3.8280012670256575e-07, "loss": 0.0711, "step": 8608 }, { "epoch": 1.29, "learning_rate": 3.802660753880266e-07, "loss": 0.131, "step": 8640 }, { "epoch": 1.29, "learning_rate": 3.777320240734875e-07, "loss": 0.0695, "step": 8672 }, { "epoch": 1.3, "learning_rate": 3.751979727589484e-07, "loss": 0.1176, "step": 8704 }, { "epoch": 1.3, "learning_rate": 3.726639214444092e-07, "loss": 0.1141, "step": 8736 }, { "epoch": 1.31, "learning_rate": 3.701298701298701e-07, "loss": 0.1437, "step": 8768 }, { "epoch": 1.31, "learning_rate": 3.6759581881533095e-07, "loss": 0.1273, "step": 8800 }, { "epoch": 1.31, "learning_rate": 3.650617675007919e-07, "loss": 0.0765, "step": 8832 }, { "epoch": 1.32, "learning_rate": 3.625277161862528e-07, "loss": 0.1322, "step": 8864 }, { "epoch": 1.32, "learning_rate": 3.599936648717136e-07, "loss": 0.1643, "step": 8896 }, { "epoch": 1.33, "learning_rate": 3.574596135571745e-07, "loss": 0.1005, "step": 8928 }, { "epoch": 1.33, "learning_rate": 3.549255622426354e-07, "loss": 0.1545, "step": 8960 }, { "epoch": 1.34, "learning_rate": 3.523915109280963e-07, "loss": 0.0755, "step": 8992 }, { "epoch": 1.34, "learning_rate": 3.498574596135572e-07, "loss": 0.0917, "step": 9024 }, { "epoch": 1.35, "learning_rate": 3.47323408299018e-07, "loss": 0.0893, "step": 9056 }, { "epoch": 1.35, "learning_rate": 3.447893569844789e-07, "loss": 0.152, "step": 9088 }, { "epoch": 1.36, "learning_rate": 3.422553056699398e-07, "loss": 0.1154, "step": 9120 }, { "epoch": 1.36, "learning_rate": 3.3972125435540065e-07, "loss": 0.0817, "step": 9152 }, { "epoch": 1.37, "learning_rate": 3.371872030408616e-07, "loss": 0.083, "step": 9184 }, { "epoch": 1.37, "learning_rate": 3.346531517263225e-07, "loss": 0.1084, "step": 9216 }, { "epoch": 1.38, "learning_rate": 3.321191004117833e-07, "loss": 0.1514, "step": 9248 }, { "epoch": 1.38, "learning_rate": 3.295850490972442e-07, "loss": 0.1195, "step": 9280 }, { "epoch": 1.39, "learning_rate": 3.2705099778270505e-07, "loss": 0.0939, "step": 9312 }, { "epoch": 1.39, "learning_rate": 3.2451694646816595e-07, "loss": 0.1227, "step": 9344 }, { "epoch": 1.4, "learning_rate": 3.219828951536269e-07, "loss": 0.1342, "step": 9376 }, { "epoch": 1.4, "learning_rate": 3.1944884383908773e-07, "loss": 0.1214, "step": 9408 }, { "epoch": 1.41, "learning_rate": 3.169147925245486e-07, "loss": 0.1624, "step": 9440 }, { "epoch": 1.41, "learning_rate": 3.143807412100095e-07, "loss": 0.1261, "step": 9472 }, { "epoch": 1.41, "learning_rate": 3.1184668989547035e-07, "loss": 0.1674, "step": 9504 }, { "epoch": 1.42, "learning_rate": 3.0931263858093124e-07, "loss": 0.1613, "step": 9536 }, { "epoch": 1.42, "learning_rate": 3.0677858726639213e-07, "loss": 0.1519, "step": 9568 }, { "epoch": 1.43, "learning_rate": 3.04244535951853e-07, "loss": 0.1107, "step": 9600 }, { "epoch": 1.43, "learning_rate": 3.017104846373139e-07, "loss": 0.0992, "step": 9632 }, { "epoch": 1.44, "learning_rate": 2.9917643332277476e-07, "loss": 0.0921, "step": 9664 }, { "epoch": 1.44, "learning_rate": 2.9664238200823565e-07, "loss": 0.1478, "step": 9696 }, { "epoch": 1.45, "learning_rate": 2.9410833069369654e-07, "loss": 0.1268, "step": 9728 }, { "epoch": 1.45, "learning_rate": 2.9157427937915743e-07, "loss": 0.1086, "step": 9760 }, { "epoch": 1.46, "learning_rate": 2.890402280646183e-07, "loss": 0.1913, "step": 9792 }, { "epoch": 1.46, "learning_rate": 2.8650617675007916e-07, "loss": 0.1626, "step": 9824 }, { "epoch": 1.47, "learning_rate": 2.8397212543554005e-07, "loss": 0.1647, "step": 9856 }, { "epoch": 1.47, "learning_rate": 2.8143807412100095e-07, "loss": 0.1245, "step": 9888 }, { "epoch": 1.48, "learning_rate": 2.789040228064618e-07, "loss": 0.1326, "step": 9920 }, { "epoch": 1.48, "learning_rate": 2.7636997149192273e-07, "loss": 0.1095, "step": 9952 }, { "epoch": 1.49, "learning_rate": 2.738359201773836e-07, "loss": 0.1089, "step": 9984 }, { "epoch": 1.49, "learning_rate": 2.7130186886284446e-07, "loss": 0.1074, "step": 10016 }, { "epoch": 1.5, "learning_rate": 2.6876781754830535e-07, "loss": 0.1084, "step": 10048 } ], "logging_steps": 32, "max_steps": 13434, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 3358, "total_flos": 4.27776497270784e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }