{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998511240136966,
  "eval_steps": 500,
  "global_step": 6716,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 3.722084367245657e-08, "loss": 0.2836, "step": 32},
    {"epoch": 0.01, "learning_rate": 7.692307692307692e-08, "loss": 0.2111, "step": 64},
    {"epoch": 0.01, "learning_rate": 1.1662531017369727e-07, "loss": 0.2145, "step": 96},
    {"epoch": 0.02, "learning_rate": 1.563275434243176e-07, "loss": 0.2375, "step": 128},
    {"epoch": 0.02, "learning_rate": 1.9602977667493795e-07, "loss": 0.1839, "step": 160},
    {"epoch": 0.03, "learning_rate": 2.3573200992555832e-07, "loss": 0.2864, "step": 192},
    {"epoch": 0.03, "learning_rate": 2.7543424317617863e-07, "loss": 0.1799, "step": 224},
    {"epoch": 0.04, "learning_rate": 3.1513647642679897e-07, "loss": 0.1609, "step": 256},
    {"epoch": 0.04, "learning_rate": 3.5483870967741936e-07, "loss": 0.1434, "step": 288},
    {"epoch": 0.05, "learning_rate": 3.945409429280397e-07, "loss": 0.1699, "step": 320},
    {"epoch": 0.05, "learning_rate": 4.3424317617866004e-07, "loss": 0.1898, "step": 352},
    {"epoch": 0.06, "learning_rate": 4.739454094292804e-07, "loss": 0.1665, "step": 384},
    {"epoch": 0.06, "learning_rate": 5.136476426799007e-07, "loss": 0.1358, "step": 416},
    {"epoch": 0.07, "learning_rate": 5.533498759305211e-07, "loss": 0.1713, "step": 448},
    {"epoch": 0.07, "learning_rate": 5.930521091811415e-07, "loss": 0.1817, "step": 480},
    {"epoch": 0.08, "learning_rate": 6.327543424317618e-07, "loss": 0.149, "step": 512},
    {"epoch": 0.08, "learning_rate": 6.724565756823821e-07, "loss": 0.165, "step": 544},
    {"epoch": 0.09, "learning_rate": 7.121588089330024e-07, "loss": 0.2282, "step": 576},
    {"epoch": 0.09, "learning_rate": 7.518610421836227e-07, "loss": 0.1666, "step": 608},
    {"epoch": 0.1, "learning_rate": 7.915632754342431e-07, "loss": 0.1229, "step": 640},
    {"epoch": 0.1, "learning_rate": 8.312655086848634e-07, "loss": 0.2118, "step": 672},
    {"epoch": 0.1, "learning_rate": 8.709677419354838e-07, "loss": 0.1869, "step": 704},
    {"epoch": 0.11, "learning_rate": 9.106699751861042e-07, "loss": 0.2196, "step": 736},
    {"epoch": 0.11, "learning_rate": 9.503722084367245e-07, "loss": 0.0936, "step": 768},
    {"epoch": 0.12, "learning_rate": 9.90074441687345e-07, "loss": 0.1839, "step": 800},
    {"epoch": 0.12, "learning_rate": 9.980994615140957e-07, "loss": 0.1675, "step": 832},
    {"epoch": 0.13, "learning_rate": 9.955654101995564e-07, "loss": 0.1595, "step": 864},
    {"epoch": 0.13, "learning_rate": 9.930313588850174e-07, "loss": 0.1556, "step": 896},
    {"epoch": 0.14, "learning_rate": 9.905764966740576e-07, "loss": 0.1711, "step": 928},
    {"epoch": 0.14, "learning_rate": 9.880424453595185e-07, "loss": 0.1777, "step": 960},
    {"epoch": 0.15, "learning_rate": 9.855083940449792e-07, "loss": 0.2031, "step": 992},
    {"epoch": 0.15, "learning_rate": 9.829743427304402e-07, "loss": 0.1529, "step": 1024},
    {"epoch": 0.16, "learning_rate": 9.804402914159011e-07, "loss": 0.1365, "step": 1056},
    {"epoch": 0.16, "learning_rate": 9.77906240101362e-07, "loss": 0.1922, "step": 1088},
    {"epoch": 0.17, "learning_rate": 9.75372188786823e-07, "loss": 0.133, "step": 1120},
    {"epoch": 0.17, "learning_rate": 9.728381374722837e-07, "loss": 0.1692, "step": 1152},
    {"epoch": 0.18, "learning_rate": 9.703040861577447e-07, "loss": 0.1022, "step": 1184},
    {"epoch": 0.18, "learning_rate": 9.677700348432054e-07, "loss": 0.2052, "step": 1216},
    {"epoch": 0.19, "learning_rate": 9.652359835286664e-07, "loss": 0.1546, "step": 1248},
    {"epoch": 0.19, "learning_rate": 9.627019322141273e-07, "loss": 0.149, "step": 1280},
    {"epoch": 0.2, "learning_rate": 9.601678808995883e-07, "loss": 0.1281, "step": 1312},
    {"epoch": 0.2, "learning_rate": 9.57633829585049e-07, "loss": 0.1437, "step": 1344},
    {"epoch": 0.2, "learning_rate": 9.5509977827051e-07, "loss": 0.2097, "step": 1376},
    {"epoch": 0.21, "learning_rate": 9.525657269559708e-07, "loss": 0.1308, "step": 1408},
    {"epoch": 0.21, "learning_rate": 9.500316756414317e-07, "loss": 0.1691, "step": 1440},
    {"epoch": 0.22, "learning_rate": 9.474976243268927e-07, "loss": 0.2319, "step": 1472},
    {"epoch": 0.22, "learning_rate": 9.449635730123534e-07, "loss": 0.2226, "step": 1504},
    {"epoch": 0.23, "learning_rate": 9.424295216978143e-07, "loss": 0.1789, "step": 1536},
    {"epoch": 0.23, "learning_rate": 9.398954703832752e-07, "loss": 0.1932, "step": 1568},
    {"epoch": 0.24, "learning_rate": 9.373614190687361e-07, "loss": 0.1718, "step": 1600},
    {"epoch": 0.24, "learning_rate": 9.34827367754197e-07, "loss": 0.156, "step": 1632},
    {"epoch": 0.25, "learning_rate": 9.322933164396578e-07, "loss": 0.1512, "step": 1664},
    {"epoch": 0.25, "learning_rate": 9.297592651251187e-07, "loss": 0.0968, "step": 1696},
    {"epoch": 0.26, "learning_rate": 9.272252138105796e-07, "loss": 0.0932, "step": 1728},
    {"epoch": 0.26, "learning_rate": 9.246911624960405e-07, "loss": 0.2464, "step": 1760},
    {"epoch": 0.27, "learning_rate": 9.221571111815014e-07, "loss": 0.2036, "step": 1792},
    {"epoch": 0.27, "learning_rate": 9.196230598669623e-07, "loss": 0.1245, "step": 1824},
    {"epoch": 0.28, "learning_rate": 9.170890085524232e-07, "loss": 0.1097, "step": 1856},
    {"epoch": 0.28, "learning_rate": 9.14554957237884e-07, "loss": 0.1844, "step": 1888},
    {"epoch": 0.29, "learning_rate": 9.120209059233449e-07, "loss": 0.1114, "step": 1920},
    {"epoch": 0.29, "learning_rate": 9.094868546088058e-07, "loss": 0.1992, "step": 1952},
    {"epoch": 0.3, "learning_rate": 9.069528032942667e-07, "loss": 0.1721, "step": 1984},
    {"epoch": 0.3, "learning_rate": 9.044187519797275e-07, "loss": 0.1473, "step": 2016},
    {"epoch": 0.3, "learning_rate": 9.018847006651884e-07, "loss": 0.1865, "step": 2048},
    {"epoch": 0.31, "learning_rate": 8.993506493506493e-07, "loss": 0.1583, "step": 2080},
    {"epoch": 0.31, "learning_rate": 8.968165980361102e-07, "loss": 0.1866, "step": 2112},
    {"epoch": 0.32, "learning_rate": 8.942825467215711e-07, "loss": 0.1617, "step": 2144},
    {"epoch": 0.32, "learning_rate": 8.917484954070319e-07, "loss": 0.1189, "step": 2176},
    {"epoch": 0.33, "learning_rate": 8.892144440924928e-07, "loss": 0.148, "step": 2208},
    {"epoch": 0.33, "learning_rate": 8.866803927779537e-07, "loss": 0.131, "step": 2240},
    {"epoch": 0.34, "learning_rate": 8.841463414634146e-07, "loss": 0.2261, "step": 2272},
    {"epoch": 0.34, "learning_rate": 8.816122901488755e-07, "loss": 0.1742, "step": 2304},
    {"epoch": 0.35, "learning_rate": 8.790782388343364e-07, "loss": 0.164, "step": 2336},
    {"epoch": 0.35, "learning_rate": 8.765441875197972e-07, "loss": 0.1161, "step": 2368},
    {"epoch": 0.36, "learning_rate": 8.74010136205258e-07, "loss": 0.1636, "step": 2400},
    {"epoch": 0.36, "learning_rate": 8.71476084890719e-07, "loss": 0.2416, "step": 2432},
    {"epoch": 0.37, "learning_rate": 8.689420335761799e-07, "loss": 0.1632, "step": 2464},
    {"epoch": 0.37, "learning_rate": 8.664079822616408e-07, "loss": 0.1477, "step": 2496},
    {"epoch": 0.38, "learning_rate": 8.638739309471016e-07, "loss": 0.2083, "step": 2528},
    {"epoch": 0.38, "learning_rate": 8.613398796325625e-07, "loss": 0.1599, "step": 2560},
    {"epoch": 0.39, "learning_rate": 8.588058283180234e-07, "loss": 0.1817, "step": 2592},
    {"epoch": 0.39, "learning_rate": 8.562717770034843e-07, "loss": 0.1005, "step": 2624},
    {"epoch": 0.4, "learning_rate": 8.537377256889452e-07, "loss": 0.168, "step": 2656},
    {"epoch": 0.4, "learning_rate": 8.51203674374406e-07, "loss": 0.2418, "step": 2688},
    {"epoch": 0.4, "learning_rate": 8.486696230598669e-07, "loss": 0.1881, "step": 2720},
    {"epoch": 0.41, "learning_rate": 8.461355717453278e-07, "loss": 0.1829, "step": 2752},
    {"epoch": 0.41, "learning_rate": 8.436015204307887e-07, "loss": 0.1073, "step": 2784},
    {"epoch": 0.42, "learning_rate": 8.410674691162496e-07, "loss": 0.1324, "step": 2816},
    {"epoch": 0.42, "learning_rate": 8.385334178017105e-07, "loss": 0.2077, "step": 2848},
    {"epoch": 0.43, "learning_rate": 8.359993664871713e-07, "loss": 0.2248, "step": 2880},
    {"epoch": 0.43, "learning_rate": 8.334653151726322e-07, "loss": 0.1337, "step": 2912},
    {"epoch": 0.44, "learning_rate": 8.30931263858093e-07, "loss": 0.1906, "step": 2944},
    {"epoch": 0.44, "learning_rate": 8.28397212543554e-07, "loss": 0.1893, "step": 2976},
    {"epoch": 0.45, "learning_rate": 8.259423503325942e-07, "loss": 0.2029, "step": 3008},
    {"epoch": 0.45, "learning_rate": 8.234082990180551e-07, "loss": 0.157, "step": 3040},
    {"epoch": 0.46, "learning_rate": 8.208742477035159e-07, "loss": 0.1433, "step": 3072},
    {"epoch": 0.46, "learning_rate": 8.183401963889769e-07, "loss": 0.1689, "step": 3104},
    {"epoch": 0.47, "learning_rate": 8.158061450744377e-07, "loss": 0.2012, "step": 3136},
    {"epoch": 0.47, "learning_rate": 8.132720937598986e-07, "loss": 0.175, "step": 3168},
    {"epoch": 0.48, "learning_rate": 8.107380424453595e-07, "loss": 0.1961, "step": 3200},
    {"epoch": 0.48, "learning_rate": 8.082039911308203e-07, "loss": 0.2547, "step": 3232},
    {"epoch": 0.49, "learning_rate": 8.056699398162813e-07, "loss": 0.1935, "step": 3264},
    {"epoch": 0.49, "learning_rate": 8.031358885017421e-07, "loss": 0.2149, "step": 3296},
    {"epoch": 0.5, "learning_rate": 8.00601837187203e-07, "loss": 0.1809, "step": 3328},
    {"epoch": 0.5, "learning_rate": 7.980677858726639e-07, "loss": 0.2072, "step": 3360},
    {"epoch": 0.5, "learning_rate": 7.955337345581247e-07, "loss": 0.2116, "step": 3392},
    {"epoch": 0.51, "learning_rate": 7.929996832435857e-07, "loss": 0.1737, "step": 3424},
    {"epoch": 0.51, "learning_rate": 7.904656319290464e-07, "loss": 0.2219, "step": 3456},
    {"epoch": 0.52, "learning_rate": 7.879315806145074e-07, "loss": 0.1849, "step": 3488},
    {"epoch": 0.52, "learning_rate": 7.853975292999683e-07, "loss": 0.1884, "step": 3520},
    {"epoch": 0.53, "learning_rate": 7.828634779854292e-07, "loss": 0.2192, "step": 3552},
    {"epoch": 0.53, "learning_rate": 7.803294266708901e-07, "loss": 0.1958, "step": 3584},
    {"epoch": 0.54, "learning_rate": 7.777953753563509e-07, "loss": 0.1433, "step": 3616},
    {"epoch": 0.54, "learning_rate": 7.752613240418118e-07, "loss": 0.2151, "step": 3648},
    {"epoch": 0.55, "learning_rate": 7.727272727272727e-07, "loss": 0.1675, "step": 3680},
    {"epoch": 0.55, "learning_rate": 7.701932214127336e-07, "loss": 0.1586, "step": 3712},
    {"epoch": 0.56, "learning_rate": 7.676591700981945e-07, "loss": 0.2881, "step": 3744},
    {"epoch": 0.56, "learning_rate": 7.651251187836553e-07, "loss": 0.196, "step": 3776},
    {"epoch": 0.57, "learning_rate": 7.625910674691162e-07, "loss": 0.1285, "step": 3808},
    {"epoch": 0.57, "learning_rate": 7.60057016154577e-07, "loss": 0.2262, "step": 3840},
    {"epoch": 0.58, "learning_rate": 7.57522964840038e-07, "loss": 0.2309, "step": 3872},
    {"epoch": 0.58, "learning_rate": 7.549889135254989e-07, "loss": 0.1533, "step": 3904},
    {"epoch": 0.59, "learning_rate": 7.524548622109597e-07, "loss": 0.1297, "step": 3936},
    {"epoch": 0.59, "learning_rate": 7.499208108964206e-07, "loss": 0.1808, "step": 3968},
    {"epoch": 0.6, "learning_rate": 7.473867595818814e-07, "loss": 0.2401, "step": 4000},
    {"epoch": 0.6, "learning_rate": 7.448527082673424e-07, "loss": 0.2507, "step": 4032},
    {"epoch": 0.61, "learning_rate": 7.423186569528033e-07, "loss": 0.1562, "step": 4064},
    {"epoch": 0.61, "learning_rate": 7.397846056382642e-07, "loss": 0.1912, "step": 4096},
    {"epoch": 0.61, "learning_rate": 7.373297434273043e-07, "loss": 0.1703, "step": 4128},
    {"epoch": 0.62, "learning_rate": 7.347956921127653e-07, "loss": 0.1471, "step": 4160},
    {"epoch": 0.62, "learning_rate": 7.322616407982262e-07, "loss": 0.1539, "step": 4192},
    {"epoch": 0.63, "learning_rate": 7.297275894836869e-07, "loss": 0.1521, "step": 4224},
    {"epoch": 0.63, "learning_rate": 7.271935381691479e-07, "loss": 0.2623, "step": 4256},
    {"epoch": 0.64, "learning_rate": 7.246594868546087e-07, "loss": 0.1753, "step": 4288},
    {"epoch": 0.64, "learning_rate": 7.221254355400697e-07, "loss": 0.1945, "step": 4320},
    {"epoch": 0.65, "learning_rate": 7.195913842255306e-07, "loss": 0.2153, "step": 4352},
    {"epoch": 0.65, "learning_rate": 7.170573329109915e-07, "loss": 0.2841, "step": 4384},
    {"epoch": 0.66, "learning_rate": 7.145232815964523e-07, "loss": 0.1759, "step": 4416},
    {"epoch": 0.66, "learning_rate": 7.119892302819131e-07, "loss": 0.2214, "step": 4448},
    {"epoch": 0.67, "learning_rate": 7.094551789673741e-07, "loss": 0.188, "step": 4480},
    {"epoch": 0.67, "learning_rate": 7.069211276528349e-07, "loss": 0.1579, "step": 4512},
    {"epoch": 0.68, "learning_rate": 7.043870763382959e-07, "loss": 0.2213, "step": 4544},
    {"epoch": 0.68, "learning_rate": 7.018530250237567e-07, "loss": 0.2042, "step": 4576},
    {"epoch": 0.69, "learning_rate": 6.993189737092175e-07, "loss": 0.1852, "step": 4608},
    {"epoch": 0.69, "learning_rate": 6.967849223946785e-07, "loss": 0.1716, "step": 4640},
    {"epoch": 0.7, "learning_rate": 6.942508710801393e-07, "loss": 0.1645, "step": 4672},
    {"epoch": 0.7, "learning_rate": 6.917168197656003e-07, "loss": 0.1986, "step": 4704},
    {"epoch": 0.71, "learning_rate": 6.89182768451061e-07, "loss": 0.2531, "step": 4736},
    {"epoch": 0.71, "learning_rate": 6.866487171365219e-07, "loss": 0.1792, "step": 4768},
    {"epoch": 0.71, "learning_rate": 6.841146658219829e-07, "loss": 0.1843, "step": 4800},
    {"epoch": 0.72, "learning_rate": 6.815806145074437e-07, "loss": 0.2175, "step": 4832},
    {"epoch": 0.72, "learning_rate": 6.790465631929047e-07, "loss": 0.2083, "step": 4864},
    {"epoch": 0.73, "learning_rate": 6.765125118783655e-07, "loss": 0.1729, "step": 4896},
    {"epoch": 0.73, "learning_rate": 6.739784605638263e-07, "loss": 0.1849, "step": 4928},
    {"epoch": 0.74, "learning_rate": 6.714444092492873e-07, "loss": 0.2374, "step": 4960},
    {"epoch": 0.74, "learning_rate": 6.689103579347481e-07, "loss": 0.241, "step": 4992},
    {"epoch": 0.75, "learning_rate": 6.663763066202091e-07, "loss": 0.1853, "step": 5024},
    {"epoch": 0.75, "learning_rate": 6.638422553056699e-07, "loss": 0.1957, "step": 5056},
    {"epoch": 0.76, "learning_rate": 6.613082039911308e-07, "loss": 0.2052, "step": 5088},
    {"epoch": 0.76, "learning_rate": 6.587741526765917e-07, "loss": 0.2321, "step": 5120},
    {"epoch": 0.77, "learning_rate": 6.562401013620525e-07, "loss": 0.1804, "step": 5152},
    {"epoch": 0.77, "learning_rate": 6.537060500475135e-07, "loss": 0.1842, "step": 5184},
    {"epoch": 0.78, "learning_rate": 6.511719987329743e-07, "loss": 0.2388, "step": 5216},
    {"epoch": 0.78, "learning_rate": 6.486379474184352e-07, "loss": 0.2417, "step": 5248},
    {"epoch": 0.79, "learning_rate": 6.46103896103896e-07, "loss": 0.2224, "step": 5280},
    {"epoch": 0.79, "learning_rate": 6.435698447893569e-07, "loss": 0.2029, "step": 5312},
    {"epoch": 0.8, "learning_rate": 6.410357934748179e-07, "loss": 0.2807, "step": 5344},
    {"epoch": 0.8, "learning_rate": 6.385017421602787e-07, "loss": 0.192, "step": 5376},
    {"epoch": 0.81, "learning_rate": 6.359676908457397e-07, "loss": 0.1848, "step": 5408},
    {"epoch": 0.81, "learning_rate": 6.334336395312004e-07, "loss": 0.2143, "step": 5440},
    {"epoch": 0.81, "learning_rate": 6.308995882166613e-07, "loss": 0.2421, "step": 5472},
    {"epoch": 0.82, "learning_rate": 6.283655369021223e-07, "loss": 0.1724, "step": 5504},
    {"epoch": 0.82, "learning_rate": 6.258314855875831e-07, "loss": 0.1207, "step": 5536},
    {"epoch": 0.83, "learning_rate": 6.232974342730441e-07, "loss": 0.2259, "step": 5568},
    {"epoch": 0.83, "learning_rate": 6.207633829585048e-07, "loss": 0.2504, "step": 5600},
    {"epoch": 0.84, "learning_rate": 6.182293316439658e-07, "loss": 0.188, "step": 5632},
    {"epoch": 0.84, "learning_rate": 6.156952803294266e-07, "loss": 0.1893, "step": 5664},
    {"epoch": 0.85, "learning_rate": 6.131612290148875e-07, "loss": 0.1905, "step": 5696},
    {"epoch": 0.85, "learning_rate": 6.106271777003485e-07, "loss": 0.2594, "step": 5728},
    {"epoch": 0.86, "learning_rate": 6.080931263858092e-07, "loss": 0.3084, "step": 5760},
    {"epoch": 0.86, "learning_rate": 6.055590750712702e-07, "loss": 0.1925, "step": 5792},
    {"epoch": 0.87, "learning_rate": 6.03025023756731e-07, "loss": 0.186, "step": 5824},
    {"epoch": 0.87, "learning_rate": 6.004909724421919e-07, "loss": 0.2302, "step": 5856},
    {"epoch": 0.88, "learning_rate": 5.979569211276529e-07, "loss": 0.1371, "step": 5888},
    {"epoch": 0.88, "learning_rate": 5.954228698131137e-07, "loss": 0.231, "step": 5920},
    {"epoch": 0.89, "learning_rate": 5.928888184985746e-07, "loss": 0.2012, "step": 5952},
    {"epoch": 0.89, "learning_rate": 5.903547671840354e-07, "loss": 0.2006, "step": 5984},
    {"epoch": 0.9, "learning_rate": 5.878207158694963e-07, "loss": 0.215, "step": 6016},
    {"epoch": 0.9, "learning_rate": 5.852866645549572e-07, "loss": 0.1471, "step": 6048},
    {"epoch": 0.91, "learning_rate": 5.827526132404181e-07, "loss": 0.2364, "step": 6080},
    {"epoch": 0.91, "learning_rate": 5.80218561925879e-07, "loss": 0.2881, "step": 6112},
    {"epoch": 0.91, "learning_rate": 5.776845106113398e-07, "loss": 0.1536, "step": 6144},
    {"epoch": 0.92, "learning_rate": 5.751504592968008e-07, "loss": 0.2317, "step": 6176},
    {"epoch": 0.92, "learning_rate": 5.726164079822616e-07, "loss": 0.1952, "step": 6208},
    {"epoch": 0.93, "learning_rate": 5.700823566677225e-07, "loss": 0.1602, "step": 6240},
    {"epoch": 0.93, "learning_rate": 5.675483053531834e-07, "loss": 0.212, "step": 6272},
    {"epoch": 0.94, "learning_rate": 5.650142540386442e-07, "loss": 0.2401, "step": 6304},
    {"epoch": 0.94, "learning_rate": 5.624802027241052e-07, "loss": 0.1992, "step": 6336},
    {"epoch": 0.95, "learning_rate": 5.59946151409566e-07, "loss": 0.2616, "step": 6368},
    {"epoch": 0.95, "learning_rate": 5.574121000950269e-07, "loss": 0.146, "step": 6400},
    {"epoch": 0.96, "learning_rate": 5.548780487804878e-07, "loss": 0.2081, "step": 6432},
    {"epoch": 0.96, "learning_rate": 5.523439974659486e-07, "loss": 0.207, "step": 6464},
    {"epoch": 0.97, "learning_rate": 5.498099461514096e-07, "loss": 0.2631, "step": 6496},
    {"epoch": 0.97, "learning_rate": 5.472758948368704e-07, "loss": 0.1721, "step": 6528},
    {"epoch": 0.98, "learning_rate": 5.447418435223313e-07, "loss": 0.1908, "step": 6560},
    {"epoch": 0.98, "learning_rate": 5.422869813113715e-07, "loss": 0.2238, "step": 6592},
    {"epoch": 0.99, "learning_rate": 5.397529299968325e-07, "loss": 0.2524, "step": 6624},
    {"epoch": 0.99, "learning_rate": 5.372188786822933e-07, "loss": 0.1968, "step": 6656},
    {"epoch": 1.0, "learning_rate": 5.346848273677542e-07, "loss": 0.2379, "step": 6688}
  ],
  "logging_steps": 32,
  "max_steps": 13434,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 3358,
  "total_flos": 2.85184331513856e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}