|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4997766860205448, |
|
"eval_steps": 500, |
|
"global_step": 10074, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.722084367245657e-08, |
|
"loss": 0.2836, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.692307692307692e-08, |
|
"loss": 0.2111, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.1662531017369727e-07, |
|
"loss": 0.2145, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.563275434243176e-07, |
|
"loss": 0.2375, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9602977667493795e-07, |
|
"loss": 0.1839, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.3573200992555832e-07, |
|
"loss": 0.2864, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.7543424317617863e-07, |
|
"loss": 0.1799, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.1513647642679897e-07, |
|
"loss": 0.1609, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.5483870967741936e-07, |
|
"loss": 0.1434, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.945409429280397e-07, |
|
"loss": 0.1699, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.3424317617866004e-07, |
|
"loss": 0.1898, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.739454094292804e-07, |
|
"loss": 0.1665, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 5.136476426799007e-07, |
|
"loss": 0.1358, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.533498759305211e-07, |
|
"loss": 0.1713, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 5.930521091811415e-07, |
|
"loss": 0.1817, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6.327543424317618e-07, |
|
"loss": 0.149, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6.724565756823821e-07, |
|
"loss": 0.165, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.121588089330024e-07, |
|
"loss": 0.2282, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.518610421836227e-07, |
|
"loss": 0.1666, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 7.915632754342431e-07, |
|
"loss": 0.1229, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.312655086848634e-07, |
|
"loss": 0.2118, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 8.709677419354838e-07, |
|
"loss": 0.1869, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.106699751861042e-07, |
|
"loss": 0.2196, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.503722084367245e-07, |
|
"loss": 0.0936, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.90074441687345e-07, |
|
"loss": 0.1839, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.980994615140957e-07, |
|
"loss": 0.1675, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.955654101995564e-07, |
|
"loss": 0.1595, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.930313588850174e-07, |
|
"loss": 0.1556, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.905764966740576e-07, |
|
"loss": 0.1711, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.880424453595185e-07, |
|
"loss": 0.1777, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.855083940449792e-07, |
|
"loss": 0.2031, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.829743427304402e-07, |
|
"loss": 0.1529, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.804402914159011e-07, |
|
"loss": 0.1365, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.77906240101362e-07, |
|
"loss": 0.1922, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.75372188786823e-07, |
|
"loss": 0.133, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.728381374722837e-07, |
|
"loss": 0.1692, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.703040861577447e-07, |
|
"loss": 0.1022, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.677700348432054e-07, |
|
"loss": 0.2052, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.652359835286664e-07, |
|
"loss": 0.1546, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.627019322141273e-07, |
|
"loss": 0.149, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.601678808995883e-07, |
|
"loss": 0.1281, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.57633829585049e-07, |
|
"loss": 0.1437, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.5509977827051e-07, |
|
"loss": 0.2097, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.525657269559708e-07, |
|
"loss": 0.1308, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.500316756414317e-07, |
|
"loss": 0.1691, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.474976243268927e-07, |
|
"loss": 0.2319, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.449635730123534e-07, |
|
"loss": 0.2226, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.424295216978143e-07, |
|
"loss": 0.1789, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.398954703832752e-07, |
|
"loss": 0.1932, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.373614190687361e-07, |
|
"loss": 0.1718, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.34827367754197e-07, |
|
"loss": 0.156, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.322933164396578e-07, |
|
"loss": 0.1512, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.297592651251187e-07, |
|
"loss": 0.0968, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.272252138105796e-07, |
|
"loss": 0.0932, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.246911624960405e-07, |
|
"loss": 0.2464, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.221571111815014e-07, |
|
"loss": 0.2036, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.196230598669623e-07, |
|
"loss": 0.1245, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.170890085524232e-07, |
|
"loss": 0.1097, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.14554957237884e-07, |
|
"loss": 0.1844, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.120209059233449e-07, |
|
"loss": 0.1114, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.094868546088058e-07, |
|
"loss": 0.1992, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.069528032942667e-07, |
|
"loss": 0.1721, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.044187519797275e-07, |
|
"loss": 0.1473, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.018847006651884e-07, |
|
"loss": 0.1865, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.993506493506493e-07, |
|
"loss": 0.1583, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.968165980361102e-07, |
|
"loss": 0.1866, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.942825467215711e-07, |
|
"loss": 0.1617, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.917484954070319e-07, |
|
"loss": 0.1189, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.892144440924928e-07, |
|
"loss": 0.148, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.866803927779537e-07, |
|
"loss": 0.131, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.841463414634146e-07, |
|
"loss": 0.2261, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.816122901488755e-07, |
|
"loss": 0.1742, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.790782388343364e-07, |
|
"loss": 0.164, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.765441875197972e-07, |
|
"loss": 0.1161, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.74010136205258e-07, |
|
"loss": 0.1636, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.71476084890719e-07, |
|
"loss": 0.2416, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.689420335761799e-07, |
|
"loss": 0.1632, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.664079822616408e-07, |
|
"loss": 0.1477, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.638739309471016e-07, |
|
"loss": 0.2083, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.613398796325625e-07, |
|
"loss": 0.1599, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.588058283180234e-07, |
|
"loss": 0.1817, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.562717770034843e-07, |
|
"loss": 0.1005, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.537377256889452e-07, |
|
"loss": 0.168, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.51203674374406e-07, |
|
"loss": 0.2418, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.486696230598669e-07, |
|
"loss": 0.1881, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.461355717453278e-07, |
|
"loss": 0.1829, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.436015204307887e-07, |
|
"loss": 0.1073, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.410674691162496e-07, |
|
"loss": 0.1324, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.385334178017105e-07, |
|
"loss": 0.2077, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.359993664871713e-07, |
|
"loss": 0.2248, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.334653151726322e-07, |
|
"loss": 0.1337, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.30931263858093e-07, |
|
"loss": 0.1906, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.28397212543554e-07, |
|
"loss": 0.1893, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.259423503325942e-07, |
|
"loss": 0.2029, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.234082990180551e-07, |
|
"loss": 0.157, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.208742477035159e-07, |
|
"loss": 0.1433, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.183401963889769e-07, |
|
"loss": 0.1689, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.158061450744377e-07, |
|
"loss": 0.2012, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.132720937598986e-07, |
|
"loss": 0.175, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.107380424453595e-07, |
|
"loss": 0.1961, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.082039911308203e-07, |
|
"loss": 0.2547, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.056699398162813e-07, |
|
"loss": 0.1935, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.031358885017421e-07, |
|
"loss": 0.2149, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.00601837187203e-07, |
|
"loss": 0.1809, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 7.980677858726639e-07, |
|
"loss": 0.2072, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 7.955337345581247e-07, |
|
"loss": 0.2116, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.929996832435857e-07, |
|
"loss": 0.1737, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.904656319290464e-07, |
|
"loss": 0.2219, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 7.879315806145074e-07, |
|
"loss": 0.1849, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 7.853975292999683e-07, |
|
"loss": 0.1884, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.828634779854292e-07, |
|
"loss": 0.2192, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.803294266708901e-07, |
|
"loss": 0.1958, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.777953753563509e-07, |
|
"loss": 0.1433, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.752613240418118e-07, |
|
"loss": 0.2151, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 7.727272727272727e-07, |
|
"loss": 0.1675, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 7.701932214127336e-07, |
|
"loss": 0.1586, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.676591700981945e-07, |
|
"loss": 0.2881, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.651251187836553e-07, |
|
"loss": 0.196, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.625910674691162e-07, |
|
"loss": 0.1285, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.60057016154577e-07, |
|
"loss": 0.2262, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.57522964840038e-07, |
|
"loss": 0.2309, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.549889135254989e-07, |
|
"loss": 0.1533, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.524548622109597e-07, |
|
"loss": 0.1297, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.499208108964206e-07, |
|
"loss": 0.1808, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.473867595818814e-07, |
|
"loss": 0.2401, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.448527082673424e-07, |
|
"loss": 0.2507, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.423186569528033e-07, |
|
"loss": 0.1562, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.397846056382642e-07, |
|
"loss": 0.1912, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.373297434273043e-07, |
|
"loss": 0.1703, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.347956921127653e-07, |
|
"loss": 0.1471, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.322616407982262e-07, |
|
"loss": 0.1539, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.297275894836869e-07, |
|
"loss": 0.1521, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.271935381691479e-07, |
|
"loss": 0.2623, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.246594868546087e-07, |
|
"loss": 0.1753, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.221254355400697e-07, |
|
"loss": 0.1945, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.195913842255306e-07, |
|
"loss": 0.2153, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.170573329109915e-07, |
|
"loss": 0.2841, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.145232815964523e-07, |
|
"loss": 0.1759, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.119892302819131e-07, |
|
"loss": 0.2214, |
|
"step": 4448 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.094551789673741e-07, |
|
"loss": 0.188, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.069211276528349e-07, |
|
"loss": 0.1579, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.043870763382959e-07, |
|
"loss": 0.2213, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.018530250237567e-07, |
|
"loss": 0.2042, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.993189737092175e-07, |
|
"loss": 0.1852, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.967849223946785e-07, |
|
"loss": 0.1716, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.942508710801393e-07, |
|
"loss": 0.1645, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.917168197656003e-07, |
|
"loss": 0.1986, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.89182768451061e-07, |
|
"loss": 0.2531, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.866487171365219e-07, |
|
"loss": 0.1792, |
|
"step": 4768 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.841146658219829e-07, |
|
"loss": 0.1843, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.815806145074437e-07, |
|
"loss": 0.2175, |
|
"step": 4832 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.790465631929047e-07, |
|
"loss": 0.2083, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.765125118783655e-07, |
|
"loss": 0.1729, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.739784605638263e-07, |
|
"loss": 0.1849, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.714444092492873e-07, |
|
"loss": 0.2374, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.689103579347481e-07, |
|
"loss": 0.241, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.663763066202091e-07, |
|
"loss": 0.1853, |
|
"step": 5024 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.638422553056699e-07, |
|
"loss": 0.1957, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 6.613082039911308e-07, |
|
"loss": 0.2052, |
|
"step": 5088 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 6.587741526765917e-07, |
|
"loss": 0.2321, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.562401013620525e-07, |
|
"loss": 0.1804, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.537060500475135e-07, |
|
"loss": 0.1842, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.511719987329743e-07, |
|
"loss": 0.2388, |
|
"step": 5216 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.486379474184352e-07, |
|
"loss": 0.2417, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.46103896103896e-07, |
|
"loss": 0.2224, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.435698447893569e-07, |
|
"loss": 0.2029, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.410357934748179e-07, |
|
"loss": 0.2807, |
|
"step": 5344 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.385017421602787e-07, |
|
"loss": 0.192, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.359676908457397e-07, |
|
"loss": 0.1848, |
|
"step": 5408 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.334336395312004e-07, |
|
"loss": 0.2143, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.308995882166613e-07, |
|
"loss": 0.2421, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6.283655369021223e-07, |
|
"loss": 0.1724, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6.258314855875831e-07, |
|
"loss": 0.1207, |
|
"step": 5536 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 6.232974342730441e-07, |
|
"loss": 0.2259, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 6.207633829585048e-07, |
|
"loss": 0.2504, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.182293316439658e-07, |
|
"loss": 0.188, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.156952803294266e-07, |
|
"loss": 0.1893, |
|
"step": 5664 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.131612290148875e-07, |
|
"loss": 0.1905, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.106271777003485e-07, |
|
"loss": 0.2594, |
|
"step": 5728 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.080931263858092e-07, |
|
"loss": 0.3084, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.055590750712702e-07, |
|
"loss": 0.1925, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.03025023756731e-07, |
|
"loss": 0.186, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.004909724421919e-07, |
|
"loss": 0.2302, |
|
"step": 5856 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.979569211276529e-07, |
|
"loss": 0.1371, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.954228698131137e-07, |
|
"loss": 0.231, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.928888184985746e-07, |
|
"loss": 0.2012, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.903547671840354e-07, |
|
"loss": 0.2006, |
|
"step": 5984 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.878207158694963e-07, |
|
"loss": 0.215, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.852866645549572e-07, |
|
"loss": 0.1471, |
|
"step": 6048 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.827526132404181e-07, |
|
"loss": 0.2364, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.80218561925879e-07, |
|
"loss": 0.2881, |
|
"step": 6112 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.776845106113398e-07, |
|
"loss": 0.1536, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.751504592968008e-07, |
|
"loss": 0.2317, |
|
"step": 6176 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.726164079822616e-07, |
|
"loss": 0.1952, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.700823566677225e-07, |
|
"loss": 0.1602, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.675483053531834e-07, |
|
"loss": 0.212, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.650142540386442e-07, |
|
"loss": 0.2401, |
|
"step": 6304 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.624802027241052e-07, |
|
"loss": 0.1992, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.59946151409566e-07, |
|
"loss": 0.2616, |
|
"step": 6368 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.574121000950269e-07, |
|
"loss": 0.146, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.548780487804878e-07, |
|
"loss": 0.2081, |
|
"step": 6432 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5.523439974659486e-07, |
|
"loss": 0.207, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.498099461514096e-07, |
|
"loss": 0.2631, |
|
"step": 6496 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.472758948368704e-07, |
|
"loss": 0.1721, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.447418435223313e-07, |
|
"loss": 0.1908, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.422869813113715e-07, |
|
"loss": 0.2238, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.397529299968325e-07, |
|
"loss": 0.2524, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.372188786822933e-07, |
|
"loss": 0.1968, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.346848273677542e-07, |
|
"loss": 0.2379, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.32150776053215e-07, |
|
"loss": 0.246, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.296167247386759e-07, |
|
"loss": 0.104, |
|
"step": 6752 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.270826734241369e-07, |
|
"loss": 0.102, |
|
"step": 6784 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.245486221095977e-07, |
|
"loss": 0.1432, |
|
"step": 6816 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5.220145707950586e-07, |
|
"loss": 0.1262, |
|
"step": 6848 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5.194805194805194e-07, |
|
"loss": 0.1299, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.169464681659803e-07, |
|
"loss": 0.1319, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.144124168514412e-07, |
|
"loss": 0.105, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5.118783655369021e-07, |
|
"loss": 0.1233, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5.09344314222363e-07, |
|
"loss": 0.0922, |
|
"step": 7008 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.068102629078239e-07, |
|
"loss": 0.144, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.042762115932847e-07, |
|
"loss": 0.1828, |
|
"step": 7072 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.017421602787456e-07, |
|
"loss": 0.1097, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.992081089642065e-07, |
|
"loss": 0.1867, |
|
"step": 7136 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.966740576496675e-07, |
|
"loss": 0.1338, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.941400063351283e-07, |
|
"loss": 0.1535, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.916059550205891e-07, |
|
"loss": 0.1861, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.8907190370605e-07, |
|
"loss": 0.1284, |
|
"step": 7264 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.865378523915109e-07, |
|
"loss": 0.1037, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.840038010769719e-07, |
|
"loss": 0.1217, |
|
"step": 7328 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.814697497624327e-07, |
|
"loss": 0.1469, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.789356984478935e-07, |
|
"loss": 0.1218, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.764016471333545e-07, |
|
"loss": 0.1486, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.738675958188153e-07, |
|
"loss": 0.0796, |
|
"step": 7456 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.7141273360785554e-07, |
|
"loss": 0.1163, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.688786822933164e-07, |
|
"loss": 0.0821, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.663446309787773e-07, |
|
"loss": 0.1701, |
|
"step": 7552 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.638105796642382e-07, |
|
"loss": 0.1002, |
|
"step": 7584 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.613557174532784e-07, |
|
"loss": 0.0914, |
|
"step": 7616 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.5882166613873927e-07, |
|
"loss": 0.1374, |
|
"step": 7648 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.562876148242002e-07, |
|
"loss": 0.1142, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.5375356350966105e-07, |
|
"loss": 0.1351, |
|
"step": 7712 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.5121951219512194e-07, |
|
"loss": 0.1513, |
|
"step": 7744 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.486854608805828e-07, |
|
"loss": 0.0998, |
|
"step": 7776 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.461514095660437e-07, |
|
"loss": 0.1115, |
|
"step": 7808 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.4361735825150457e-07, |
|
"loss": 0.1373, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.4108330693696546e-07, |
|
"loss": 0.1614, |
|
"step": 7872 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.3854925562242635e-07, |
|
"loss": 0.0826, |
|
"step": 7904 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.3601520430788724e-07, |
|
"loss": 0.1129, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.334811529933481e-07, |
|
"loss": 0.0825, |
|
"step": 7968 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.3094710167880897e-07, |
|
"loss": 0.1497, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.284130503642698e-07, |
|
"loss": 0.1575, |
|
"step": 8032 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.2587899904973075e-07, |
|
"loss": 0.1377, |
|
"step": 8064 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.2334494773519165e-07, |
|
"loss": 0.152, |
|
"step": 8096 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.208108964206525e-07, |
|
"loss": 0.1142, |
|
"step": 8128 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.182768451061134e-07, |
|
"loss": 0.1458, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.1574279379157427e-07, |
|
"loss": 0.1216, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.132087424770351e-07, |
|
"loss": 0.112, |
|
"step": 8224 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.1067469116249605e-07, |
|
"loss": 0.1517, |
|
"step": 8256 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.081406398479569e-07, |
|
"loss": 0.1046, |
|
"step": 8288 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.056065885334178e-07, |
|
"loss": 0.1538, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 4.030725372188787e-07, |
|
"loss": 0.1734, |
|
"step": 8352 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.005384859043395e-07, |
|
"loss": 0.1236, |
|
"step": 8384 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.980044345898004e-07, |
|
"loss": 0.1129, |
|
"step": 8416 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.9547038327526135e-07, |
|
"loss": 0.1193, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.929363319607222e-07, |
|
"loss": 0.1124, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.904022806461831e-07, |
|
"loss": 0.103, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.878682293316439e-07, |
|
"loss": 0.1526, |
|
"step": 8544 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.853341780171048e-07, |
|
"loss": 0.1747, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.8280012670256575e-07, |
|
"loss": 0.0711, |
|
"step": 8608 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.802660753880266e-07, |
|
"loss": 0.131, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.777320240734875e-07, |
|
"loss": 0.0695, |
|
"step": 8672 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.751979727589484e-07, |
|
"loss": 0.1176, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.726639214444092e-07, |
|
"loss": 0.1141, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.701298701298701e-07, |
|
"loss": 0.1437, |
|
"step": 8768 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.6759581881533095e-07, |
|
"loss": 0.1273, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.650617675007919e-07, |
|
"loss": 0.0765, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.625277161862528e-07, |
|
"loss": 0.1322, |
|
"step": 8864 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.599936648717136e-07, |
|
"loss": 0.1643, |
|
"step": 8896 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.574596135571745e-07, |
|
"loss": 0.1005, |
|
"step": 8928 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.549255622426354e-07, |
|
"loss": 0.1545, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.523915109280963e-07, |
|
"loss": 0.0755, |
|
"step": 8992 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.498574596135572e-07, |
|
"loss": 0.0917, |
|
"step": 9024 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.47323408299018e-07, |
|
"loss": 0.0893, |
|
"step": 9056 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.447893569844789e-07, |
|
"loss": 0.152, |
|
"step": 9088 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.422553056699398e-07, |
|
"loss": 0.1154, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.3972125435540065e-07, |
|
"loss": 0.0817, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.371872030408616e-07, |
|
"loss": 0.083, |
|
"step": 9184 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.346531517263225e-07, |
|
"loss": 0.1084, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.321191004117833e-07, |
|
"loss": 0.1514, |
|
"step": 9248 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.295850490972442e-07, |
|
"loss": 0.1195, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.2705099778270505e-07, |
|
"loss": 0.0939, |
|
"step": 9312 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.2451694646816595e-07, |
|
"loss": 0.1227, |
|
"step": 9344 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.219828951536269e-07, |
|
"loss": 0.1342, |
|
"step": 9376 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.1944884383908773e-07, |
|
"loss": 0.1214, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.169147925245486e-07, |
|
"loss": 0.1624, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.143807412100095e-07, |
|
"loss": 0.1261, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.1184668989547035e-07, |
|
"loss": 0.1674, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.0931263858093124e-07, |
|
"loss": 0.1613, |
|
"step": 9536 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.0677858726639213e-07, |
|
"loss": 0.1519, |
|
"step": 9568 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.04244535951853e-07, |
|
"loss": 0.1107, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.017104846373139e-07, |
|
"loss": 0.0992, |
|
"step": 9632 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.9917643332277476e-07, |
|
"loss": 0.0921, |
|
"step": 9664 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.9664238200823565e-07, |
|
"loss": 0.1478, |
|
"step": 9696 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.9410833069369654e-07, |
|
"loss": 0.1268, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.9157427937915743e-07, |
|
"loss": 0.1086, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.890402280646183e-07, |
|
"loss": 0.1913, |
|
"step": 9792 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8650617675007916e-07, |
|
"loss": 0.1626, |
|
"step": 9824 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8397212543554005e-07, |
|
"loss": 0.1647, |
|
"step": 9856 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8143807412100095e-07, |
|
"loss": 0.1245, |
|
"step": 9888 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.789040228064618e-07, |
|
"loss": 0.1326, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.7636997149192273e-07, |
|
"loss": 0.1095, |
|
"step": 9952 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.738359201773836e-07, |
|
"loss": 0.1089, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.7130186886284446e-07, |
|
"loss": 0.1074, |
|
"step": 10016 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.6876781754830535e-07, |
|
"loss": 0.1084, |
|
"step": 10048 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 13434, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 3358, |
|
"total_flos": 4.27776497270784e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|