|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 22486, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.991105576803343e-07, |
|
"loss": 3.5693, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.981618192060246e-07, |
|
"loss": 1.3039, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.972130807317145e-07, |
|
"loss": 0.6178, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.962643422574046e-07, |
|
"loss": 0.2217, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.953156037830946e-07, |
|
"loss": 0.135, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.943668653087847e-07, |
|
"loss": 0.2547, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.934181268344748e-07, |
|
"loss": 0.2357, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.92513860476148e-07, |
|
"loss": 0.4032, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.915651220018381e-07, |
|
"loss": 0.2513, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.906163835275282e-07, |
|
"loss": 0.2058, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.89667645053218e-07, |
|
"loss": 0.2276, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.887189065789084e-07, |
|
"loss": 0.1798, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.877701681045983e-07, |
|
"loss": 0.2646, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.868214296302884e-07, |
|
"loss": 0.2434, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.858726911559785e-07, |
|
"loss": 0.3211, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.849239526816686e-07, |
|
"loss": 0.1601, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.839752142073586e-07, |
|
"loss": 0.2012, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.830264757330487e-07, |
|
"loss": 0.1635, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.820777372587388e-07, |
|
"loss": 0.2808, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.811289987844287e-07, |
|
"loss": 0.1622, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.801802603101188e-07, |
|
"loss": 0.23, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.792315218358089e-07, |
|
"loss": 0.1615, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.78282783361499e-07, |
|
"loss": 0.1424, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.77334044887189e-07, |
|
"loss": 0.2007, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.763853064128792e-07, |
|
"loss": 0.3649, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.75436567938569e-07, |
|
"loss": 0.1149, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.744878294642591e-07, |
|
"loss": 0.1882, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.735390909899492e-07, |
|
"loss": 0.2598, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.725903525156393e-07, |
|
"loss": 0.2198, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.716416140413294e-07, |
|
"loss": 0.1786, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.706928755670195e-07, |
|
"loss": 0.2713, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.697441370927096e-07, |
|
"loss": 0.2076, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.687953986183995e-07, |
|
"loss": 0.2343, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.678466601440898e-07, |
|
"loss": 0.3182, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.668979216697796e-07, |
|
"loss": 0.2102, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.659491831954697e-07, |
|
"loss": 0.2687, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.650004447211598e-07, |
|
"loss": 0.2186, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.6405170624685e-07, |
|
"loss": 0.1295, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.631029677725398e-07, |
|
"loss": 0.1542, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.621542292982299e-07, |
|
"loss": 0.1365, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.6120549082392e-07, |
|
"loss": 0.2106, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.6025675234961e-07, |
|
"loss": 0.2753, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.593080138753002e-07, |
|
"loss": 0.2162, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.583592754009902e-07, |
|
"loss": 0.2186, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.574105369266803e-07, |
|
"loss": 0.0991, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.564617984523702e-07, |
|
"loss": 0.1616, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.555130599780605e-07, |
|
"loss": 0.2029, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.545643215037504e-07, |
|
"loss": 0.1659, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.536155830294405e-07, |
|
"loss": 0.1887, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.526668445551306e-07, |
|
"loss": 0.2626, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.517181060808206e-07, |
|
"loss": 0.1325, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.507693676065108e-07, |
|
"loss": 0.2125, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.498206291322007e-07, |
|
"loss": 0.1926, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.488718906578907e-07, |
|
"loss": 0.2237, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.479231521835809e-07, |
|
"loss": 0.1283, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.469744137092709e-07, |
|
"loss": 0.2052, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.46025675234961e-07, |
|
"loss": 0.1983, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.45076936760651e-07, |
|
"loss": 0.1976, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.441281982863411e-07, |
|
"loss": 0.1999, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.431794598120312e-07, |
|
"loss": 0.2231, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.422307213377212e-07, |
|
"loss": 0.2057, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.412819828634113e-07, |
|
"loss": 0.2376, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.403332443891013e-07, |
|
"loss": 0.1325, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.393845059147913e-07, |
|
"loss": 0.2595, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.384357674404815e-07, |
|
"loss": 0.1817, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.374870289661715e-07, |
|
"loss": 0.2224, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.365382904918616e-07, |
|
"loss": 0.1444, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.355895520175517e-07, |
|
"loss": 0.2695, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.346408135432417e-07, |
|
"loss": 0.1859, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.336920750689318e-07, |
|
"loss": 0.1551, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.327433365946217e-07, |
|
"loss": 0.2093, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.317945981203118e-07, |
|
"loss": 0.1922, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.308458596460019e-07, |
|
"loss": 0.2056, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.298971211716919e-07, |
|
"loss": 0.2087, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.289483826973821e-07, |
|
"loss": 0.1552, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.279996442230721e-07, |
|
"loss": 0.1619, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.270509057487621e-07, |
|
"loss": 0.2634, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.261021672744523e-07, |
|
"loss": 0.1459, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.251534288001423e-07, |
|
"loss": 0.225, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.242046903258323e-07, |
|
"loss": 0.1878, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.232559518515224e-07, |
|
"loss": 0.1286, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.223072133772124e-07, |
|
"loss": 0.1059, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.213584749029025e-07, |
|
"loss": 0.1541, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.204097364285925e-07, |
|
"loss": 0.2427, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.194609979542827e-07, |
|
"loss": 0.1368, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.185122594799727e-07, |
|
"loss": 0.2477, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.175635210056627e-07, |
|
"loss": 0.1385, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.166147825313529e-07, |
|
"loss": 0.2537, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.156660440570428e-07, |
|
"loss": 0.2374, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.147173055827329e-07, |
|
"loss": 0.1942, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.13768567108423e-07, |
|
"loss": 0.1416, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.12819828634113e-07, |
|
"loss": 0.2205, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.118710901598031e-07, |
|
"loss": 0.1265, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.109223516854932e-07, |
|
"loss": 0.1972, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.099736132111833e-07, |
|
"loss": 0.2119, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.090248747368733e-07, |
|
"loss": 0.1284, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.080761362625632e-07, |
|
"loss": 0.1665, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.071273977882534e-07, |
|
"loss": 0.1545, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.061786593139434e-07, |
|
"loss": 0.1505, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.052299208396334e-07, |
|
"loss": 0.1871, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.042811823653236e-07, |
|
"loss": 0.2302, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.033324438910136e-07, |
|
"loss": 0.1395, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.023837054167037e-07, |
|
"loss": 0.2195, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.014349669423938e-07, |
|
"loss": 0.2419, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.004862284680838e-07, |
|
"loss": 0.2173, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.995374899937739e-07, |
|
"loss": 0.2522, |
|
"step": 6784 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.985887515194639e-07, |
|
"loss": 0.2317, |
|
"step": 6848 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.97640013045154e-07, |
|
"loss": 0.1959, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.96691274570844e-07, |
|
"loss": 0.1889, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.95742536096534e-07, |
|
"loss": 0.2131, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.947937976222242e-07, |
|
"loss": 0.1321, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.938450591479142e-07, |
|
"loss": 0.1679, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.928963206736043e-07, |
|
"loss": 0.1826, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.919475821992944e-07, |
|
"loss": 0.2144, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.909988437249843e-07, |
|
"loss": 0.1614, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.900501052506744e-07, |
|
"loss": 0.241, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.891013667763645e-07, |
|
"loss": 0.1825, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.881526283020546e-07, |
|
"loss": 0.17, |
|
"step": 7552 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.872038898277446e-07, |
|
"loss": 0.1562, |
|
"step": 7616 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.862551513534347e-07, |
|
"loss": 0.2264, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.853064128791248e-07, |
|
"loss": 0.1325, |
|
"step": 7744 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.843576744048148e-07, |
|
"loss": 0.1601, |
|
"step": 7808 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.83408935930505e-07, |
|
"loss": 0.1784, |
|
"step": 7872 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.82460197456195e-07, |
|
"loss": 0.1447, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.815114589818849e-07, |
|
"loss": 0.166, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.80562720507575e-07, |
|
"loss": 0.1554, |
|
"step": 8064 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.796139820332651e-07, |
|
"loss": 0.1097, |
|
"step": 8128 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.786652435589552e-07, |
|
"loss": 0.1322, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.777165050846452e-07, |
|
"loss": 0.262, |
|
"step": 8256 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.767677666103353e-07, |
|
"loss": 0.1755, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.758190281360254e-07, |
|
"loss": 0.1646, |
|
"step": 8384 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.748702896617154e-07, |
|
"loss": 0.1481, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.739215511874055e-07, |
|
"loss": 0.0985, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.729728127130955e-07, |
|
"loss": 0.1401, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.720240742387855e-07, |
|
"loss": 0.2057, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.710753357644757e-07, |
|
"loss": 0.1677, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.701265972901657e-07, |
|
"loss": 0.1398, |
|
"step": 8768 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.691778588158557e-07, |
|
"loss": 0.1665, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.682439443802069e-07, |
|
"loss": 0.1737, |
|
"step": 8896 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.67295205905897e-07, |
|
"loss": 0.1642, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.66346467431587e-07, |
|
"loss": 0.1487, |
|
"step": 9024 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.653977289572771e-07, |
|
"loss": 0.186, |
|
"step": 9088 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.644489904829672e-07, |
|
"loss": 0.1902, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.635002520086571e-07, |
|
"loss": 0.1977, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.625515135343473e-07, |
|
"loss": 0.1853, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.616027750600373e-07, |
|
"loss": 0.1156, |
|
"step": 9344 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.606540365857273e-07, |
|
"loss": 0.179, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.597052981114175e-07, |
|
"loss": 0.1978, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.587565596371075e-07, |
|
"loss": 0.1735, |
|
"step": 9536 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.578078211627976e-07, |
|
"loss": 0.1579, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.568590826884876e-07, |
|
"loss": 0.1444, |
|
"step": 9664 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.559103442141777e-07, |
|
"loss": 0.1664, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.549616057398677e-07, |
|
"loss": 0.1715, |
|
"step": 9792 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.540128672655577e-07, |
|
"loss": 0.1189, |
|
"step": 9856 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.530641287912479e-07, |
|
"loss": 0.1255, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.521153903169379e-07, |
|
"loss": 0.2314, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.511814758812891e-07, |
|
"loss": 0.18, |
|
"step": 10048 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.502327374069791e-07, |
|
"loss": 0.1948, |
|
"step": 10112 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.492839989326692e-07, |
|
"loss": 0.1763, |
|
"step": 10176 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.483352604583593e-07, |
|
"loss": 0.1984, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.473865219840493e-07, |
|
"loss": 0.169, |
|
"step": 10304 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.464377835097394e-07, |
|
"loss": 0.1268, |
|
"step": 10368 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.454890450354294e-07, |
|
"loss": 0.2261, |
|
"step": 10432 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.445403065611195e-07, |
|
"loss": 0.1751, |
|
"step": 10496 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.435915680868095e-07, |
|
"loss": 0.1758, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.426428296124995e-07, |
|
"loss": 0.1565, |
|
"step": 10624 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.416940911381897e-07, |
|
"loss": 0.0861, |
|
"step": 10688 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.407453526638797e-07, |
|
"loss": 0.1382, |
|
"step": 10752 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.397966141895698e-07, |
|
"loss": 0.1268, |
|
"step": 10816 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.388478757152599e-07, |
|
"loss": 0.1418, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.378991372409498e-07, |
|
"loss": 0.2417, |
|
"step": 10944 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.369503987666399e-07, |
|
"loss": 0.1285, |
|
"step": 11008 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.3600166029233e-07, |
|
"loss": 0.1719, |
|
"step": 11072 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.350529218180201e-07, |
|
"loss": 0.1432, |
|
"step": 11136 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.341041833437101e-07, |
|
"loss": 0.2443, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.331554448694002e-07, |
|
"loss": 0.1348, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.322067063950903e-07, |
|
"loss": 0.1251, |
|
"step": 11328 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.312579679207803e-07, |
|
"loss": 0.156, |
|
"step": 11392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.303092294464703e-07, |
|
"loss": 0.3104, |
|
"step": 11456 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.293604909721605e-07, |
|
"loss": 0.1834, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.284117524978504e-07, |
|
"loss": 0.1312, |
|
"step": 11584 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.274630140235405e-07, |
|
"loss": 0.1026, |
|
"step": 11648 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.265142755492306e-07, |
|
"loss": 0.1805, |
|
"step": 11712 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.255655370749206e-07, |
|
"loss": 0.2334, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.246167986006107e-07, |
|
"loss": 0.1606, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.236680601263008e-07, |
|
"loss": 0.1009, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.227193216519909e-07, |
|
"loss": 0.1337, |
|
"step": 11968 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.217705831776809e-07, |
|
"loss": 0.2247, |
|
"step": 12032 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.20821844703371e-07, |
|
"loss": 0.163, |
|
"step": 12096 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.19873106229061e-07, |
|
"loss": 0.1729, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.18924367754751e-07, |
|
"loss": 0.2133, |
|
"step": 12224 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.179756292804412e-07, |
|
"loss": 0.2887, |
|
"step": 12288 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.170268908061312e-07, |
|
"loss": 0.128, |
|
"step": 12352 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.160781523318212e-07, |
|
"loss": 0.2019, |
|
"step": 12416 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.151294138575113e-07, |
|
"loss": 0.128, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.141806753832014e-07, |
|
"loss": 0.1888, |
|
"step": 12544 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.132319369088915e-07, |
|
"loss": 0.2176, |
|
"step": 12608 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.122831984345814e-07, |
|
"loss": 0.1555, |
|
"step": 12672 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.113344599602715e-07, |
|
"loss": 0.1948, |
|
"step": 12736 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.103857214859616e-07, |
|
"loss": 0.233, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.094369830116516e-07, |
|
"loss": 0.1574, |
|
"step": 12864 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.084882445373418e-07, |
|
"loss": 0.1377, |
|
"step": 12928 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.075395060630318e-07, |
|
"loss": 0.1563, |
|
"step": 12992 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.065907675887218e-07, |
|
"loss": 0.1345, |
|
"step": 13056 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.05642029114412e-07, |
|
"loss": 0.1489, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.047081146787631e-07, |
|
"loss": 0.2524, |
|
"step": 13184 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.037742002431142e-07, |
|
"loss": 0.1534, |
|
"step": 13248 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.028254617688043e-07, |
|
"loss": 0.136, |
|
"step": 13312 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.018767232944942e-07, |
|
"loss": 0.151, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.009279848201844e-07, |
|
"loss": 0.1423, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.999792463458744e-07, |
|
"loss": 0.2284, |
|
"step": 13504 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.990305078715644e-07, |
|
"loss": 0.1515, |
|
"step": 13568 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.980817693972546e-07, |
|
"loss": 0.1431, |
|
"step": 13632 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.971330309229446e-07, |
|
"loss": 0.1759, |
|
"step": 13696 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.961842924486347e-07, |
|
"loss": 0.2942, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.952355539743248e-07, |
|
"loss": 0.1382, |
|
"step": 13824 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.942868155000148e-07, |
|
"loss": 0.181, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.933380770257049e-07, |
|
"loss": 0.2471, |
|
"step": 13952 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.923893385513949e-07, |
|
"loss": 0.1487, |
|
"step": 14016 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.91440600077085e-07, |
|
"loss": 0.1653, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.90491861602775e-07, |
|
"loss": 0.193, |
|
"step": 14144 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.89543123128465e-07, |
|
"loss": 0.115, |
|
"step": 14208 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.885943846541552e-07, |
|
"loss": 0.1413, |
|
"step": 14272 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.876456461798452e-07, |
|
"loss": 0.1508, |
|
"step": 14336 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.866969077055352e-07, |
|
"loss": 0.1752, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.857481692312254e-07, |
|
"loss": 0.2432, |
|
"step": 14464 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.847994307569153e-07, |
|
"loss": 0.1978, |
|
"step": 14528 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.838506922826054e-07, |
|
"loss": 0.1445, |
|
"step": 14592 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.829019538082955e-07, |
|
"loss": 0.1484, |
|
"step": 14656 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.819532153339855e-07, |
|
"loss": 0.1887, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.810044768596756e-07, |
|
"loss": 0.216, |
|
"step": 14784 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.800557383853657e-07, |
|
"loss": 0.1803, |
|
"step": 14848 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.791069999110558e-07, |
|
"loss": 0.1332, |
|
"step": 14912 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.781582614367458e-07, |
|
"loss": 0.2439, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.772095229624358e-07, |
|
"loss": 0.1689, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.76260784488126e-07, |
|
"loss": 0.1823, |
|
"step": 15104 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.753120460138159e-07, |
|
"loss": 0.1905, |
|
"step": 15168 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.74363307539506e-07, |
|
"loss": 0.2558, |
|
"step": 15232 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.734145690651961e-07, |
|
"loss": 0.1531, |
|
"step": 15296 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.724658305908861e-07, |
|
"loss": 0.1849, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.715170921165762e-07, |
|
"loss": 0.1317, |
|
"step": 15424 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.705683536422663e-07, |
|
"loss": 0.1096, |
|
"step": 15488 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.696196151679564e-07, |
|
"loss": 0.2193, |
|
"step": 15552 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.686708766936464e-07, |
|
"loss": 0.1658, |
|
"step": 15616 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.677221382193365e-07, |
|
"loss": 0.1553, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.667733997450265e-07, |
|
"loss": 0.1772, |
|
"step": 15744 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.658246612707165e-07, |
|
"loss": 0.2147, |
|
"step": 15808 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.648759227964067e-07, |
|
"loss": 0.1096, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.639271843220967e-07, |
|
"loss": 0.1613, |
|
"step": 15936 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.629784458477867e-07, |
|
"loss": 0.1488, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.620297073734768e-07, |
|
"loss": 0.2256, |
|
"step": 16064 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.610809688991669e-07, |
|
"loss": 0.2512, |
|
"step": 16128 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.60132230424857e-07, |
|
"loss": 0.1264, |
|
"step": 16192 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.59183491950547e-07, |
|
"loss": 0.1162, |
|
"step": 16256 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.58234753476237e-07, |
|
"loss": 0.1401, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.572860150019271e-07, |
|
"loss": 0.1336, |
|
"step": 16384 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.563372765276171e-07, |
|
"loss": 0.1234, |
|
"step": 16448 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.553885380533072e-07, |
|
"loss": 0.1195, |
|
"step": 16512 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.544397995789973e-07, |
|
"loss": 0.2435, |
|
"step": 16576 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.534910611046873e-07, |
|
"loss": 0.1109, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.525423226303775e-07, |
|
"loss": 0.2088, |
|
"step": 16704 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.515935841560675e-07, |
|
"loss": 0.141, |
|
"step": 16768 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.506448456817574e-07, |
|
"loss": 0.1428, |
|
"step": 16832 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.496961072074475e-07, |
|
"loss": 0.1505, |
|
"step": 16896 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.487473687331376e-07, |
|
"loss": 0.2152, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.477986302588277e-07, |
|
"loss": 0.2008, |
|
"step": 17024 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.468498917845177e-07, |
|
"loss": 0.1872, |
|
"step": 17088 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.459011533102078e-07, |
|
"loss": 0.1313, |
|
"step": 17152 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.449524148358979e-07, |
|
"loss": 0.1099, |
|
"step": 17216 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.440036763615879e-07, |
|
"loss": 0.138, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.430549378872781e-07, |
|
"loss": 0.1871, |
|
"step": 17344 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.42106199412968e-07, |
|
"loss": 0.18, |
|
"step": 17408 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.41157460938658e-07, |
|
"loss": 0.1337, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.402087224643482e-07, |
|
"loss": 0.1222, |
|
"step": 17536 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.392599839900382e-07, |
|
"loss": 0.1434, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.383112455157283e-07, |
|
"loss": 0.1538, |
|
"step": 17664 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.373625070414183e-07, |
|
"loss": 0.1908, |
|
"step": 17728 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.364137685671084e-07, |
|
"loss": 0.1244, |
|
"step": 17792 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.354650300927985e-07, |
|
"loss": 0.1593, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.345162916184885e-07, |
|
"loss": 0.1588, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.335675531441787e-07, |
|
"loss": 0.1639, |
|
"step": 17984 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.326188146698686e-07, |
|
"loss": 0.1431, |
|
"step": 18048 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.316700761955586e-07, |
|
"loss": 0.2002, |
|
"step": 18112 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.307213377212488e-07, |
|
"loss": 0.1761, |
|
"step": 18176 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.297725992469388e-07, |
|
"loss": 0.1597, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.288238607726288e-07, |
|
"loss": 0.1952, |
|
"step": 18304 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.27875122298319e-07, |
|
"loss": 0.1843, |
|
"step": 18368 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.26926383824009e-07, |
|
"loss": 0.1032, |
|
"step": 18432 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.259776453496991e-07, |
|
"loss": 0.1952, |
|
"step": 18496 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.25028906875389e-07, |
|
"loss": 0.193, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.240801684010791e-07, |
|
"loss": 0.137, |
|
"step": 18624 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.231314299267692e-07, |
|
"loss": 0.1992, |
|
"step": 18688 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.221826914524592e-07, |
|
"loss": 0.138, |
|
"step": 18752 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.212339529781494e-07, |
|
"loss": 0.2263, |
|
"step": 18816 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.202852145038394e-07, |
|
"loss": 0.2101, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.193364760295294e-07, |
|
"loss": 0.1731, |
|
"step": 18944 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.183877375552196e-07, |
|
"loss": 0.1523, |
|
"step": 19008 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.174389990809096e-07, |
|
"loss": 0.1671, |
|
"step": 19072 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.164902606065996e-07, |
|
"loss": 0.1549, |
|
"step": 19136 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.155415221322897e-07, |
|
"loss": 0.1346, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.145927836579797e-07, |
|
"loss": 0.2403, |
|
"step": 19264 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.136588692223308e-07, |
|
"loss": 0.1909, |
|
"step": 19328 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.12710130748021e-07, |
|
"loss": 0.1801, |
|
"step": 19392 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.11761392273711e-07, |
|
"loss": 0.1196, |
|
"step": 19456 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.10812653799401e-07, |
|
"loss": 0.0749, |
|
"step": 19520 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.098639153250912e-07, |
|
"loss": 0.1386, |
|
"step": 19584 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.089151768507812e-07, |
|
"loss": 0.219, |
|
"step": 19648 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.079664383764713e-07, |
|
"loss": 0.1572, |
|
"step": 19712 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.070176999021614e-07, |
|
"loss": 0.191, |
|
"step": 19776 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.060689614278513e-07, |
|
"loss": 0.1882, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.051202229535414e-07, |
|
"loss": 0.1654, |
|
"step": 19904 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.041714844792315e-07, |
|
"loss": 0.1397, |
|
"step": 19968 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.032227460049216e-07, |
|
"loss": 0.1948, |
|
"step": 20032 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.022740075306116e-07, |
|
"loss": 0.2171, |
|
"step": 20096 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.013252690563016e-07, |
|
"loss": 0.2474, |
|
"step": 20160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.003765305819918e-07, |
|
"loss": 0.2014, |
|
"step": 20224 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.994277921076818e-07, |
|
"loss": 0.1256, |
|
"step": 20288 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.984790536333718e-07, |
|
"loss": 0.1634, |
|
"step": 20352 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.975303151590619e-07, |
|
"loss": 0.1672, |
|
"step": 20416 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.965815766847519e-07, |
|
"loss": 0.1773, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.95632838210442e-07, |
|
"loss": 0.1157, |
|
"step": 20544 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.946840997361321e-07, |
|
"loss": 0.2241, |
|
"step": 20608 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.937353612618222e-07, |
|
"loss": 0.1108, |
|
"step": 20672 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.927866227875122e-07, |
|
"loss": 0.1821, |
|
"step": 20736 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.918378843132023e-07, |
|
"loss": 0.1459, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.908891458388924e-07, |
|
"loss": 0.2022, |
|
"step": 20864 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.899404073645823e-07, |
|
"loss": 0.1864, |
|
"step": 20928 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.889916688902723e-07, |
|
"loss": 0.1436, |
|
"step": 20992 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.880429304159625e-07, |
|
"loss": 0.1771, |
|
"step": 21056 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.870941919416525e-07, |
|
"loss": 0.1782, |
|
"step": 21120 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.861454534673426e-07, |
|
"loss": 0.1754, |
|
"step": 21184 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.851967149930327e-07, |
|
"loss": 0.1483, |
|
"step": 21248 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.842479765187227e-07, |
|
"loss": 0.1373, |
|
"step": 21312 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.833140620830739e-07, |
|
"loss": 0.219, |
|
"step": 21376 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.82365323608764e-07, |
|
"loss": 0.1474, |
|
"step": 21440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.81416585134454e-07, |
|
"loss": 0.1713, |
|
"step": 21504 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.80467846660144e-07, |
|
"loss": 0.1034, |
|
"step": 21568 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.795191081858341e-07, |
|
"loss": 0.185, |
|
"step": 21632 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.785703697115241e-07, |
|
"loss": 0.284, |
|
"step": 21696 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.776216312372143e-07, |
|
"loss": 0.1953, |
|
"step": 21760 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.766728927629043e-07, |
|
"loss": 0.168, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.757241542885943e-07, |
|
"loss": 0.1852, |
|
"step": 21888 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.747754158142845e-07, |
|
"loss": 0.1358, |
|
"step": 21952 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.738266773399745e-07, |
|
"loss": 0.1885, |
|
"step": 22016 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.728779388656646e-07, |
|
"loss": 0.22, |
|
"step": 22080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.719292003913545e-07, |
|
"loss": 0.214, |
|
"step": 22144 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.709804619170446e-07, |
|
"loss": 0.1198, |
|
"step": 22208 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.700317234427347e-07, |
|
"loss": 0.1458, |
|
"step": 22272 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.690829849684247e-07, |
|
"loss": 0.2405, |
|
"step": 22336 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.681342464941149e-07, |
|
"loss": 0.1162, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.671855080198049e-07, |
|
"loss": 0.1508, |
|
"step": 22464 |
|
} |
|
], |
|
"logging_steps": 64, |
|
"max_steps": 67458, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 22486, |
|
"total_flos": 4.77416235737088e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|