diff --git "a/checkpoint-67458/trainer_state.json" "b/checkpoint-67458/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-67458/trainer_state.json" @@ -0,0 +1,6345 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 67458, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 9.991105576803343e-07, + "loss": 3.5693, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 9.981618192060246e-07, + "loss": 1.3039, + "step": 128 + }, + { + "epoch": 0.01, + "learning_rate": 9.972130807317145e-07, + "loss": 0.6178, + "step": 192 + }, + { + "epoch": 0.01, + "learning_rate": 9.962643422574046e-07, + "loss": 0.2217, + "step": 256 + }, + { + "epoch": 0.01, + "learning_rate": 9.953156037830946e-07, + "loss": 0.135, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 9.943668653087847e-07, + "loss": 0.2547, + "step": 384 + }, + { + "epoch": 0.02, + "learning_rate": 9.934181268344748e-07, + "loss": 0.2357, + "step": 448 + }, + { + "epoch": 0.02, + "learning_rate": 9.92513860476148e-07, + "loss": 0.4032, + "step": 512 + }, + { + "epoch": 0.03, + "learning_rate": 9.915651220018381e-07, + "loss": 0.2513, + "step": 576 + }, + { + "epoch": 0.03, + "learning_rate": 9.906163835275282e-07, + "loss": 0.2058, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 9.89667645053218e-07, + "loss": 0.2276, + "step": 704 + }, + { + "epoch": 0.03, + "learning_rate": 9.887189065789084e-07, + "loss": 0.1798, + "step": 768 + }, + { + "epoch": 0.04, + "learning_rate": 9.877701681045983e-07, + "loss": 0.2646, + "step": 832 + }, + { + "epoch": 0.04, + "learning_rate": 9.868214296302884e-07, + "loss": 0.2434, + "step": 896 + }, + { + "epoch": 0.04, + "learning_rate": 9.858726911559785e-07, + "loss": 0.3211, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 9.849239526816686e-07, + "loss": 0.1601, + "step": 1024 + }, + { + "epoch": 0.05, + "learning_rate": 9.839752142073586e-07, + "loss": 0.2012, + "step": 1088 + }, + { + "epoch": 0.05, + "learning_rate": 9.830264757330487e-07, + "loss": 0.1635, + "step": 1152 + }, + { + "epoch": 0.05, + "learning_rate": 9.820777372587388e-07, + "loss": 0.2808, + "step": 1216 + }, + { + "epoch": 0.06, + "learning_rate": 9.811289987844287e-07, + "loss": 0.1622, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 9.801802603101188e-07, + "loss": 0.23, + "step": 1344 + }, + { + "epoch": 0.06, + "learning_rate": 9.792315218358089e-07, + "loss": 0.1615, + "step": 1408 + }, + { + "epoch": 0.07, + "learning_rate": 9.78282783361499e-07, + "loss": 0.1424, + "step": 1472 + }, + { + "epoch": 0.07, + "learning_rate": 9.77334044887189e-07, + "loss": 0.2007, + "step": 1536 + }, + { + "epoch": 0.07, + "learning_rate": 9.763853064128792e-07, + "loss": 0.3649, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 9.75436567938569e-07, + "loss": 0.1149, + "step": 1664 + }, + { + "epoch": 0.08, + "learning_rate": 9.744878294642591e-07, + "loss": 0.1882, + "step": 1728 + }, + { + "epoch": 0.08, + "learning_rate": 9.735390909899492e-07, + "loss": 0.2598, + "step": 1792 + }, + { + "epoch": 0.08, + "learning_rate": 9.725903525156393e-07, + "loss": 0.2198, + "step": 1856 + }, + { + "epoch": 0.09, + "learning_rate": 9.716416140413294e-07, + "loss": 0.1786, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 9.706928755670195e-07, + "loss": 0.2713, + "step": 1984 + }, + { + "epoch": 0.09, + "learning_rate": 9.697441370927096e-07, + "loss": 0.2076, + "step": 2048 + }, + { + "epoch": 0.09, + "learning_rate": 9.687953986183995e-07, + "loss": 0.2343, + "step": 2112 + }, + { + "epoch": 0.1, + "learning_rate": 9.678466601440898e-07, + "loss": 0.3182, + "step": 2176 + }, + { + "epoch": 0.1, + "learning_rate": 9.668979216697796e-07, + "loss": 0.2102, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 9.659491831954697e-07, + "loss": 0.2687, + "step": 2304 + }, + { + "epoch": 0.11, + "learning_rate": 9.650004447211598e-07, + "loss": 0.2186, + "step": 2368 + }, + { + "epoch": 0.11, + "learning_rate": 9.6405170624685e-07, + "loss": 0.1295, + "step": 2432 + }, + { + "epoch": 0.11, + "learning_rate": 9.631029677725398e-07, + "loss": 0.1542, + "step": 2496 + }, + { + "epoch": 0.11, + "learning_rate": 9.621542292982299e-07, + "loss": 0.1365, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 9.6120549082392e-07, + "loss": 0.2106, + "step": 2624 + }, + { + "epoch": 0.12, + "learning_rate": 9.6025675234961e-07, + "loss": 0.2753, + "step": 2688 + }, + { + "epoch": 0.12, + "learning_rate": 9.593080138753002e-07, + "loss": 0.2162, + "step": 2752 + }, + { + "epoch": 0.13, + "learning_rate": 9.583592754009902e-07, + "loss": 0.2186, + "step": 2816 + }, + { + "epoch": 0.13, + "learning_rate": 9.574105369266803e-07, + "loss": 0.0991, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 9.564617984523702e-07, + "loss": 0.1616, + "step": 2944 + }, + { + "epoch": 0.13, + "learning_rate": 9.555130599780605e-07, + "loss": 0.2029, + "step": 3008 + }, + { + "epoch": 0.14, + "learning_rate": 9.545643215037504e-07, + "loss": 0.1659, + "step": 3072 + }, + { + "epoch": 0.14, + "learning_rate": 9.536155830294405e-07, + "loss": 0.1887, + "step": 3136 + }, + { + "epoch": 0.14, + "learning_rate": 9.526668445551306e-07, + "loss": 0.2626, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 9.517181060808206e-07, + "loss": 0.1325, + "step": 3264 + }, + { + "epoch": 0.15, + "learning_rate": 9.507693676065108e-07, + "loss": 0.2125, + "step": 3328 + }, + { + "epoch": 0.15, + "learning_rate": 9.498206291322007e-07, + "loss": 0.1926, + "step": 3392 + }, + { + "epoch": 0.15, + "learning_rate": 9.488718906578907e-07, + "loss": 0.2237, + "step": 3456 + }, + { + "epoch": 0.16, + "learning_rate": 9.479231521835809e-07, + "loss": 0.1283, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 9.469744137092709e-07, + "loss": 0.2052, + "step": 3584 + }, + { + "epoch": 0.16, + "learning_rate": 9.46025675234961e-07, + "loss": 0.1983, + "step": 3648 + }, + { + "epoch": 0.17, + "learning_rate": 9.45076936760651e-07, + "loss": 0.1976, + "step": 3712 + }, + { + "epoch": 0.17, + "learning_rate": 9.441281982863411e-07, + "loss": 0.1999, + "step": 3776 + }, + { + "epoch": 0.17, + "learning_rate": 9.431794598120312e-07, + "loss": 0.2231, + "step": 3840 + }, + { + "epoch": 0.17, + "learning_rate": 9.422307213377212e-07, + "loss": 0.2057, + "step": 3904 + }, + { + "epoch": 0.18, + "learning_rate": 9.412819828634113e-07, + "loss": 0.2376, + "step": 3968 + }, + { + "epoch": 0.18, + "learning_rate": 9.403332443891013e-07, + "loss": 0.1325, + "step": 4032 + }, + { + "epoch": 0.18, + "learning_rate": 9.393845059147913e-07, + "loss": 0.2595, + "step": 4096 + }, + { + "epoch": 0.19, + "learning_rate": 9.384357674404815e-07, + "loss": 0.1817, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 9.374870289661715e-07, + "loss": 0.2224, + "step": 4224 + }, + { + "epoch": 0.19, + "learning_rate": 9.365382904918616e-07, + "loss": 0.1444, + "step": 4288 + }, + { + "epoch": 0.19, + "learning_rate": 9.355895520175517e-07, + "loss": 0.2695, + "step": 4352 + }, + { + "epoch": 0.2, + "learning_rate": 9.346408135432417e-07, + "loss": 0.1859, + "step": 4416 + }, + { + "epoch": 0.2, + "learning_rate": 9.336920750689318e-07, + "loss": 0.1551, + "step": 4480 + }, + { + "epoch": 0.2, + "learning_rate": 9.327433365946217e-07, + "loss": 0.2093, + "step": 4544 + }, + { + "epoch": 0.2, + "learning_rate": 9.317945981203118e-07, + "loss": 0.1922, + "step": 4608 + }, + { + "epoch": 0.21, + "learning_rate": 9.308458596460019e-07, + "loss": 0.2056, + "step": 4672 + }, + { + "epoch": 0.21, + "learning_rate": 9.298971211716919e-07, + "loss": 0.2087, + "step": 4736 + }, + { + "epoch": 0.21, + "learning_rate": 9.289483826973821e-07, + "loss": 0.1552, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 9.279996442230721e-07, + "loss": 0.1619, + "step": 4864 + }, + { + "epoch": 0.22, + "learning_rate": 9.270509057487621e-07, + "loss": 0.2634, + "step": 4928 + }, + { + "epoch": 0.22, + "learning_rate": 9.261021672744523e-07, + "loss": 0.1459, + "step": 4992 + }, + { + "epoch": 0.22, + "learning_rate": 9.251534288001423e-07, + "loss": 0.225, + "step": 5056 + }, + { + "epoch": 0.23, + "learning_rate": 9.242046903258323e-07, + "loss": 0.1878, + "step": 5120 + }, + { + "epoch": 0.23, + "learning_rate": 9.232559518515224e-07, + "loss": 0.1286, + "step": 5184 + }, + { + "epoch": 0.23, + "learning_rate": 9.223072133772124e-07, + "loss": 0.1059, + "step": 5248 + }, + { + "epoch": 0.24, + "learning_rate": 9.213584749029025e-07, + "loss": 0.1541, + "step": 5312 + }, + { + "epoch": 0.24, + "learning_rate": 9.204097364285925e-07, + "loss": 0.2427, + "step": 5376 + }, + { + "epoch": 0.24, + "learning_rate": 9.194609979542827e-07, + "loss": 0.1368, + "step": 5440 + }, + { + "epoch": 0.24, + "learning_rate": 9.185122594799727e-07, + "loss": 0.2477, + "step": 5504 + }, + { + "epoch": 0.25, + "learning_rate": 9.175635210056627e-07, + "loss": 0.1385, + "step": 5568 + }, + { + "epoch": 0.25, + "learning_rate": 9.166147825313529e-07, + "loss": 0.2537, + "step": 5632 + }, + { + "epoch": 0.25, + "learning_rate": 9.156660440570428e-07, + "loss": 0.2374, + "step": 5696 + }, + { + "epoch": 0.26, + "learning_rate": 9.147173055827329e-07, + "loss": 0.1942, + "step": 5760 + }, + { + "epoch": 0.26, + "learning_rate": 9.13768567108423e-07, + "loss": 0.1416, + "step": 5824 + }, + { + "epoch": 0.26, + "learning_rate": 9.12819828634113e-07, + "loss": 0.2205, + "step": 5888 + }, + { + "epoch": 0.26, + "learning_rate": 9.118710901598031e-07, + "loss": 0.1265, + "step": 5952 + }, + { + "epoch": 0.27, + "learning_rate": 9.109223516854932e-07, + "loss": 0.1972, + "step": 6016 + }, + { + "epoch": 0.27, + "learning_rate": 9.099736132111833e-07, + "loss": 0.2119, + "step": 6080 + }, + { + "epoch": 0.27, + "learning_rate": 9.090248747368733e-07, + "loss": 0.1284, + "step": 6144 + }, + { + "epoch": 0.28, + "learning_rate": 9.080761362625632e-07, + "loss": 0.1665, + "step": 6208 + }, + { + "epoch": 0.28, + "learning_rate": 9.071273977882534e-07, + "loss": 0.1545, + "step": 6272 + }, + { + "epoch": 0.28, + "learning_rate": 9.061786593139434e-07, + "loss": 0.1505, + "step": 6336 + }, + { + "epoch": 0.28, + "learning_rate": 9.052299208396334e-07, + "loss": 0.1871, + "step": 6400 + }, + { + "epoch": 0.29, + "learning_rate": 9.042811823653236e-07, + "loss": 0.2302, + "step": 6464 + }, + { + "epoch": 0.29, + "learning_rate": 9.033324438910136e-07, + "loss": 0.1395, + "step": 6528 + }, + { + "epoch": 0.29, + "learning_rate": 9.023837054167037e-07, + "loss": 0.2195, + "step": 6592 + }, + { + "epoch": 0.3, + "learning_rate": 9.014349669423938e-07, + "loss": 0.2419, + "step": 6656 + }, + { + "epoch": 0.3, + "learning_rate": 9.004862284680838e-07, + "loss": 0.2173, + "step": 6720 + }, + { + "epoch": 0.3, + "learning_rate": 8.995374899937739e-07, + "loss": 0.2522, + "step": 6784 + }, + { + "epoch": 0.3, + "learning_rate": 8.985887515194639e-07, + "loss": 0.2317, + "step": 6848 + }, + { + "epoch": 0.31, + "learning_rate": 8.97640013045154e-07, + "loss": 0.1959, + "step": 6912 + }, + { + "epoch": 0.31, + "learning_rate": 8.96691274570844e-07, + "loss": 0.1889, + "step": 6976 + }, + { + "epoch": 0.31, + "learning_rate": 8.95742536096534e-07, + "loss": 0.2131, + "step": 7040 + }, + { + "epoch": 0.32, + "learning_rate": 8.947937976222242e-07, + "loss": 0.1321, + "step": 7104 + }, + { + "epoch": 0.32, + "learning_rate": 8.938450591479142e-07, + "loss": 0.1679, + "step": 7168 + }, + { + "epoch": 0.32, + "learning_rate": 8.928963206736043e-07, + "loss": 0.1826, + "step": 7232 + }, + { + "epoch": 0.32, + "learning_rate": 8.919475821992944e-07, + "loss": 0.2144, + "step": 7296 + }, + { + "epoch": 0.33, + "learning_rate": 8.909988437249843e-07, + "loss": 0.1614, + "step": 7360 + }, + { + "epoch": 0.33, + "learning_rate": 8.900501052506744e-07, + "loss": 0.241, + "step": 7424 + }, + { + "epoch": 0.33, + "learning_rate": 8.891013667763645e-07, + "loss": 0.1825, + "step": 7488 + }, + { + "epoch": 0.34, + "learning_rate": 8.881526283020546e-07, + "loss": 0.17, + "step": 7552 + }, + { + "epoch": 0.34, + "learning_rate": 8.872038898277446e-07, + "loss": 0.1562, + "step": 7616 + }, + { + "epoch": 0.34, + "learning_rate": 8.862551513534347e-07, + "loss": 0.2264, + "step": 7680 + }, + { + "epoch": 0.34, + "learning_rate": 8.853064128791248e-07, + "loss": 0.1325, + "step": 7744 + }, + { + "epoch": 0.35, + "learning_rate": 8.843576744048148e-07, + "loss": 0.1601, + "step": 7808 + }, + { + "epoch": 0.35, + "learning_rate": 8.83408935930505e-07, + "loss": 0.1784, + "step": 7872 + }, + { + "epoch": 0.35, + "learning_rate": 8.82460197456195e-07, + "loss": 0.1447, + "step": 7936 + }, + { + "epoch": 0.36, + "learning_rate": 8.815114589818849e-07, + "loss": 0.166, + "step": 8000 + }, + { + "epoch": 0.36, + "learning_rate": 8.80562720507575e-07, + "loss": 0.1554, + "step": 8064 + }, + { + "epoch": 0.36, + "learning_rate": 8.796139820332651e-07, + "loss": 0.1097, + "step": 8128 + }, + { + "epoch": 0.36, + "learning_rate": 8.786652435589552e-07, + "loss": 0.1322, + "step": 8192 + }, + { + "epoch": 0.37, + "learning_rate": 8.777165050846452e-07, + "loss": 0.262, + "step": 8256 + }, + { + "epoch": 0.37, + "learning_rate": 8.767677666103353e-07, + "loss": 0.1755, + "step": 8320 + }, + { + "epoch": 0.37, + "learning_rate": 8.758190281360254e-07, + "loss": 0.1646, + "step": 8384 + }, + { + "epoch": 0.38, + "learning_rate": 8.748702896617154e-07, + "loss": 0.1481, + "step": 8448 + }, + { + "epoch": 0.38, + "learning_rate": 8.739215511874055e-07, + "loss": 0.0985, + "step": 8512 + }, + { + "epoch": 0.38, + "learning_rate": 8.729728127130955e-07, + "loss": 0.1401, + "step": 8576 + }, + { + "epoch": 0.38, + "learning_rate": 8.720240742387855e-07, + "loss": 0.2057, + "step": 8640 + }, + { + "epoch": 0.39, + "learning_rate": 8.710753357644757e-07, + "loss": 0.1677, + "step": 8704 + }, + { + "epoch": 0.39, + "learning_rate": 8.701265972901657e-07, + "loss": 0.1398, + "step": 8768 + }, + { + "epoch": 0.39, + "learning_rate": 8.691778588158557e-07, + "loss": 0.1665, + "step": 8832 + }, + { + "epoch": 0.4, + "learning_rate": 8.682439443802069e-07, + "loss": 0.1737, + "step": 8896 + }, + { + "epoch": 0.4, + "learning_rate": 8.67295205905897e-07, + "loss": 0.1642, + "step": 8960 + }, + { + "epoch": 0.4, + "learning_rate": 8.66346467431587e-07, + "loss": 0.1487, + "step": 9024 + }, + { + "epoch": 0.4, + "learning_rate": 8.653977289572771e-07, + "loss": 0.186, + "step": 9088 + }, + { + "epoch": 0.41, + "learning_rate": 8.644489904829672e-07, + "loss": 0.1902, + "step": 9152 + }, + { + "epoch": 0.41, + "learning_rate": 8.635002520086571e-07, + "loss": 0.1977, + "step": 9216 + }, + { + "epoch": 0.41, + "learning_rate": 8.625515135343473e-07, + "loss": 0.1853, + "step": 9280 + }, + { + "epoch": 0.42, + "learning_rate": 8.616027750600373e-07, + "loss": 0.1156, + "step": 9344 + }, + { + "epoch": 0.42, + "learning_rate": 8.606540365857273e-07, + "loss": 0.179, + "step": 9408 + }, + { + "epoch": 0.42, + "learning_rate": 8.597052981114175e-07, + "loss": 0.1978, + "step": 9472 + }, + { + "epoch": 0.42, + "learning_rate": 8.587565596371075e-07, + "loss": 0.1735, + "step": 9536 + }, + { + "epoch": 0.43, + "learning_rate": 8.578078211627976e-07, + "loss": 0.1579, + "step": 9600 + }, + { + "epoch": 0.43, + "learning_rate": 8.568590826884876e-07, + "loss": 0.1444, + "step": 9664 + }, + { + "epoch": 0.43, + "learning_rate": 8.559103442141777e-07, + "loss": 0.1664, + "step": 9728 + }, + { + "epoch": 0.44, + "learning_rate": 8.549616057398677e-07, + "loss": 0.1715, + "step": 9792 + }, + { + "epoch": 0.44, + "learning_rate": 8.540128672655577e-07, + "loss": 0.1189, + "step": 9856 + }, + { + "epoch": 0.44, + "learning_rate": 8.530641287912479e-07, + "loss": 0.1255, + "step": 9920 + }, + { + "epoch": 0.44, + "learning_rate": 8.521153903169379e-07, + "loss": 0.2314, + "step": 9984 + }, + { + "epoch": 0.45, + "learning_rate": 8.511814758812891e-07, + "loss": 0.18, + "step": 10048 + }, + { + "epoch": 0.45, + "learning_rate": 8.502327374069791e-07, + "loss": 0.1948, + "step": 10112 + }, + { + "epoch": 0.45, + "learning_rate": 8.492839989326692e-07, + "loss": 0.1763, + "step": 10176 + }, + { + "epoch": 0.46, + "learning_rate": 8.483352604583593e-07, + "loss": 0.1984, + "step": 10240 + }, + { + "epoch": 0.46, + "learning_rate": 8.473865219840493e-07, + "loss": 0.169, + "step": 10304 + }, + { + "epoch": 0.46, + "learning_rate": 8.464377835097394e-07, + "loss": 0.1268, + "step": 10368 + }, + { + "epoch": 0.46, + "learning_rate": 8.454890450354294e-07, + "loss": 0.2261, + "step": 10432 + }, + { + "epoch": 0.47, + "learning_rate": 8.445403065611195e-07, + "loss": 0.1751, + "step": 10496 + }, + { + "epoch": 0.47, + "learning_rate": 8.435915680868095e-07, + "loss": 0.1758, + "step": 10560 + }, + { + "epoch": 0.47, + "learning_rate": 8.426428296124995e-07, + "loss": 0.1565, + "step": 10624 + }, + { + "epoch": 0.48, + "learning_rate": 8.416940911381897e-07, + "loss": 0.0861, + "step": 10688 + }, + { + "epoch": 0.48, + "learning_rate": 8.407453526638797e-07, + "loss": 0.1382, + "step": 10752 + }, + { + "epoch": 0.48, + "learning_rate": 8.397966141895698e-07, + "loss": 0.1268, + "step": 10816 + }, + { + "epoch": 0.48, + "learning_rate": 8.388478757152599e-07, + "loss": 0.1418, + "step": 10880 + }, + { + "epoch": 0.49, + "learning_rate": 8.378991372409498e-07, + "loss": 0.2417, + "step": 10944 + }, + { + "epoch": 0.49, + "learning_rate": 8.369503987666399e-07, + "loss": 0.1285, + "step": 11008 + }, + { + "epoch": 0.49, + "learning_rate": 8.3600166029233e-07, + "loss": 0.1719, + "step": 11072 + }, + { + "epoch": 0.5, + "learning_rate": 8.350529218180201e-07, + "loss": 0.1432, + "step": 11136 + }, + { + "epoch": 0.5, + "learning_rate": 8.341041833437101e-07, + "loss": 0.2443, + "step": 11200 + }, + { + "epoch": 0.5, + "learning_rate": 8.331554448694002e-07, + "loss": 0.1348, + "step": 11264 + }, + { + "epoch": 0.5, + "learning_rate": 8.322067063950903e-07, + "loss": 0.1251, + "step": 11328 + }, + { + "epoch": 0.51, + "learning_rate": 8.312579679207803e-07, + "loss": 0.156, + "step": 11392 + }, + { + "epoch": 0.51, + "learning_rate": 8.303092294464703e-07, + "loss": 0.3104, + "step": 11456 + }, + { + "epoch": 0.51, + "learning_rate": 8.293604909721605e-07, + "loss": 0.1834, + "step": 11520 + }, + { + "epoch": 0.52, + "learning_rate": 8.284117524978504e-07, + "loss": 0.1312, + "step": 11584 + }, + { + "epoch": 0.52, + "learning_rate": 8.274630140235405e-07, + "loss": 0.1026, + "step": 11648 + }, + { + "epoch": 0.52, + "learning_rate": 8.265142755492306e-07, + "loss": 0.1805, + "step": 11712 + }, + { + "epoch": 0.52, + "learning_rate": 8.255655370749206e-07, + "loss": 0.2334, + "step": 11776 + }, + { + "epoch": 0.53, + "learning_rate": 8.246167986006107e-07, + "loss": 0.1606, + "step": 11840 + }, + { + "epoch": 0.53, + "learning_rate": 8.236680601263008e-07, + "loss": 0.1009, + "step": 11904 + }, + { + "epoch": 0.53, + "learning_rate": 8.227193216519909e-07, + "loss": 0.1337, + "step": 11968 + }, + { + "epoch": 0.54, + "learning_rate": 8.217705831776809e-07, + "loss": 0.2247, + "step": 12032 + }, + { + "epoch": 0.54, + "learning_rate": 8.20821844703371e-07, + "loss": 0.163, + "step": 12096 + }, + { + "epoch": 0.54, + "learning_rate": 8.19873106229061e-07, + "loss": 0.1729, + "step": 12160 + }, + { + "epoch": 0.54, + "learning_rate": 8.18924367754751e-07, + "loss": 0.2133, + "step": 12224 + }, + { + "epoch": 0.55, + "learning_rate": 8.179756292804412e-07, + "loss": 0.2887, + "step": 12288 + }, + { + "epoch": 0.55, + "learning_rate": 8.170268908061312e-07, + "loss": 0.128, + "step": 12352 + }, + { + "epoch": 0.55, + "learning_rate": 8.160781523318212e-07, + "loss": 0.2019, + "step": 12416 + }, + { + "epoch": 0.56, + "learning_rate": 8.151294138575113e-07, + "loss": 0.128, + "step": 12480 + }, + { + "epoch": 0.56, + "learning_rate": 8.141806753832014e-07, + "loss": 0.1888, + "step": 12544 + }, + { + "epoch": 0.56, + "learning_rate": 8.132319369088915e-07, + "loss": 0.2176, + "step": 12608 + }, + { + "epoch": 0.56, + "learning_rate": 8.122831984345814e-07, + "loss": 0.1555, + "step": 12672 + }, + { + "epoch": 0.57, + "learning_rate": 8.113344599602715e-07, + "loss": 0.1948, + "step": 12736 + }, + { + "epoch": 0.57, + "learning_rate": 8.103857214859616e-07, + "loss": 0.233, + "step": 12800 + }, + { + "epoch": 0.57, + "learning_rate": 8.094369830116516e-07, + "loss": 0.1574, + "step": 12864 + }, + { + "epoch": 0.57, + "learning_rate": 8.084882445373418e-07, + "loss": 0.1377, + "step": 12928 + }, + { + "epoch": 0.58, + "learning_rate": 8.075395060630318e-07, + "loss": 0.1563, + "step": 12992 + }, + { + "epoch": 0.58, + "learning_rate": 8.065907675887218e-07, + "loss": 0.1345, + "step": 13056 + }, + { + "epoch": 0.58, + "learning_rate": 8.05642029114412e-07, + "loss": 0.1489, + "step": 13120 + }, + { + "epoch": 0.59, + "learning_rate": 8.047081146787631e-07, + "loss": 0.2524, + "step": 13184 + }, + { + "epoch": 0.59, + "learning_rate": 8.037742002431142e-07, + "loss": 0.1534, + "step": 13248 + }, + { + "epoch": 0.59, + "learning_rate": 8.028254617688043e-07, + "loss": 0.136, + "step": 13312 + }, + { + "epoch": 0.59, + "learning_rate": 8.018767232944942e-07, + "loss": 0.151, + "step": 13376 + }, + { + "epoch": 0.6, + "learning_rate": 8.009279848201844e-07, + "loss": 0.1423, + "step": 13440 + }, + { + "epoch": 0.6, + "learning_rate": 7.999792463458744e-07, + "loss": 0.2284, + "step": 13504 + }, + { + "epoch": 0.6, + "learning_rate": 7.990305078715644e-07, + "loss": 0.1515, + "step": 13568 + }, + { + "epoch": 0.61, + "learning_rate": 7.980817693972546e-07, + "loss": 0.1431, + "step": 13632 + }, + { + "epoch": 0.61, + "learning_rate": 7.971330309229446e-07, + "loss": 0.1759, + "step": 13696 + }, + { + "epoch": 0.61, + "learning_rate": 7.961842924486347e-07, + "loss": 0.2942, + "step": 13760 + }, + { + "epoch": 0.61, + "learning_rate": 7.952355539743248e-07, + "loss": 0.1382, + "step": 13824 + }, + { + "epoch": 0.62, + "learning_rate": 7.942868155000148e-07, + "loss": 0.181, + "step": 13888 + }, + { + "epoch": 0.62, + "learning_rate": 7.933380770257049e-07, + "loss": 0.2471, + "step": 13952 + }, + { + "epoch": 0.62, + "learning_rate": 7.923893385513949e-07, + "loss": 0.1487, + "step": 14016 + }, + { + "epoch": 0.63, + "learning_rate": 7.91440600077085e-07, + "loss": 0.1653, + "step": 14080 + }, + { + "epoch": 0.63, + "learning_rate": 7.90491861602775e-07, + "loss": 0.193, + "step": 14144 + }, + { + "epoch": 0.63, + "learning_rate": 7.89543123128465e-07, + "loss": 0.115, + "step": 14208 + }, + { + "epoch": 0.63, + "learning_rate": 7.885943846541552e-07, + "loss": 0.1413, + "step": 14272 + }, + { + "epoch": 0.64, + "learning_rate": 7.876456461798452e-07, + "loss": 0.1508, + "step": 14336 + }, + { + "epoch": 0.64, + "learning_rate": 7.866969077055352e-07, + "loss": 0.1752, + "step": 14400 + }, + { + "epoch": 0.64, + "learning_rate": 7.857481692312254e-07, + "loss": 0.2432, + "step": 14464 + }, + { + "epoch": 0.65, + "learning_rate": 7.847994307569153e-07, + "loss": 0.1978, + "step": 14528 + }, + { + "epoch": 0.65, + "learning_rate": 7.838506922826054e-07, + "loss": 0.1445, + "step": 14592 + }, + { + "epoch": 0.65, + "learning_rate": 7.829019538082955e-07, + "loss": 0.1484, + "step": 14656 + }, + { + "epoch": 0.65, + "learning_rate": 7.819532153339855e-07, + "loss": 0.1887, + "step": 14720 + }, + { + "epoch": 0.66, + "learning_rate": 7.810044768596756e-07, + "loss": 0.216, + "step": 14784 + }, + { + "epoch": 0.66, + "learning_rate": 7.800557383853657e-07, + "loss": 0.1803, + "step": 14848 + }, + { + "epoch": 0.66, + "learning_rate": 7.791069999110558e-07, + "loss": 0.1332, + "step": 14912 + }, + { + "epoch": 0.67, + "learning_rate": 7.781582614367458e-07, + "loss": 0.2439, + "step": 14976 + }, + { + "epoch": 0.67, + "learning_rate": 7.772095229624358e-07, + "loss": 0.1689, + "step": 15040 + }, + { + "epoch": 0.67, + "learning_rate": 7.76260784488126e-07, + "loss": 0.1823, + "step": 15104 + }, + { + "epoch": 0.67, + "learning_rate": 7.753120460138159e-07, + "loss": 0.1905, + "step": 15168 + }, + { + "epoch": 0.68, + "learning_rate": 7.74363307539506e-07, + "loss": 0.2558, + "step": 15232 + }, + { + "epoch": 0.68, + "learning_rate": 7.734145690651961e-07, + "loss": 0.1531, + "step": 15296 + }, + { + "epoch": 0.68, + "learning_rate": 7.724658305908861e-07, + "loss": 0.1849, + "step": 15360 + }, + { + "epoch": 0.69, + "learning_rate": 7.715170921165762e-07, + "loss": 0.1317, + "step": 15424 + }, + { + "epoch": 0.69, + "learning_rate": 7.705683536422663e-07, + "loss": 0.1096, + "step": 15488 + }, + { + "epoch": 0.69, + "learning_rate": 7.696196151679564e-07, + "loss": 0.2193, + "step": 15552 + }, + { + "epoch": 0.69, + "learning_rate": 7.686708766936464e-07, + "loss": 0.1658, + "step": 15616 + }, + { + "epoch": 0.7, + "learning_rate": 7.677221382193365e-07, + "loss": 0.1553, + "step": 15680 + }, + { + "epoch": 0.7, + "learning_rate": 7.667733997450265e-07, + "loss": 0.1772, + "step": 15744 + }, + { + "epoch": 0.7, + "learning_rate": 7.658246612707165e-07, + "loss": 0.2147, + "step": 15808 + }, + { + "epoch": 0.71, + "learning_rate": 7.648759227964067e-07, + "loss": 0.1096, + "step": 15872 + }, + { + "epoch": 0.71, + "learning_rate": 7.639271843220967e-07, + "loss": 0.1613, + "step": 15936 + }, + { + "epoch": 0.71, + "learning_rate": 7.629784458477867e-07, + "loss": 0.1488, + "step": 16000 + }, + { + "epoch": 0.71, + "learning_rate": 7.620297073734768e-07, + "loss": 0.2256, + "step": 16064 + }, + { + "epoch": 0.72, + "learning_rate": 7.610809688991669e-07, + "loss": 0.2512, + "step": 16128 + }, + { + "epoch": 0.72, + "learning_rate": 7.60132230424857e-07, + "loss": 0.1264, + "step": 16192 + }, + { + "epoch": 0.72, + "learning_rate": 7.59183491950547e-07, + "loss": 0.1162, + "step": 16256 + }, + { + "epoch": 0.73, + "learning_rate": 7.58234753476237e-07, + "loss": 0.1401, + "step": 16320 + }, + { + "epoch": 0.73, + "learning_rate": 7.572860150019271e-07, + "loss": 0.1336, + "step": 16384 + }, + { + "epoch": 0.73, + "learning_rate": 7.563372765276171e-07, + "loss": 0.1234, + "step": 16448 + }, + { + "epoch": 0.73, + "learning_rate": 7.553885380533072e-07, + "loss": 0.1195, + "step": 16512 + }, + { + "epoch": 0.74, + "learning_rate": 7.544397995789973e-07, + "loss": 0.2435, + "step": 16576 + }, + { + "epoch": 0.74, + "learning_rate": 7.534910611046873e-07, + "loss": 0.1109, + "step": 16640 + }, + { + "epoch": 0.74, + "learning_rate": 7.525423226303775e-07, + "loss": 0.2088, + "step": 16704 + }, + { + "epoch": 0.75, + "learning_rate": 7.515935841560675e-07, + "loss": 0.141, + "step": 16768 + }, + { + "epoch": 0.75, + "learning_rate": 7.506448456817574e-07, + "loss": 0.1428, + "step": 16832 + }, + { + "epoch": 0.75, + "learning_rate": 7.496961072074475e-07, + "loss": 0.1505, + "step": 16896 + }, + { + "epoch": 0.75, + "learning_rate": 7.487473687331376e-07, + "loss": 0.2152, + "step": 16960 + }, + { + "epoch": 0.76, + "learning_rate": 7.477986302588277e-07, + "loss": 0.2008, + "step": 17024 + }, + { + "epoch": 0.76, + "learning_rate": 7.468498917845177e-07, + "loss": 0.1872, + "step": 17088 + }, + { + "epoch": 0.76, + "learning_rate": 7.459011533102078e-07, + "loss": 0.1313, + "step": 17152 + }, + { + "epoch": 0.77, + "learning_rate": 7.449524148358979e-07, + "loss": 0.1099, + "step": 17216 + }, + { + "epoch": 0.77, + "learning_rate": 7.440036763615879e-07, + "loss": 0.138, + "step": 17280 + }, + { + "epoch": 0.77, + "learning_rate": 7.430549378872781e-07, + "loss": 0.1871, + "step": 17344 + }, + { + "epoch": 0.77, + "learning_rate": 7.42106199412968e-07, + "loss": 0.18, + "step": 17408 + }, + { + "epoch": 0.78, + "learning_rate": 7.41157460938658e-07, + "loss": 0.1337, + "step": 17472 + }, + { + "epoch": 0.78, + "learning_rate": 7.402087224643482e-07, + "loss": 0.1222, + "step": 17536 + }, + { + "epoch": 0.78, + "learning_rate": 7.392599839900382e-07, + "loss": 0.1434, + "step": 17600 + }, + { + "epoch": 0.79, + "learning_rate": 7.383112455157283e-07, + "loss": 0.1538, + "step": 17664 + }, + { + "epoch": 0.79, + "learning_rate": 7.373625070414183e-07, + "loss": 0.1908, + "step": 17728 + }, + { + "epoch": 0.79, + "learning_rate": 7.364137685671084e-07, + "loss": 0.1244, + "step": 17792 + }, + { + "epoch": 0.79, + "learning_rate": 7.354650300927985e-07, + "loss": 0.1593, + "step": 17856 + }, + { + "epoch": 0.8, + "learning_rate": 7.345162916184885e-07, + "loss": 0.1588, + "step": 17920 + }, + { + "epoch": 0.8, + "learning_rate": 7.335675531441787e-07, + "loss": 0.1639, + "step": 17984 + }, + { + "epoch": 0.8, + "learning_rate": 7.326188146698686e-07, + "loss": 0.1431, + "step": 18048 + }, + { + "epoch": 0.81, + "learning_rate": 7.316700761955586e-07, + "loss": 0.2002, + "step": 18112 + }, + { + "epoch": 0.81, + "learning_rate": 7.307213377212488e-07, + "loss": 0.1761, + "step": 18176 + }, + { + "epoch": 0.81, + "learning_rate": 7.297725992469388e-07, + "loss": 0.1597, + "step": 18240 + }, + { + "epoch": 0.81, + "learning_rate": 7.288238607726288e-07, + "loss": 0.1952, + "step": 18304 + }, + { + "epoch": 0.82, + "learning_rate": 7.27875122298319e-07, + "loss": 0.1843, + "step": 18368 + }, + { + "epoch": 0.82, + "learning_rate": 7.26926383824009e-07, + "loss": 0.1032, + "step": 18432 + }, + { + "epoch": 0.82, + "learning_rate": 7.259776453496991e-07, + "loss": 0.1952, + "step": 18496 + }, + { + "epoch": 0.83, + "learning_rate": 7.25028906875389e-07, + "loss": 0.193, + "step": 18560 + }, + { + "epoch": 0.83, + "learning_rate": 7.240801684010791e-07, + "loss": 0.137, + "step": 18624 + }, + { + "epoch": 0.83, + "learning_rate": 7.231314299267692e-07, + "loss": 0.1992, + "step": 18688 + }, + { + "epoch": 0.83, + "learning_rate": 7.221826914524592e-07, + "loss": 0.138, + "step": 18752 + }, + { + "epoch": 0.84, + "learning_rate": 7.212339529781494e-07, + "loss": 0.2263, + "step": 18816 + }, + { + "epoch": 0.84, + "learning_rate": 7.202852145038394e-07, + "loss": 0.2101, + "step": 18880 + }, + { + "epoch": 0.84, + "learning_rate": 7.193364760295294e-07, + "loss": 0.1731, + "step": 18944 + }, + { + "epoch": 0.85, + "learning_rate": 7.183877375552196e-07, + "loss": 0.1523, + "step": 19008 + }, + { + "epoch": 0.85, + "learning_rate": 7.174389990809096e-07, + "loss": 0.1671, + "step": 19072 + }, + { + "epoch": 0.85, + "learning_rate": 7.164902606065996e-07, + "loss": 0.1549, + "step": 19136 + }, + { + "epoch": 0.85, + "learning_rate": 7.155415221322897e-07, + "loss": 0.1346, + "step": 19200 + }, + { + "epoch": 0.86, + "learning_rate": 7.145927836579797e-07, + "loss": 0.2403, + "step": 19264 + }, + { + "epoch": 0.86, + "learning_rate": 7.136588692223308e-07, + "loss": 0.1909, + "step": 19328 + }, + { + "epoch": 0.86, + "learning_rate": 7.12710130748021e-07, + "loss": 0.1801, + "step": 19392 + }, + { + "epoch": 0.87, + "learning_rate": 7.11761392273711e-07, + "loss": 0.1196, + "step": 19456 + }, + { + "epoch": 0.87, + "learning_rate": 7.10812653799401e-07, + "loss": 0.0749, + "step": 19520 + }, + { + "epoch": 0.87, + "learning_rate": 7.098639153250912e-07, + "loss": 0.1386, + "step": 19584 + }, + { + "epoch": 0.87, + "learning_rate": 7.089151768507812e-07, + "loss": 0.219, + "step": 19648 + }, + { + "epoch": 0.88, + "learning_rate": 7.079664383764713e-07, + "loss": 0.1572, + "step": 19712 + }, + { + "epoch": 0.88, + "learning_rate": 7.070176999021614e-07, + "loss": 0.191, + "step": 19776 + }, + { + "epoch": 0.88, + "learning_rate": 7.060689614278513e-07, + "loss": 0.1882, + "step": 19840 + }, + { + "epoch": 0.89, + "learning_rate": 7.051202229535414e-07, + "loss": 0.1654, + "step": 19904 + }, + { + "epoch": 0.89, + "learning_rate": 7.041714844792315e-07, + "loss": 0.1397, + "step": 19968 + }, + { + "epoch": 0.89, + "learning_rate": 7.032227460049216e-07, + "loss": 0.1948, + "step": 20032 + }, + { + "epoch": 0.89, + "learning_rate": 7.022740075306116e-07, + "loss": 0.2171, + "step": 20096 + }, + { + "epoch": 0.9, + "learning_rate": 7.013252690563016e-07, + "loss": 0.2474, + "step": 20160 + }, + { + "epoch": 0.9, + "learning_rate": 7.003765305819918e-07, + "loss": 0.2014, + "step": 20224 + }, + { + "epoch": 0.9, + "learning_rate": 6.994277921076818e-07, + "loss": 0.1256, + "step": 20288 + }, + { + "epoch": 0.91, + "learning_rate": 6.984790536333718e-07, + "loss": 0.1634, + "step": 20352 + }, + { + "epoch": 0.91, + "learning_rate": 6.975303151590619e-07, + "loss": 0.1672, + "step": 20416 + }, + { + "epoch": 0.91, + "learning_rate": 6.965815766847519e-07, + "loss": 0.1773, + "step": 20480 + }, + { + "epoch": 0.91, + "learning_rate": 6.95632838210442e-07, + "loss": 0.1157, + "step": 20544 + }, + { + "epoch": 0.92, + "learning_rate": 6.946840997361321e-07, + "loss": 0.2241, + "step": 20608 + }, + { + "epoch": 0.92, + "learning_rate": 6.937353612618222e-07, + "loss": 0.1108, + "step": 20672 + }, + { + "epoch": 0.92, + "learning_rate": 6.927866227875122e-07, + "loss": 0.1821, + "step": 20736 + }, + { + "epoch": 0.93, + "learning_rate": 6.918378843132023e-07, + "loss": 0.1459, + "step": 20800 + }, + { + "epoch": 0.93, + "learning_rate": 6.908891458388924e-07, + "loss": 0.2022, + "step": 20864 + }, + { + "epoch": 0.93, + "learning_rate": 6.899404073645823e-07, + "loss": 0.1864, + "step": 20928 + }, + { + "epoch": 0.93, + "learning_rate": 6.889916688902723e-07, + "loss": 0.1436, + "step": 20992 + }, + { + "epoch": 0.94, + "learning_rate": 6.880429304159625e-07, + "loss": 0.1771, + "step": 21056 + }, + { + "epoch": 0.94, + "learning_rate": 6.870941919416525e-07, + "loss": 0.1782, + "step": 21120 + }, + { + "epoch": 0.94, + "learning_rate": 6.861454534673426e-07, + "loss": 0.1754, + "step": 21184 + }, + { + "epoch": 0.94, + "learning_rate": 6.851967149930327e-07, + "loss": 0.1483, + "step": 21248 + }, + { + "epoch": 0.95, + "learning_rate": 6.842479765187227e-07, + "loss": 0.1373, + "step": 21312 + }, + { + "epoch": 0.95, + "learning_rate": 6.833140620830739e-07, + "loss": 0.219, + "step": 21376 + }, + { + "epoch": 0.95, + "learning_rate": 6.82365323608764e-07, + "loss": 0.1474, + "step": 21440 + }, + { + "epoch": 0.96, + "learning_rate": 6.81416585134454e-07, + "loss": 0.1713, + "step": 21504 + }, + { + "epoch": 0.96, + "learning_rate": 6.80467846660144e-07, + "loss": 0.1034, + "step": 21568 + }, + { + "epoch": 0.96, + "learning_rate": 6.795191081858341e-07, + "loss": 0.185, + "step": 21632 + }, + { + "epoch": 0.96, + "learning_rate": 6.785703697115241e-07, + "loss": 0.284, + "step": 21696 + }, + { + "epoch": 0.97, + "learning_rate": 6.776216312372143e-07, + "loss": 0.1953, + "step": 21760 + }, + { + "epoch": 0.97, + "learning_rate": 6.766728927629043e-07, + "loss": 0.168, + "step": 21824 + }, + { + "epoch": 0.97, + "learning_rate": 6.757241542885943e-07, + "loss": 0.1852, + "step": 21888 + }, + { + "epoch": 0.98, + "learning_rate": 6.747754158142845e-07, + "loss": 0.1358, + "step": 21952 + }, + { + "epoch": 0.98, + "learning_rate": 6.738266773399745e-07, + "loss": 0.1885, + "step": 22016 + }, + { + "epoch": 0.98, + "learning_rate": 6.728779388656646e-07, + "loss": 0.22, + "step": 22080 + }, + { + "epoch": 0.98, + "learning_rate": 6.719292003913545e-07, + "loss": 0.214, + "step": 22144 + }, + { + "epoch": 0.99, + "learning_rate": 6.709804619170446e-07, + "loss": 0.1198, + "step": 22208 + }, + { + "epoch": 0.99, + "learning_rate": 6.700317234427347e-07, + "loss": 0.1458, + "step": 22272 + }, + { + "epoch": 0.99, + "learning_rate": 6.690829849684247e-07, + "loss": 0.2405, + "step": 22336 + }, + { + "epoch": 1.0, + "learning_rate": 6.681342464941149e-07, + "loss": 0.1162, + "step": 22400 + }, + { + "epoch": 1.0, + "learning_rate": 6.671855080198049e-07, + "loss": 0.1508, + "step": 22464 + }, + { + "epoch": 1.0, + "learning_rate": 6.662367695454949e-07, + "loss": 0.1805, + "step": 22528 + }, + { + "epoch": 1.0, + "learning_rate": 6.652880310711851e-07, + "loss": 0.0954, + "step": 22592 + }, + { + "epoch": 1.01, + "learning_rate": 6.643392925968751e-07, + "loss": 0.0756, + "step": 22656 + }, + { + "epoch": 1.01, + "learning_rate": 6.633905541225651e-07, + "loss": 0.1514, + "step": 22720 + }, + { + "epoch": 1.01, + "learning_rate": 6.624418156482551e-07, + "loss": 0.1408, + "step": 22784 + }, + { + "epoch": 1.02, + "learning_rate": 6.614930771739452e-07, + "loss": 0.1979, + "step": 22848 + }, + { + "epoch": 1.02, + "learning_rate": 6.605443386996353e-07, + "loss": 0.1277, + "step": 22912 + }, + { + "epoch": 1.02, + "learning_rate": 6.595956002253253e-07, + "loss": 0.0607, + "step": 22976 + }, + { + "epoch": 1.02, + "learning_rate": 6.586468617510155e-07, + "loss": 0.126, + "step": 23040 + }, + { + "epoch": 1.03, + "learning_rate": 6.576981232767055e-07, + "loss": 0.116, + "step": 23104 + }, + { + "epoch": 1.03, + "learning_rate": 6.567493848023955e-07, + "loss": 0.1233, + "step": 23168 + }, + { + "epoch": 1.03, + "learning_rate": 6.558006463280857e-07, + "loss": 0.0871, + "step": 23232 + }, + { + "epoch": 1.04, + "learning_rate": 6.548519078537756e-07, + "loss": 0.0974, + "step": 23296 + }, + { + "epoch": 1.04, + "learning_rate": 6.539031693794656e-07, + "loss": 0.1102, + "step": 23360 + }, + { + "epoch": 1.04, + "learning_rate": 6.529544309051558e-07, + "loss": 0.0905, + "step": 23424 + }, + { + "epoch": 1.04, + "learning_rate": 6.520056924308458e-07, + "loss": 0.0783, + "step": 23488 + }, + { + "epoch": 1.05, + "learning_rate": 6.510569539565359e-07, + "loss": 0.0835, + "step": 23552 + }, + { + "epoch": 1.05, + "learning_rate": 6.501082154822259e-07, + "loss": 0.1459, + "step": 23616 + }, + { + "epoch": 1.05, + "learning_rate": 6.49159477007916e-07, + "loss": 0.1042, + "step": 23680 + }, + { + "epoch": 1.06, + "learning_rate": 6.482107385336061e-07, + "loss": 0.1063, + "step": 23744 + }, + { + "epoch": 1.06, + "learning_rate": 6.472620000592961e-07, + "loss": 0.1107, + "step": 23808 + }, + { + "epoch": 1.06, + "learning_rate": 6.463132615849863e-07, + "loss": 0.1557, + "step": 23872 + }, + { + "epoch": 1.06, + "learning_rate": 6.453645231106762e-07, + "loss": 0.1015, + "step": 23936 + }, + { + "epoch": 1.07, + "learning_rate": 6.444157846363662e-07, + "loss": 0.1518, + "step": 24000 + }, + { + "epoch": 1.07, + "learning_rate": 6.434670461620564e-07, + "loss": 0.1283, + "step": 24064 + }, + { + "epoch": 1.07, + "learning_rate": 6.425183076877464e-07, + "loss": 0.1181, + "step": 24128 + }, + { + "epoch": 1.08, + "learning_rate": 6.415695692134365e-07, + "loss": 0.1414, + "step": 24192 + }, + { + "epoch": 1.08, + "learning_rate": 6.406208307391266e-07, + "loss": 0.0739, + "step": 24256 + }, + { + "epoch": 1.08, + "learning_rate": 6.396869163034777e-07, + "loss": 0.1461, + "step": 24320 + }, + { + "epoch": 1.08, + "learning_rate": 6.387381778291678e-07, + "loss": 0.1104, + "step": 24384 + }, + { + "epoch": 1.09, + "learning_rate": 6.377894393548579e-07, + "loss": 0.0982, + "step": 24448 + }, + { + "epoch": 1.09, + "learning_rate": 6.368407008805478e-07, + "loss": 0.1016, + "step": 24512 + }, + { + "epoch": 1.09, + "learning_rate": 6.358919624062378e-07, + "loss": 0.1343, + "step": 24576 + }, + { + "epoch": 1.1, + "learning_rate": 6.34943223931928e-07, + "loss": 0.0936, + "step": 24640 + }, + { + "epoch": 1.1, + "learning_rate": 6.33994485457618e-07, + "loss": 0.1245, + "step": 24704 + }, + { + "epoch": 1.1, + "learning_rate": 6.330457469833081e-07, + "loss": 0.1656, + "step": 24768 + }, + { + "epoch": 1.1, + "learning_rate": 6.320970085089982e-07, + "loss": 0.1573, + "step": 24832 + }, + { + "epoch": 1.11, + "learning_rate": 6.311482700346882e-07, + "loss": 0.1143, + "step": 24896 + }, + { + "epoch": 1.11, + "learning_rate": 6.301995315603783e-07, + "loss": 0.1048, + "step": 24960 + }, + { + "epoch": 1.11, + "learning_rate": 6.292507930860684e-07, + "loss": 0.0797, + "step": 25024 + }, + { + "epoch": 1.12, + "learning_rate": 6.283020546117584e-07, + "loss": 0.0676, + "step": 25088 + }, + { + "epoch": 1.12, + "learning_rate": 6.273533161374484e-07, + "loss": 0.1028, + "step": 25152 + }, + { + "epoch": 1.12, + "learning_rate": 6.264045776631385e-07, + "loss": 0.1148, + "step": 25216 + }, + { + "epoch": 1.12, + "learning_rate": 6.254558391888286e-07, + "loss": 0.1185, + "step": 25280 + }, + { + "epoch": 1.13, + "learning_rate": 6.245071007145186e-07, + "loss": 0.1288, + "step": 25344 + }, + { + "epoch": 1.13, + "learning_rate": 6.235583622402088e-07, + "loss": 0.1474, + "step": 25408 + }, + { + "epoch": 1.13, + "learning_rate": 6.226096237658988e-07, + "loss": 0.1075, + "step": 25472 + }, + { + "epoch": 1.14, + "learning_rate": 6.216608852915888e-07, + "loss": 0.1531, + "step": 25536 + }, + { + "epoch": 1.14, + "learning_rate": 6.207121468172789e-07, + "loss": 0.1072, + "step": 25600 + }, + { + "epoch": 1.14, + "learning_rate": 6.19763408342969e-07, + "loss": 0.0739, + "step": 25664 + }, + { + "epoch": 1.14, + "learning_rate": 6.188146698686589e-07, + "loss": 0.0674, + "step": 25728 + }, + { + "epoch": 1.15, + "learning_rate": 6.17865931394349e-07, + "loss": 0.0712, + "step": 25792 + }, + { + "epoch": 1.15, + "learning_rate": 6.169171929200391e-07, + "loss": 0.116, + "step": 25856 + }, + { + "epoch": 1.15, + "learning_rate": 6.159684544457292e-07, + "loss": 0.1242, + "step": 25920 + }, + { + "epoch": 1.16, + "learning_rate": 6.150197159714192e-07, + "loss": 0.0647, + "step": 25984 + }, + { + "epoch": 1.16, + "learning_rate": 6.140709774971093e-07, + "loss": 0.1281, + "step": 26048 + }, + { + "epoch": 1.16, + "learning_rate": 6.131222390227994e-07, + "loss": 0.1245, + "step": 26112 + }, + { + "epoch": 1.16, + "learning_rate": 6.121735005484894e-07, + "loss": 0.1712, + "step": 26176 + }, + { + "epoch": 1.17, + "learning_rate": 6.112247620741796e-07, + "loss": 0.1322, + "step": 26240 + }, + { + "epoch": 1.17, + "learning_rate": 6.102760235998695e-07, + "loss": 0.1391, + "step": 26304 + }, + { + "epoch": 1.17, + "learning_rate": 6.093272851255595e-07, + "loss": 0.0661, + "step": 26368 + }, + { + "epoch": 1.18, + "learning_rate": 6.083785466512496e-07, + "loss": 0.153, + "step": 26432 + }, + { + "epoch": 1.18, + "learning_rate": 6.074298081769397e-07, + "loss": 0.0904, + "step": 26496 + }, + { + "epoch": 1.18, + "learning_rate": 6.064810697026298e-07, + "loss": 0.1427, + "step": 26560 + }, + { + "epoch": 1.18, + "learning_rate": 6.055323312283198e-07, + "loss": 0.0734, + "step": 26624 + }, + { + "epoch": 1.19, + "learning_rate": 6.045835927540099e-07, + "loss": 0.0615, + "step": 26688 + }, + { + "epoch": 1.19, + "learning_rate": 6.036348542797e-07, + "loss": 0.1454, + "step": 26752 + }, + { + "epoch": 1.19, + "learning_rate": 6.027009398440512e-07, + "loss": 0.1196, + "step": 26816 + }, + { + "epoch": 1.2, + "learning_rate": 6.017522013697411e-07, + "loss": 0.1487, + "step": 26880 + }, + { + "epoch": 1.2, + "learning_rate": 6.008182869340923e-07, + "loss": 0.1299, + "step": 26944 + }, + { + "epoch": 1.2, + "learning_rate": 5.998695484597823e-07, + "loss": 0.111, + "step": 27008 + }, + { + "epoch": 1.2, + "learning_rate": 5.989208099854724e-07, + "loss": 0.0529, + "step": 27072 + }, + { + "epoch": 1.21, + "learning_rate": 5.979720715111625e-07, + "loss": 0.1411, + "step": 27136 + }, + { + "epoch": 1.21, + "learning_rate": 5.970233330368525e-07, + "loss": 0.1335, + "step": 27200 + }, + { + "epoch": 1.21, + "learning_rate": 5.960745945625426e-07, + "loss": 0.1229, + "step": 27264 + }, + { + "epoch": 1.22, + "learning_rate": 5.951258560882326e-07, + "loss": 0.0501, + "step": 27328 + }, + { + "epoch": 1.22, + "learning_rate": 5.941771176139228e-07, + "loss": 0.0866, + "step": 27392 + }, + { + "epoch": 1.22, + "learning_rate": 5.932283791396128e-07, + "loss": 0.0643, + "step": 27456 + }, + { + "epoch": 1.22, + "learning_rate": 5.922796406653027e-07, + "loss": 0.0793, + "step": 27520 + }, + { + "epoch": 1.23, + "learning_rate": 5.913309021909929e-07, + "loss": 0.0627, + "step": 27584 + }, + { + "epoch": 1.23, + "learning_rate": 5.903821637166829e-07, + "loss": 0.1354, + "step": 27648 + }, + { + "epoch": 1.23, + "learning_rate": 5.89433425242373e-07, + "loss": 0.1341, + "step": 27712 + }, + { + "epoch": 1.24, + "learning_rate": 5.884846867680631e-07, + "loss": 0.1205, + "step": 27776 + }, + { + "epoch": 1.24, + "learning_rate": 5.875359482937531e-07, + "loss": 0.1247, + "step": 27840 + }, + { + "epoch": 1.24, + "learning_rate": 5.865872098194432e-07, + "loss": 0.1071, + "step": 27904 + }, + { + "epoch": 1.24, + "learning_rate": 5.856384713451333e-07, + "loss": 0.125, + "step": 27968 + }, + { + "epoch": 1.25, + "learning_rate": 5.846897328708234e-07, + "loss": 0.0815, + "step": 28032 + }, + { + "epoch": 1.25, + "learning_rate": 5.837409943965133e-07, + "loss": 0.0916, + "step": 28096 + }, + { + "epoch": 1.25, + "learning_rate": 5.827922559222033e-07, + "loss": 0.1076, + "step": 28160 + }, + { + "epoch": 1.26, + "learning_rate": 5.818435174478935e-07, + "loss": 0.1217, + "step": 28224 + }, + { + "epoch": 1.26, + "learning_rate": 5.808947789735835e-07, + "loss": 0.0918, + "step": 28288 + }, + { + "epoch": 1.26, + "learning_rate": 5.799460404992736e-07, + "loss": 0.1532, + "step": 28352 + }, + { + "epoch": 1.26, + "learning_rate": 5.789973020249637e-07, + "loss": 0.0839, + "step": 28416 + }, + { + "epoch": 1.27, + "learning_rate": 5.780485635506537e-07, + "loss": 0.1425, + "step": 28480 + }, + { + "epoch": 1.27, + "learning_rate": 5.770998250763438e-07, + "loss": 0.0679, + "step": 28544 + }, + { + "epoch": 1.27, + "learning_rate": 5.761510866020339e-07, + "loss": 0.0667, + "step": 28608 + }, + { + "epoch": 1.28, + "learning_rate": 5.75202348127724e-07, + "loss": 0.1245, + "step": 28672 + }, + { + "epoch": 1.28, + "learning_rate": 5.742536096534139e-07, + "loss": 0.1255, + "step": 28736 + }, + { + "epoch": 1.28, + "learning_rate": 5.73304871179104e-07, + "loss": 0.0819, + "step": 28800 + }, + { + "epoch": 1.28, + "learning_rate": 5.723561327047941e-07, + "loss": 0.1035, + "step": 28864 + }, + { + "epoch": 1.29, + "learning_rate": 5.714073942304841e-07, + "loss": 0.1238, + "step": 28928 + }, + { + "epoch": 1.29, + "learning_rate": 5.704586557561741e-07, + "loss": 0.1312, + "step": 28992 + }, + { + "epoch": 1.29, + "learning_rate": 5.695099172818643e-07, + "loss": 0.0645, + "step": 29056 + }, + { + "epoch": 1.3, + "learning_rate": 5.685611788075543e-07, + "loss": 0.1143, + "step": 29120 + }, + { + "epoch": 1.3, + "learning_rate": 5.676124403332444e-07, + "loss": 0.0848, + "step": 29184 + }, + { + "epoch": 1.3, + "learning_rate": 5.666637018589344e-07, + "loss": 0.1051, + "step": 29248 + }, + { + "epoch": 1.3, + "learning_rate": 5.657149633846244e-07, + "loss": 0.0981, + "step": 29312 + }, + { + "epoch": 1.31, + "learning_rate": 5.647662249103145e-07, + "loss": 0.0894, + "step": 29376 + }, + { + "epoch": 1.31, + "learning_rate": 5.638174864360046e-07, + "loss": 0.1164, + "step": 29440 + }, + { + "epoch": 1.31, + "learning_rate": 5.628687479616947e-07, + "loss": 0.0816, + "step": 29504 + }, + { + "epoch": 1.31, + "learning_rate": 5.619200094873847e-07, + "loss": 0.1426, + "step": 29568 + }, + { + "epoch": 1.32, + "learning_rate": 5.609712710130748e-07, + "loss": 0.1748, + "step": 29632 + }, + { + "epoch": 1.32, + "learning_rate": 5.600225325387649e-07, + "loss": 0.0979, + "step": 29696 + }, + { + "epoch": 1.32, + "learning_rate": 5.590737940644549e-07, + "loss": 0.1499, + "step": 29760 + }, + { + "epoch": 1.33, + "learning_rate": 5.58125055590145e-07, + "loss": 0.0845, + "step": 29824 + }, + { + "epoch": 1.33, + "learning_rate": 5.57176317115835e-07, + "loss": 0.1298, + "step": 29888 + }, + { + "epoch": 1.33, + "learning_rate": 5.56227578641525e-07, + "loss": 0.1241, + "step": 29952 + }, + { + "epoch": 1.33, + "learning_rate": 5.552788401672151e-07, + "loss": 0.1174, + "step": 30016 + }, + { + "epoch": 1.34, + "learning_rate": 5.543301016929052e-07, + "loss": 0.1209, + "step": 30080 + }, + { + "epoch": 1.34, + "learning_rate": 5.533813632185953e-07, + "loss": 0.1071, + "step": 30144 + }, + { + "epoch": 1.34, + "learning_rate": 5.524326247442853e-07, + "loss": 0.0615, + "step": 30208 + }, + { + "epoch": 1.35, + "learning_rate": 5.514838862699754e-07, + "loss": 0.0797, + "step": 30272 + }, + { + "epoch": 1.35, + "learning_rate": 5.505351477956655e-07, + "loss": 0.1182, + "step": 30336 + }, + { + "epoch": 1.35, + "learning_rate": 5.495864093213554e-07, + "loss": 0.1027, + "step": 30400 + }, + { + "epoch": 1.35, + "learning_rate": 5.486376708470456e-07, + "loss": 0.0997, + "step": 30464 + }, + { + "epoch": 1.36, + "learning_rate": 5.476889323727356e-07, + "loss": 0.1624, + "step": 30528 + }, + { + "epoch": 1.36, + "learning_rate": 5.467401938984256e-07, + "loss": 0.0811, + "step": 30592 + }, + { + "epoch": 1.36, + "learning_rate": 5.457914554241158e-07, + "loss": 0.0667, + "step": 30656 + }, + { + "epoch": 1.37, + "learning_rate": 5.448427169498058e-07, + "loss": 0.1017, + "step": 30720 + }, + { + "epoch": 1.37, + "learning_rate": 5.438939784754958e-07, + "loss": 0.0942, + "step": 30784 + }, + { + "epoch": 1.37, + "learning_rate": 5.429452400011859e-07, + "loss": 0.1741, + "step": 30848 + }, + { + "epoch": 1.37, + "learning_rate": 5.41996501526876e-07, + "loss": 0.1527, + "step": 30912 + }, + { + "epoch": 1.38, + "learning_rate": 5.41047763052566e-07, + "loss": 0.0979, + "step": 30976 + }, + { + "epoch": 1.38, + "learning_rate": 5.40099024578256e-07, + "loss": 0.0569, + "step": 31040 + }, + { + "epoch": 1.38, + "learning_rate": 5.391502861039461e-07, + "loss": 0.1476, + "step": 31104 + }, + { + "epoch": 1.39, + "learning_rate": 5.382015476296362e-07, + "loss": 0.097, + "step": 31168 + }, + { + "epoch": 1.39, + "learning_rate": 5.372676331939874e-07, + "loss": 0.0647, + "step": 31232 + }, + { + "epoch": 1.39, + "learning_rate": 5.363188947196774e-07, + "loss": 0.103, + "step": 31296 + }, + { + "epoch": 1.39, + "learning_rate": 5.353701562453674e-07, + "loss": 0.0903, + "step": 31360 + }, + { + "epoch": 1.4, + "learning_rate": 5.344214177710576e-07, + "loss": 0.1414, + "step": 31424 + }, + { + "epoch": 1.4, + "learning_rate": 5.334726792967476e-07, + "loss": 0.0945, + "step": 31488 + }, + { + "epoch": 1.4, + "learning_rate": 5.325239408224377e-07, + "loss": 0.1426, + "step": 31552 + }, + { + "epoch": 1.41, + "learning_rate": 5.315752023481276e-07, + "loss": 0.1063, + "step": 31616 + }, + { + "epoch": 1.41, + "learning_rate": 5.306264638738177e-07, + "loss": 0.0921, + "step": 31680 + }, + { + "epoch": 1.41, + "learning_rate": 5.296777253995078e-07, + "loss": 0.0904, + "step": 31744 + }, + { + "epoch": 1.41, + "learning_rate": 5.287289869251978e-07, + "loss": 0.1278, + "step": 31808 + }, + { + "epoch": 1.42, + "learning_rate": 5.27780248450888e-07, + "loss": 0.1034, + "step": 31872 + }, + { + "epoch": 1.42, + "learning_rate": 5.26831509976578e-07, + "loss": 0.0908, + "step": 31936 + }, + { + "epoch": 1.42, + "learning_rate": 5.25882771502268e-07, + "loss": 0.1339, + "step": 32000 + }, + { + "epoch": 1.43, + "learning_rate": 5.249340330279582e-07, + "loss": 0.0777, + "step": 32064 + }, + { + "epoch": 1.43, + "learning_rate": 5.239852945536482e-07, + "loss": 0.1353, + "step": 32128 + }, + { + "epoch": 1.43, + "learning_rate": 5.230365560793382e-07, + "loss": 0.1369, + "step": 32192 + }, + { + "epoch": 1.43, + "learning_rate": 5.220878176050283e-07, + "loss": 0.087, + "step": 32256 + }, + { + "epoch": 1.44, + "learning_rate": 5.211390791307183e-07, + "loss": 0.129, + "step": 32320 + }, + { + "epoch": 1.44, + "learning_rate": 5.201903406564084e-07, + "loss": 0.1012, + "step": 32384 + }, + { + "epoch": 1.44, + "learning_rate": 5.192416021820984e-07, + "loss": 0.1114, + "step": 32448 + }, + { + "epoch": 1.45, + "learning_rate": 5.182928637077886e-07, + "loss": 0.0917, + "step": 32512 + }, + { + "epoch": 1.45, + "learning_rate": 5.173441252334786e-07, + "loss": 0.0852, + "step": 32576 + }, + { + "epoch": 1.45, + "learning_rate": 5.163953867591686e-07, + "loss": 0.0937, + "step": 32640 + }, + { + "epoch": 1.45, + "learning_rate": 5.154466482848588e-07, + "loss": 0.1014, + "step": 32704 + }, + { + "epoch": 1.46, + "learning_rate": 5.144979098105487e-07, + "loss": 0.0621, + "step": 32768 + }, + { + "epoch": 1.46, + "learning_rate": 5.135491713362388e-07, + "loss": 0.0927, + "step": 32832 + }, + { + "epoch": 1.46, + "learning_rate": 5.126004328619289e-07, + "loss": 0.1182, + "step": 32896 + }, + { + "epoch": 1.47, + "learning_rate": 5.116516943876189e-07, + "loss": 0.1206, + "step": 32960 + }, + { + "epoch": 1.47, + "learning_rate": 5.10702955913309e-07, + "loss": 0.1605, + "step": 33024 + }, + { + "epoch": 1.47, + "learning_rate": 5.097542174389991e-07, + "loss": 0.1525, + "step": 33088 + }, + { + "epoch": 1.47, + "learning_rate": 5.088054789646892e-07, + "loss": 0.0679, + "step": 33152 + }, + { + "epoch": 1.48, + "learning_rate": 5.078567404903792e-07, + "loss": 0.1432, + "step": 33216 + }, + { + "epoch": 1.48, + "learning_rate": 5.069080020160692e-07, + "loss": 0.1117, + "step": 33280 + }, + { + "epoch": 1.48, + "learning_rate": 5.059592635417593e-07, + "loss": 0.0866, + "step": 33344 + }, + { + "epoch": 1.49, + "learning_rate": 5.050105250674493e-07, + "loss": 0.0727, + "step": 33408 + }, + { + "epoch": 1.49, + "learning_rate": 5.040617865931393e-07, + "loss": 0.0671, + "step": 33472 + }, + { + "epoch": 1.49, + "learning_rate": 5.031130481188295e-07, + "loss": 0.1083, + "step": 33536 + }, + { + "epoch": 1.49, + "learning_rate": 5.021791336831806e-07, + "loss": 0.1442, + "step": 33600 + }, + { + "epoch": 1.5, + "learning_rate": 5.012303952088707e-07, + "loss": 0.1382, + "step": 33664 + }, + { + "epoch": 1.5, + "learning_rate": 5.002816567345607e-07, + "loss": 0.1495, + "step": 33728 + }, + { + "epoch": 1.5, + "learning_rate": 4.993329182602508e-07, + "loss": 0.1246, + "step": 33792 + }, + { + "epoch": 1.51, + "learning_rate": 4.983841797859409e-07, + "loss": 0.0701, + "step": 33856 + }, + { + "epoch": 1.51, + "learning_rate": 4.974354413116309e-07, + "loss": 0.1176, + "step": 33920 + }, + { + "epoch": 1.51, + "learning_rate": 4.964867028373209e-07, + "loss": 0.1294, + "step": 33984 + }, + { + "epoch": 1.51, + "learning_rate": 4.95537964363011e-07, + "loss": 0.1144, + "step": 34048 + }, + { + "epoch": 1.52, + "learning_rate": 4.945892258887011e-07, + "loss": 0.1356, + "step": 34112 + }, + { + "epoch": 1.52, + "learning_rate": 4.936404874143911e-07, + "loss": 0.0847, + "step": 34176 + }, + { + "epoch": 1.52, + "learning_rate": 4.926917489400812e-07, + "loss": 0.1499, + "step": 34240 + }, + { + "epoch": 1.53, + "learning_rate": 4.917430104657713e-07, + "loss": 0.1345, + "step": 34304 + }, + { + "epoch": 1.53, + "learning_rate": 4.907942719914614e-07, + "loss": 0.0594, + "step": 34368 + }, + { + "epoch": 1.53, + "learning_rate": 4.898455335171514e-07, + "loss": 0.1239, + "step": 34432 + }, + { + "epoch": 1.53, + "learning_rate": 4.888967950428415e-07, + "loss": 0.0936, + "step": 34496 + }, + { + "epoch": 1.54, + "learning_rate": 4.879480565685315e-07, + "loss": 0.0963, + "step": 34560 + }, + { + "epoch": 1.54, + "learning_rate": 4.869993180942215e-07, + "loss": 0.1015, + "step": 34624 + }, + { + "epoch": 1.54, + "learning_rate": 4.860505796199116e-07, + "loss": 0.0758, + "step": 34688 + }, + { + "epoch": 1.55, + "learning_rate": 4.851018411456017e-07, + "loss": 0.0954, + "step": 34752 + }, + { + "epoch": 1.55, + "learning_rate": 4.841531026712917e-07, + "loss": 0.1693, + "step": 34816 + }, + { + "epoch": 1.55, + "learning_rate": 4.832043641969818e-07, + "loss": 0.1428, + "step": 34880 + }, + { + "epoch": 1.55, + "learning_rate": 4.822556257226719e-07, + "loss": 0.1161, + "step": 34944 + }, + { + "epoch": 1.56, + "learning_rate": 4.81306887248362e-07, + "loss": 0.0599, + "step": 35008 + }, + { + "epoch": 1.56, + "learning_rate": 4.803581487740521e-07, + "loss": 0.1093, + "step": 35072 + }, + { + "epoch": 1.56, + "learning_rate": 4.79409410299742e-07, + "loss": 0.0763, + "step": 35136 + }, + { + "epoch": 1.57, + "learning_rate": 4.784606718254321e-07, + "loss": 0.119, + "step": 35200 + }, + { + "epoch": 1.57, + "learning_rate": 4.775119333511221e-07, + "loss": 0.1467, + "step": 35264 + }, + { + "epoch": 1.57, + "learning_rate": 4.765631948768122e-07, + "loss": 0.1311, + "step": 35328 + }, + { + "epoch": 1.57, + "learning_rate": 4.7561445640250225e-07, + "loss": 0.1628, + "step": 35392 + }, + { + "epoch": 1.58, + "learning_rate": 4.7466571792819234e-07, + "loss": 0.0847, + "step": 35456 + }, + { + "epoch": 1.58, + "learning_rate": 4.737169794538824e-07, + "loss": 0.0888, + "step": 35520 + }, + { + "epoch": 1.58, + "learning_rate": 4.7276824097957247e-07, + "loss": 0.1145, + "step": 35584 + }, + { + "epoch": 1.59, + "learning_rate": 4.7181950250526256e-07, + "loss": 0.1296, + "step": 35648 + }, + { + "epoch": 1.59, + "learning_rate": 4.7087076403095254e-07, + "loss": 0.0984, + "step": 35712 + }, + { + "epoch": 1.59, + "learning_rate": 4.6992202555664263e-07, + "loss": 0.1509, + "step": 35776 + }, + { + "epoch": 1.59, + "learning_rate": 4.6897328708233267e-07, + "loss": 0.1288, + "step": 35840 + }, + { + "epoch": 1.6, + "learning_rate": 4.6802454860802276e-07, + "loss": 0.1383, + "step": 35904 + }, + { + "epoch": 1.6, + "learning_rate": 4.6707581013371285e-07, + "loss": 0.1004, + "step": 35968 + }, + { + "epoch": 1.6, + "learning_rate": 4.6614189569806395e-07, + "loss": 0.1098, + "step": 36032 + }, + { + "epoch": 1.61, + "learning_rate": 4.6519315722375404e-07, + "loss": 0.1092, + "step": 36096 + }, + { + "epoch": 1.61, + "learning_rate": 4.642444187494441e-07, + "loss": 0.1028, + "step": 36160 + }, + { + "epoch": 1.61, + "learning_rate": 4.6329568027513417e-07, + "loss": 0.1196, + "step": 36224 + }, + { + "epoch": 1.61, + "learning_rate": 4.6234694180082415e-07, + "loss": 0.1209, + "step": 36288 + }, + { + "epoch": 1.62, + "learning_rate": 4.6139820332651424e-07, + "loss": 0.0756, + "step": 36352 + }, + { + "epoch": 1.62, + "learning_rate": 4.6044946485220433e-07, + "loss": 0.0949, + "step": 36416 + }, + { + "epoch": 1.62, + "learning_rate": 4.5950072637789437e-07, + "loss": 0.0852, + "step": 36480 + }, + { + "epoch": 1.63, + "learning_rate": 4.585668119422455e-07, + "loss": 0.1649, + "step": 36544 + }, + { + "epoch": 1.63, + "learning_rate": 4.5761807346793556e-07, + "loss": 0.1239, + "step": 36608 + }, + { + "epoch": 1.63, + "learning_rate": 4.5666933499362565e-07, + "loss": 0.0986, + "step": 36672 + }, + { + "epoch": 1.63, + "learning_rate": 4.5572059651931574e-07, + "loss": 0.1478, + "step": 36736 + }, + { + "epoch": 1.64, + "learning_rate": 4.547718580450058e-07, + "loss": 0.1032, + "step": 36800 + }, + { + "epoch": 1.64, + "learning_rate": 4.538231195706958e-07, + "loss": 0.1364, + "step": 36864 + }, + { + "epoch": 1.64, + "learning_rate": 4.5287438109638585e-07, + "loss": 0.0667, + "step": 36928 + }, + { + "epoch": 1.65, + "learning_rate": 4.5192564262207594e-07, + "loss": 0.0836, + "step": 36992 + }, + { + "epoch": 1.65, + "learning_rate": 4.5097690414776603e-07, + "loss": 0.1446, + "step": 37056 + }, + { + "epoch": 1.65, + "learning_rate": 4.50028165673456e-07, + "loss": 0.0684, + "step": 37120 + }, + { + "epoch": 1.65, + "learning_rate": 4.490794271991461e-07, + "loss": 0.1048, + "step": 37184 + }, + { + "epoch": 1.66, + "learning_rate": 4.4813068872483615e-07, + "loss": 0.0935, + "step": 37248 + }, + { + "epoch": 1.66, + "learning_rate": 4.4718195025052624e-07, + "loss": 0.1176, + "step": 37312 + }, + { + "epoch": 1.66, + "learning_rate": 4.4623321177621633e-07, + "loss": 0.095, + "step": 37376 + }, + { + "epoch": 1.67, + "learning_rate": 4.452844733019063e-07, + "loss": 0.139, + "step": 37440 + }, + { + "epoch": 1.67, + "learning_rate": 4.443357348275964e-07, + "loss": 0.0939, + "step": 37504 + }, + { + "epoch": 1.67, + "learning_rate": 4.433869963532865e-07, + "loss": 0.0793, + "step": 37568 + }, + { + "epoch": 1.67, + "learning_rate": 4.4243825787897653e-07, + "loss": 0.1213, + "step": 37632 + }, + { + "epoch": 1.68, + "learning_rate": 4.414895194046666e-07, + "loss": 0.1258, + "step": 37696 + }, + { + "epoch": 1.68, + "learning_rate": 4.405407809303566e-07, + "loss": 0.0902, + "step": 37760 + }, + { + "epoch": 1.68, + "learning_rate": 4.395920424560467e-07, + "loss": 0.1153, + "step": 37824 + }, + { + "epoch": 1.68, + "learning_rate": 4.386433039817368e-07, + "loss": 0.1569, + "step": 37888 + }, + { + "epoch": 1.69, + "learning_rate": 4.376945655074268e-07, + "loss": 0.0657, + "step": 37952 + }, + { + "epoch": 1.69, + "learning_rate": 4.367458270331169e-07, + "loss": 0.1206, + "step": 38016 + }, + { + "epoch": 1.69, + "learning_rate": 4.357970885588069e-07, + "loss": 0.1313, + "step": 38080 + }, + { + "epoch": 1.7, + "learning_rate": 4.34848350084497e-07, + "loss": 0.1358, + "step": 38144 + }, + { + "epoch": 1.7, + "learning_rate": 4.338996116101871e-07, + "loss": 0.0711, + "step": 38208 + }, + { + "epoch": 1.7, + "learning_rate": 4.329508731358771e-07, + "loss": 0.1359, + "step": 38272 + }, + { + "epoch": 1.7, + "learning_rate": 4.3200213466156716e-07, + "loss": 0.087, + "step": 38336 + }, + { + "epoch": 1.71, + "learning_rate": 4.3105339618725725e-07, + "loss": 0.0831, + "step": 38400 + }, + { + "epoch": 1.71, + "learning_rate": 4.301046577129473e-07, + "loss": 0.065, + "step": 38464 + }, + { + "epoch": 1.71, + "learning_rate": 4.291559192386374e-07, + "loss": 0.1132, + "step": 38528 + }, + { + "epoch": 1.72, + "learning_rate": 4.282071807643274e-07, + "loss": 0.1098, + "step": 38592 + }, + { + "epoch": 1.72, + "learning_rate": 4.2725844229001745e-07, + "loss": 0.0918, + "step": 38656 + }, + { + "epoch": 1.72, + "learning_rate": 4.2630970381570754e-07, + "loss": 0.1517, + "step": 38720 + }, + { + "epoch": 1.72, + "learning_rate": 4.253609653413976e-07, + "loss": 0.0869, + "step": 38784 + }, + { + "epoch": 1.73, + "learning_rate": 4.2441222686708767e-07, + "loss": 0.1281, + "step": 38848 + }, + { + "epoch": 1.73, + "learning_rate": 4.234634883927777e-07, + "loss": 0.0536, + "step": 38912 + }, + { + "epoch": 1.73, + "learning_rate": 4.2251474991846775e-07, + "loss": 0.1776, + "step": 38976 + }, + { + "epoch": 1.74, + "learning_rate": 4.2156601144415784e-07, + "loss": 0.15, + "step": 39040 + }, + { + "epoch": 1.74, + "learning_rate": 4.206172729698479e-07, + "loss": 0.0522, + "step": 39104 + }, + { + "epoch": 1.74, + "learning_rate": 4.1966853449553797e-07, + "loss": 0.0863, + "step": 39168 + }, + { + "epoch": 1.74, + "learning_rate": 4.18719796021228e-07, + "loss": 0.0848, + "step": 39232 + }, + { + "epoch": 1.75, + "learning_rate": 4.1777105754691804e-07, + "loss": 0.1043, + "step": 39296 + }, + { + "epoch": 1.75, + "learning_rate": 4.1682231907260813e-07, + "loss": 0.0975, + "step": 39360 + }, + { + "epoch": 1.75, + "learning_rate": 4.1587358059829817e-07, + "loss": 0.155, + "step": 39424 + }, + { + "epoch": 1.76, + "learning_rate": 4.1492484212398826e-07, + "loss": 0.0518, + "step": 39488 + }, + { + "epoch": 1.76, + "learning_rate": 4.139761036496783e-07, + "loss": 0.1139, + "step": 39552 + }, + { + "epoch": 1.76, + "learning_rate": 4.1304218921402945e-07, + "loss": 0.1094, + "step": 39616 + }, + { + "epoch": 1.76, + "learning_rate": 4.1209345073971954e-07, + "loss": 0.0979, + "step": 39680 + }, + { + "epoch": 1.77, + "learning_rate": 4.111447122654096e-07, + "loss": 0.0968, + "step": 39744 + }, + { + "epoch": 1.77, + "learning_rate": 4.101959737910996e-07, + "loss": 0.0828, + "step": 39808 + }, + { + "epoch": 1.77, + "learning_rate": 4.0924723531678965e-07, + "loss": 0.1267, + "step": 39872 + }, + { + "epoch": 1.78, + "learning_rate": 4.0829849684247974e-07, + "loss": 0.1223, + "step": 39936 + }, + { + "epoch": 1.78, + "learning_rate": 4.0734975836816983e-07, + "loss": 0.0536, + "step": 40000 + }, + { + "epoch": 1.78, + "learning_rate": 4.0640101989385987e-07, + "loss": 0.0832, + "step": 40064 + }, + { + "epoch": 1.78, + "learning_rate": 4.054522814195499e-07, + "loss": 0.1293, + "step": 40128 + }, + { + "epoch": 1.79, + "learning_rate": 4.0450354294524e-07, + "loss": 0.0894, + "step": 40192 + }, + { + "epoch": 1.79, + "learning_rate": 4.0355480447093004e-07, + "loss": 0.0937, + "step": 40256 + }, + { + "epoch": 1.79, + "learning_rate": 4.0260606599662013e-07, + "loss": 0.0785, + "step": 40320 + }, + { + "epoch": 1.8, + "learning_rate": 4.0165732752231016e-07, + "loss": 0.1403, + "step": 40384 + }, + { + "epoch": 1.8, + "learning_rate": 4.007085890480002e-07, + "loss": 0.0647, + "step": 40448 + }, + { + "epoch": 1.8, + "learning_rate": 3.997598505736903e-07, + "loss": 0.108, + "step": 40512 + }, + { + "epoch": 1.8, + "learning_rate": 3.9881111209938033e-07, + "loss": 0.1352, + "step": 40576 + }, + { + "epoch": 1.81, + "learning_rate": 3.978623736250704e-07, + "loss": 0.1345, + "step": 40640 + }, + { + "epoch": 1.81, + "learning_rate": 3.969136351507604e-07, + "loss": 0.0638, + "step": 40704 + }, + { + "epoch": 1.81, + "learning_rate": 3.959648966764505e-07, + "loss": 0.1575, + "step": 40768 + }, + { + "epoch": 1.82, + "learning_rate": 3.950161582021406e-07, + "loss": 0.0485, + "step": 40832 + }, + { + "epoch": 1.82, + "learning_rate": 3.940674197278306e-07, + "loss": 0.1627, + "step": 40896 + }, + { + "epoch": 1.82, + "learning_rate": 3.931186812535207e-07, + "loss": 0.096, + "step": 40960 + }, + { + "epoch": 1.82, + "learning_rate": 3.9216994277921075e-07, + "loss": 0.1305, + "step": 41024 + }, + { + "epoch": 1.83, + "learning_rate": 3.912212043049008e-07, + "loss": 0.0839, + "step": 41088 + }, + { + "epoch": 1.83, + "learning_rate": 3.902724658305909e-07, + "loss": 0.1202, + "step": 41152 + }, + { + "epoch": 1.83, + "learning_rate": 3.893237273562809e-07, + "loss": 0.1068, + "step": 41216 + }, + { + "epoch": 1.84, + "learning_rate": 3.88374988881971e-07, + "loss": 0.1165, + "step": 41280 + }, + { + "epoch": 1.84, + "learning_rate": 3.8742625040766105e-07, + "loss": 0.0656, + "step": 41344 + }, + { + "epoch": 1.84, + "learning_rate": 3.864775119333511e-07, + "loss": 0.0999, + "step": 41408 + }, + { + "epoch": 1.84, + "learning_rate": 3.855287734590412e-07, + "loss": 0.1246, + "step": 41472 + }, + { + "epoch": 1.85, + "learning_rate": 3.845800349847312e-07, + "loss": 0.1357, + "step": 41536 + }, + { + "epoch": 1.85, + "learning_rate": 3.836312965104213e-07, + "loss": 0.1208, + "step": 41600 + }, + { + "epoch": 1.85, + "learning_rate": 3.8268255803611134e-07, + "loss": 0.1149, + "step": 41664 + }, + { + "epoch": 1.86, + "learning_rate": 3.817338195618014e-07, + "loss": 0.0814, + "step": 41728 + }, + { + "epoch": 1.86, + "learning_rate": 3.8078508108749147e-07, + "loss": 0.1499, + "step": 41792 + }, + { + "epoch": 1.86, + "learning_rate": 3.7983634261318156e-07, + "loss": 0.088, + "step": 41856 + }, + { + "epoch": 1.86, + "learning_rate": 3.7888760413887155e-07, + "loss": 0.0644, + "step": 41920 + }, + { + "epoch": 1.87, + "learning_rate": 3.7793886566456164e-07, + "loss": 0.0924, + "step": 41984 + }, + { + "epoch": 1.87, + "learning_rate": 3.769901271902517e-07, + "loss": 0.1307, + "step": 42048 + }, + { + "epoch": 1.87, + "learning_rate": 3.7604138871594176e-07, + "loss": 0.0862, + "step": 42112 + }, + { + "epoch": 1.88, + "learning_rate": 3.7509265024163185e-07, + "loss": 0.1854, + "step": 42176 + }, + { + "epoch": 1.88, + "learning_rate": 3.7414391176732184e-07, + "loss": 0.0671, + "step": 42240 + }, + { + "epoch": 1.88, + "learning_rate": 3.7319517329301193e-07, + "loss": 0.0915, + "step": 42304 + }, + { + "epoch": 1.88, + "learning_rate": 3.7224643481870197e-07, + "loss": 0.0638, + "step": 42368 + }, + { + "epoch": 1.89, + "learning_rate": 3.7129769634439206e-07, + "loss": 0.0935, + "step": 42432 + }, + { + "epoch": 1.89, + "learning_rate": 3.7034895787008215e-07, + "loss": 0.1093, + "step": 42496 + }, + { + "epoch": 1.89, + "learning_rate": 3.6940021939577213e-07, + "loss": 0.0964, + "step": 42560 + }, + { + "epoch": 1.9, + "learning_rate": 3.684514809214622e-07, + "loss": 0.1378, + "step": 42624 + }, + { + "epoch": 1.9, + "learning_rate": 3.675027424471523e-07, + "loss": 0.157, + "step": 42688 + }, + { + "epoch": 1.9, + "learning_rate": 3.6655400397284235e-07, + "loss": 0.1068, + "step": 42752 + }, + { + "epoch": 1.9, + "learning_rate": 3.656052654985324e-07, + "loss": 0.1646, + "step": 42816 + }, + { + "epoch": 1.91, + "learning_rate": 3.6465652702422243e-07, + "loss": 0.0418, + "step": 42880 + }, + { + "epoch": 1.91, + "learning_rate": 3.637077885499125e-07, + "loss": 0.1197, + "step": 42944 + }, + { + "epoch": 1.91, + "learning_rate": 3.627590500756026e-07, + "loss": 0.1105, + "step": 43008 + }, + { + "epoch": 1.92, + "learning_rate": 3.6181031160129265e-07, + "loss": 0.0999, + "step": 43072 + }, + { + "epoch": 1.92, + "learning_rate": 3.608615731269827e-07, + "loss": 0.1232, + "step": 43136 + }, + { + "epoch": 1.92, + "learning_rate": 3.599128346526727e-07, + "loss": 0.0982, + "step": 43200 + }, + { + "epoch": 1.92, + "learning_rate": 3.589640961783628e-07, + "loss": 0.0958, + "step": 43264 + }, + { + "epoch": 1.93, + "learning_rate": 3.580153577040529e-07, + "loss": 0.1199, + "step": 43328 + }, + { + "epoch": 1.93, + "learning_rate": 3.5706661922974294e-07, + "loss": 0.1282, + "step": 43392 + }, + { + "epoch": 1.93, + "learning_rate": 3.56117880755433e-07, + "loss": 0.1198, + "step": 43456 + }, + { + "epoch": 1.94, + "learning_rate": 3.5516914228112307e-07, + "loss": 0.078, + "step": 43520 + }, + { + "epoch": 1.94, + "learning_rate": 3.542204038068131e-07, + "loss": 0.1085, + "step": 43584 + }, + { + "epoch": 1.94, + "learning_rate": 3.532716653325032e-07, + "loss": 0.0971, + "step": 43648 + }, + { + "epoch": 1.94, + "learning_rate": 3.523229268581932e-07, + "loss": 0.1054, + "step": 43712 + }, + { + "epoch": 1.95, + "learning_rate": 3.5137418838388327e-07, + "loss": 0.0871, + "step": 43776 + }, + { + "epoch": 1.95, + "learning_rate": 3.5042544990957336e-07, + "loss": 0.0859, + "step": 43840 + }, + { + "epoch": 1.95, + "learning_rate": 3.494767114352634e-07, + "loss": 0.1574, + "step": 43904 + }, + { + "epoch": 1.96, + "learning_rate": 3.485279729609535e-07, + "loss": 0.151, + "step": 43968 + }, + { + "epoch": 1.96, + "learning_rate": 3.475792344866435e-07, + "loss": 0.0996, + "step": 44032 + }, + { + "epoch": 1.96, + "learning_rate": 3.4663049601233357e-07, + "loss": 0.1001, + "step": 44096 + }, + { + "epoch": 1.96, + "learning_rate": 3.4568175753802366e-07, + "loss": 0.0738, + "step": 44160 + }, + { + "epoch": 1.97, + "learning_rate": 3.447330190637137e-07, + "loss": 0.0682, + "step": 44224 + }, + { + "epoch": 1.97, + "learning_rate": 3.437842805894038e-07, + "loss": 0.0821, + "step": 44288 + }, + { + "epoch": 1.97, + "learning_rate": 3.428355421150938e-07, + "loss": 0.0613, + "step": 44352 + }, + { + "epoch": 1.98, + "learning_rate": 3.4188680364078386e-07, + "loss": 0.137, + "step": 44416 + }, + { + "epoch": 1.98, + "learning_rate": 3.4093806516647395e-07, + "loss": 0.0937, + "step": 44480 + }, + { + "epoch": 1.98, + "learning_rate": 3.39989326692164e-07, + "loss": 0.1133, + "step": 44544 + }, + { + "epoch": 1.98, + "learning_rate": 3.390405882178541e-07, + "loss": 0.1079, + "step": 44608 + }, + { + "epoch": 1.99, + "learning_rate": 3.380918497435441e-07, + "loss": 0.089, + "step": 44672 + }, + { + "epoch": 1.99, + "learning_rate": 3.3714311126923416e-07, + "loss": 0.1321, + "step": 44736 + }, + { + "epoch": 1.99, + "learning_rate": 3.3619437279492425e-07, + "loss": 0.1646, + "step": 44800 + }, + { + "epoch": 2.0, + "learning_rate": 3.3524563432061434e-07, + "loss": 0.1659, + "step": 44864 + }, + { + "epoch": 2.0, + "learning_rate": 3.342968958463043e-07, + "loss": 0.084, + "step": 44928 + }, + { + "epoch": 2.0, + "learning_rate": 3.333481573719944e-07, + "loss": 0.1143, + "step": 44992 + }, + { + "epoch": 2.0, + "learning_rate": 3.3239941889768445e-07, + "loss": 0.0553, + "step": 45056 + }, + { + "epoch": 2.01, + "learning_rate": 3.3145068042337454e-07, + "loss": 0.073, + "step": 45120 + }, + { + "epoch": 2.01, + "learning_rate": 3.3050194194906463e-07, + "loss": 0.0648, + "step": 45184 + }, + { + "epoch": 2.01, + "learning_rate": 3.295532034747546e-07, + "loss": 0.0901, + "step": 45248 + }, + { + "epoch": 2.02, + "learning_rate": 3.286044650004447e-07, + "loss": 0.0771, + "step": 45312 + }, + { + "epoch": 2.02, + "learning_rate": 3.2765572652613474e-07, + "loss": 0.072, + "step": 45376 + }, + { + "epoch": 2.02, + "learning_rate": 3.2670698805182484e-07, + "loss": 0.0564, + "step": 45440 + }, + { + "epoch": 2.02, + "learning_rate": 3.2577307361617593e-07, + "loss": 0.0466, + "step": 45504 + }, + { + "epoch": 2.03, + "learning_rate": 3.24824335141866e-07, + "loss": 0.06, + "step": 45568 + }, + { + "epoch": 2.03, + "learning_rate": 3.238755966675561e-07, + "loss": 0.0333, + "step": 45632 + }, + { + "epoch": 2.03, + "learning_rate": 3.2292685819324615e-07, + "loss": 0.053, + "step": 45696 + }, + { + "epoch": 2.04, + "learning_rate": 3.2197811971893624e-07, + "loss": 0.0864, + "step": 45760 + }, + { + "epoch": 2.04, + "learning_rate": 3.2102938124462623e-07, + "loss": 0.0339, + "step": 45824 + }, + { + "epoch": 2.04, + "learning_rate": 3.200806427703163e-07, + "loss": 0.0486, + "step": 45888 + }, + { + "epoch": 2.04, + "learning_rate": 3.1914672833466747e-07, + "loss": 0.1192, + "step": 45952 + }, + { + "epoch": 2.05, + "learning_rate": 3.1819798986035756e-07, + "loss": 0.0666, + "step": 46016 + }, + { + "epoch": 2.05, + "learning_rate": 3.172492513860476e-07, + "loss": 0.0451, + "step": 46080 + }, + { + "epoch": 2.05, + "learning_rate": 3.1630051291173763e-07, + "loss": 0.0871, + "step": 46144 + }, + { + "epoch": 2.05, + "learning_rate": 3.153517744374277e-07, + "loss": 0.065, + "step": 46208 + }, + { + "epoch": 2.06, + "learning_rate": 3.144030359631178e-07, + "loss": 0.0784, + "step": 46272 + }, + { + "epoch": 2.06, + "learning_rate": 3.1345429748880785e-07, + "loss": 0.1044, + "step": 46336 + }, + { + "epoch": 2.06, + "learning_rate": 3.125055590144979e-07, + "loss": 0.0529, + "step": 46400 + }, + { + "epoch": 2.07, + "learning_rate": 3.1155682054018793e-07, + "loss": 0.073, + "step": 46464 + }, + { + "epoch": 2.07, + "learning_rate": 3.10608082065878e-07, + "loss": 0.0843, + "step": 46528 + }, + { + "epoch": 2.07, + "learning_rate": 3.096593435915681e-07, + "loss": 0.0552, + "step": 46592 + }, + { + "epoch": 2.07, + "learning_rate": 3.087106051172581e-07, + "loss": 0.0743, + "step": 46656 + }, + { + "epoch": 2.08, + "learning_rate": 3.077618666429482e-07, + "loss": 0.0612, + "step": 46720 + }, + { + "epoch": 2.08, + "learning_rate": 3.068131281686382e-07, + "loss": 0.0693, + "step": 46784 + }, + { + "epoch": 2.08, + "learning_rate": 3.058643896943283e-07, + "loss": 0.0755, + "step": 46848 + }, + { + "epoch": 2.09, + "learning_rate": 3.049156512200184e-07, + "loss": 0.085, + "step": 46912 + }, + { + "epoch": 2.09, + "learning_rate": 3.039669127457084e-07, + "loss": 0.0316, + "step": 46976 + }, + { + "epoch": 2.09, + "learning_rate": 3.030181742713985e-07, + "loss": 0.0542, + "step": 47040 + }, + { + "epoch": 2.09, + "learning_rate": 3.0206943579708857e-07, + "loss": 0.0582, + "step": 47104 + }, + { + "epoch": 2.1, + "learning_rate": 3.011206973227786e-07, + "loss": 0.0471, + "step": 47168 + }, + { + "epoch": 2.1, + "learning_rate": 3.001719588484687e-07, + "loss": 0.0288, + "step": 47232 + }, + { + "epoch": 2.1, + "learning_rate": 2.992232203741587e-07, + "loss": 0.0996, + "step": 47296 + }, + { + "epoch": 2.11, + "learning_rate": 2.9827448189984877e-07, + "loss": 0.0591, + "step": 47360 + }, + { + "epoch": 2.11, + "learning_rate": 2.9732574342553886e-07, + "loss": 0.0438, + "step": 47424 + }, + { + "epoch": 2.11, + "learning_rate": 2.963770049512289e-07, + "loss": 0.0632, + "step": 47488 + }, + { + "epoch": 2.11, + "learning_rate": 2.95428266476919e-07, + "loss": 0.0576, + "step": 47552 + }, + { + "epoch": 2.12, + "learning_rate": 2.94479528002609e-07, + "loss": 0.0892, + "step": 47616 + }, + { + "epoch": 2.12, + "learning_rate": 2.9353078952829907e-07, + "loss": 0.1369, + "step": 47680 + }, + { + "epoch": 2.12, + "learning_rate": 2.9258205105398916e-07, + "loss": 0.0736, + "step": 47744 + }, + { + "epoch": 2.13, + "learning_rate": 2.916333125796792e-07, + "loss": 0.1227, + "step": 47808 + }, + { + "epoch": 2.13, + "learning_rate": 2.9068457410536923e-07, + "loss": 0.0895, + "step": 47872 + }, + { + "epoch": 2.13, + "learning_rate": 2.897358356310593e-07, + "loss": 0.0623, + "step": 47936 + }, + { + "epoch": 2.13, + "learning_rate": 2.8878709715674936e-07, + "loss": 0.0817, + "step": 48000 + }, + { + "epoch": 2.14, + "learning_rate": 2.8783835868243945e-07, + "loss": 0.0819, + "step": 48064 + }, + { + "epoch": 2.14, + "learning_rate": 2.868896202081295e-07, + "loss": 0.069, + "step": 48128 + }, + { + "epoch": 2.14, + "learning_rate": 2.8594088173381953e-07, + "loss": 0.0737, + "step": 48192 + }, + { + "epoch": 2.15, + "learning_rate": 2.849921432595096e-07, + "loss": 0.0437, + "step": 48256 + }, + { + "epoch": 2.15, + "learning_rate": 2.8404340478519966e-07, + "loss": 0.0608, + "step": 48320 + }, + { + "epoch": 2.15, + "learning_rate": 2.8309466631088975e-07, + "loss": 0.0831, + "step": 48384 + }, + { + "epoch": 2.15, + "learning_rate": 2.8214592783657984e-07, + "loss": 0.0239, + "step": 48448 + }, + { + "epoch": 2.16, + "learning_rate": 2.811971893622698e-07, + "loss": 0.0509, + "step": 48512 + }, + { + "epoch": 2.16, + "learning_rate": 2.802484508879599e-07, + "loss": 0.0671, + "step": 48576 + }, + { + "epoch": 2.16, + "learning_rate": 2.7929971241364995e-07, + "loss": 0.0449, + "step": 48640 + }, + { + "epoch": 2.17, + "learning_rate": 2.7835097393934004e-07, + "loss": 0.0436, + "step": 48704 + }, + { + "epoch": 2.17, + "learning_rate": 2.774022354650301e-07, + "loss": 0.0752, + "step": 48768 + }, + { + "epoch": 2.17, + "learning_rate": 2.764534969907201e-07, + "loss": 0.0975, + "step": 48832 + }, + { + "epoch": 2.17, + "learning_rate": 2.755047585164102e-07, + "loss": 0.0787, + "step": 48896 + }, + { + "epoch": 2.18, + "learning_rate": 2.7457084408076136e-07, + "loss": 0.0786, + "step": 48960 + }, + { + "epoch": 2.18, + "learning_rate": 2.7362210560645145e-07, + "loss": 0.0799, + "step": 49024 + }, + { + "epoch": 2.18, + "learning_rate": 2.7267336713214143e-07, + "loss": 0.0533, + "step": 49088 + }, + { + "epoch": 2.19, + "learning_rate": 2.717246286578315e-07, + "loss": 0.0628, + "step": 49152 + }, + { + "epoch": 2.19, + "learning_rate": 2.707758901835216e-07, + "loss": 0.0589, + "step": 49216 + }, + { + "epoch": 2.19, + "learning_rate": 2.6982715170921165e-07, + "loss": 0.0374, + "step": 49280 + }, + { + "epoch": 2.19, + "learning_rate": 2.688784132349017e-07, + "loss": 0.0635, + "step": 49344 + }, + { + "epoch": 2.2, + "learning_rate": 2.679296747605917e-07, + "loss": 0.0509, + "step": 49408 + }, + { + "epoch": 2.2, + "learning_rate": 2.669809362862818e-07, + "loss": 0.0751, + "step": 49472 + }, + { + "epoch": 2.2, + "learning_rate": 2.660321978119719e-07, + "loss": 0.0769, + "step": 49536 + }, + { + "epoch": 2.21, + "learning_rate": 2.6508345933766194e-07, + "loss": 0.1124, + "step": 49600 + }, + { + "epoch": 2.21, + "learning_rate": 2.64134720863352e-07, + "loss": 0.0478, + "step": 49664 + }, + { + "epoch": 2.21, + "learning_rate": 2.6318598238904207e-07, + "loss": 0.0753, + "step": 49728 + }, + { + "epoch": 2.21, + "learning_rate": 2.622372439147321e-07, + "loss": 0.0611, + "step": 49792 + }, + { + "epoch": 2.22, + "learning_rate": 2.612885054404222e-07, + "loss": 0.0475, + "step": 49856 + }, + { + "epoch": 2.22, + "learning_rate": 2.603545910047733e-07, + "loss": 0.0461, + "step": 49920 + }, + { + "epoch": 2.22, + "learning_rate": 2.594058525304634e-07, + "loss": 0.044, + "step": 49984 + }, + { + "epoch": 2.23, + "learning_rate": 2.5845711405615343e-07, + "loss": 0.085, + "step": 50048 + }, + { + "epoch": 2.23, + "learning_rate": 2.575083755818435e-07, + "loss": 0.1055, + "step": 50112 + }, + { + "epoch": 2.23, + "learning_rate": 2.565596371075336e-07, + "loss": 0.0932, + "step": 50176 + }, + { + "epoch": 2.23, + "learning_rate": 2.556108986332236e-07, + "loss": 0.0656, + "step": 50240 + }, + { + "epoch": 2.24, + "learning_rate": 2.546621601589137e-07, + "loss": 0.0679, + "step": 50304 + }, + { + "epoch": 2.24, + "learning_rate": 2.537134216846037e-07, + "loss": 0.0673, + "step": 50368 + }, + { + "epoch": 2.24, + "learning_rate": 2.527646832102938e-07, + "loss": 0.0718, + "step": 50432 + }, + { + "epoch": 2.25, + "learning_rate": 2.518159447359839e-07, + "loss": 0.0329, + "step": 50496 + }, + { + "epoch": 2.25, + "learning_rate": 2.508672062616739e-07, + "loss": 0.0793, + "step": 50560 + }, + { + "epoch": 2.25, + "learning_rate": 2.49918467787364e-07, + "loss": 0.0633, + "step": 50624 + }, + { + "epoch": 2.25, + "learning_rate": 2.4896972931305407e-07, + "loss": 0.0257, + "step": 50688 + }, + { + "epoch": 2.26, + "learning_rate": 2.480209908387441e-07, + "loss": 0.0608, + "step": 50752 + }, + { + "epoch": 2.26, + "learning_rate": 2.4707225236443414e-07, + "loss": 0.0701, + "step": 50816 + }, + { + "epoch": 2.26, + "learning_rate": 2.461235138901242e-07, + "loss": 0.0573, + "step": 50880 + }, + { + "epoch": 2.27, + "learning_rate": 2.4517477541581427e-07, + "loss": 0.136, + "step": 50944 + }, + { + "epoch": 2.27, + "learning_rate": 2.4422603694150436e-07, + "loss": 0.0862, + "step": 51008 + }, + { + "epoch": 2.27, + "learning_rate": 2.432772984671944e-07, + "loss": 0.0793, + "step": 51072 + }, + { + "epoch": 2.27, + "learning_rate": 2.4232855999288444e-07, + "loss": 0.0501, + "step": 51136 + }, + { + "epoch": 2.28, + "learning_rate": 2.413798215185745e-07, + "loss": 0.0761, + "step": 51200 + }, + { + "epoch": 2.28, + "learning_rate": 2.4043108304426457e-07, + "loss": 0.0484, + "step": 51264 + }, + { + "epoch": 2.28, + "learning_rate": 2.394823445699546e-07, + "loss": 0.0963, + "step": 51328 + }, + { + "epoch": 2.29, + "learning_rate": 2.385336060956447e-07, + "loss": 0.0816, + "step": 51392 + }, + { + "epoch": 2.29, + "learning_rate": 2.3758486762133476e-07, + "loss": 0.0204, + "step": 51456 + }, + { + "epoch": 2.29, + "learning_rate": 2.366361291470248e-07, + "loss": 0.0517, + "step": 51520 + }, + { + "epoch": 2.29, + "learning_rate": 2.3568739067271486e-07, + "loss": 0.0587, + "step": 51584 + }, + { + "epoch": 2.3, + "learning_rate": 2.3473865219840492e-07, + "loss": 0.0545, + "step": 51648 + }, + { + "epoch": 2.3, + "learning_rate": 2.33789913724095e-07, + "loss": 0.0462, + "step": 51712 + }, + { + "epoch": 2.3, + "learning_rate": 2.3284117524978505e-07, + "loss": 0.0475, + "step": 51776 + }, + { + "epoch": 2.31, + "learning_rate": 2.318924367754751e-07, + "loss": 0.1472, + "step": 51840 + }, + { + "epoch": 2.31, + "learning_rate": 2.3094369830116518e-07, + "loss": 0.0886, + "step": 51904 + }, + { + "epoch": 2.31, + "learning_rate": 2.2999495982685522e-07, + "loss": 0.0724, + "step": 51968 + }, + { + "epoch": 2.31, + "learning_rate": 2.2904622135254528e-07, + "loss": 0.0841, + "step": 52032 + }, + { + "epoch": 2.32, + "learning_rate": 2.2809748287823532e-07, + "loss": 0.0481, + "step": 52096 + }, + { + "epoch": 2.32, + "learning_rate": 2.2714874440392538e-07, + "loss": 0.0837, + "step": 52160 + }, + { + "epoch": 2.32, + "learning_rate": 2.2620000592961548e-07, + "loss": 0.0733, + "step": 52224 + }, + { + "epoch": 2.33, + "learning_rate": 2.252512674553055e-07, + "loss": 0.058, + "step": 52288 + }, + { + "epoch": 2.33, + "learning_rate": 2.2430252898099558e-07, + "loss": 0.0247, + "step": 52352 + }, + { + "epoch": 2.33, + "learning_rate": 2.2335379050668561e-07, + "loss": 0.056, + "step": 52416 + }, + { + "epoch": 2.33, + "learning_rate": 2.224050520323757e-07, + "loss": 0.0634, + "step": 52480 + }, + { + "epoch": 2.34, + "learning_rate": 2.2145631355806574e-07, + "loss": 0.0858, + "step": 52544 + }, + { + "epoch": 2.34, + "learning_rate": 2.205075750837558e-07, + "loss": 0.0639, + "step": 52608 + }, + { + "epoch": 2.34, + "learning_rate": 2.1955883660944587e-07, + "loss": 0.0742, + "step": 52672 + }, + { + "epoch": 2.35, + "learning_rate": 2.1861009813513594e-07, + "loss": 0.0472, + "step": 52736 + }, + { + "epoch": 2.35, + "learning_rate": 2.17661359660826e-07, + "loss": 0.0809, + "step": 52800 + }, + { + "epoch": 2.35, + "learning_rate": 2.1672744522517712e-07, + "loss": 0.035, + "step": 52864 + }, + { + "epoch": 2.35, + "learning_rate": 2.1577870675086721e-07, + "loss": 0.1202, + "step": 52928 + }, + { + "epoch": 2.36, + "learning_rate": 2.1482996827655725e-07, + "loss": 0.0451, + "step": 52992 + }, + { + "epoch": 2.36, + "learning_rate": 2.1388122980224732e-07, + "loss": 0.0622, + "step": 53056 + }, + { + "epoch": 2.36, + "learning_rate": 2.1293249132793735e-07, + "loss": 0.0428, + "step": 53120 + }, + { + "epoch": 2.37, + "learning_rate": 2.1198375285362744e-07, + "loss": 0.0734, + "step": 53184 + }, + { + "epoch": 2.37, + "learning_rate": 2.110350143793175e-07, + "loss": 0.0441, + "step": 53248 + }, + { + "epoch": 2.37, + "learning_rate": 2.1008627590500755e-07, + "loss": 0.0537, + "step": 53312 + }, + { + "epoch": 2.37, + "learning_rate": 2.091375374306976e-07, + "loss": 0.0983, + "step": 53376 + }, + { + "epoch": 2.38, + "learning_rate": 2.0818879895638767e-07, + "loss": 0.0633, + "step": 53440 + }, + { + "epoch": 2.38, + "learning_rate": 2.0724006048207774e-07, + "loss": 0.0639, + "step": 53504 + }, + { + "epoch": 2.38, + "learning_rate": 2.0629132200776778e-07, + "loss": 0.0446, + "step": 53568 + }, + { + "epoch": 2.39, + "learning_rate": 2.0534258353345784e-07, + "loss": 0.0512, + "step": 53632 + }, + { + "epoch": 2.39, + "learning_rate": 2.0439384505914793e-07, + "loss": 0.0764, + "step": 53696 + }, + { + "epoch": 2.39, + "learning_rate": 2.0344510658483797e-07, + "loss": 0.0737, + "step": 53760 + }, + { + "epoch": 2.39, + "learning_rate": 2.0249636811052803e-07, + "loss": 0.0627, + "step": 53824 + }, + { + "epoch": 2.4, + "learning_rate": 2.0154762963621807e-07, + "loss": 0.1065, + "step": 53888 + }, + { + "epoch": 2.4, + "learning_rate": 2.0059889116190813e-07, + "loss": 0.1112, + "step": 53952 + }, + { + "epoch": 2.4, + "learning_rate": 1.996501526875982e-07, + "loss": 0.0649, + "step": 54016 + }, + { + "epoch": 2.41, + "learning_rate": 1.9870141421328826e-07, + "loss": 0.0729, + "step": 54080 + }, + { + "epoch": 2.41, + "learning_rate": 1.9775267573897833e-07, + "loss": 0.0621, + "step": 54144 + }, + { + "epoch": 2.41, + "learning_rate": 1.9680393726466836e-07, + "loss": 0.0563, + "step": 54208 + }, + { + "epoch": 2.41, + "learning_rate": 1.9585519879035846e-07, + "loss": 0.0783, + "step": 54272 + }, + { + "epoch": 2.42, + "learning_rate": 1.949064603160485e-07, + "loss": 0.0339, + "step": 54336 + }, + { + "epoch": 2.42, + "learning_rate": 1.9395772184173856e-07, + "loss": 0.1098, + "step": 54400 + }, + { + "epoch": 2.42, + "learning_rate": 1.930089833674286e-07, + "loss": 0.0531, + "step": 54464 + }, + { + "epoch": 2.42, + "learning_rate": 1.9206024489311869e-07, + "loss": 0.062, + "step": 54528 + }, + { + "epoch": 2.43, + "learning_rate": 1.9111150641880875e-07, + "loss": 0.0335, + "step": 54592 + }, + { + "epoch": 2.43, + "learning_rate": 1.901627679444988e-07, + "loss": 0.097, + "step": 54656 + }, + { + "epoch": 2.43, + "learning_rate": 1.8921402947018885e-07, + "loss": 0.1206, + "step": 54720 + }, + { + "epoch": 2.44, + "learning_rate": 1.882652909958789e-07, + "loss": 0.0818, + "step": 54784 + }, + { + "epoch": 2.44, + "learning_rate": 1.8731655252156898e-07, + "loss": 0.047, + "step": 54848 + }, + { + "epoch": 2.44, + "learning_rate": 1.8636781404725902e-07, + "loss": 0.0854, + "step": 54912 + }, + { + "epoch": 2.44, + "learning_rate": 1.8541907557294908e-07, + "loss": 0.0492, + "step": 54976 + }, + { + "epoch": 2.45, + "learning_rate": 1.8447033709863915e-07, + "loss": 0.1021, + "step": 55040 + }, + { + "epoch": 2.45, + "learning_rate": 1.835215986243292e-07, + "loss": 0.0635, + "step": 55104 + }, + { + "epoch": 2.45, + "learning_rate": 1.8257286015001927e-07, + "loss": 0.0809, + "step": 55168 + }, + { + "epoch": 2.46, + "learning_rate": 1.816241216757093e-07, + "loss": 0.0943, + "step": 55232 + }, + { + "epoch": 2.46, + "learning_rate": 1.8067538320139938e-07, + "loss": 0.0428, + "step": 55296 + }, + { + "epoch": 2.46, + "learning_rate": 1.7972664472708944e-07, + "loss": 0.1056, + "step": 55360 + }, + { + "epoch": 2.46, + "learning_rate": 1.787779062527795e-07, + "loss": 0.0723, + "step": 55424 + }, + { + "epoch": 2.47, + "learning_rate": 1.7782916777846957e-07, + "loss": 0.075, + "step": 55488 + }, + { + "epoch": 2.47, + "learning_rate": 1.768804293041596e-07, + "loss": 0.0867, + "step": 55552 + }, + { + "epoch": 2.47, + "learning_rate": 1.7593169082984967e-07, + "loss": 0.0473, + "step": 55616 + }, + { + "epoch": 2.48, + "learning_rate": 1.7498295235553973e-07, + "loss": 0.1154, + "step": 55680 + }, + { + "epoch": 2.48, + "learning_rate": 1.740342138812298e-07, + "loss": 0.118, + "step": 55744 + }, + { + "epoch": 2.48, + "learning_rate": 1.7308547540691984e-07, + "loss": 0.0474, + "step": 55808 + }, + { + "epoch": 2.48, + "learning_rate": 1.721367369326099e-07, + "loss": 0.0949, + "step": 55872 + }, + { + "epoch": 2.49, + "learning_rate": 1.711879984583e-07, + "loss": 0.0887, + "step": 55936 + }, + { + "epoch": 2.49, + "learning_rate": 1.7023925998399003e-07, + "loss": 0.0711, + "step": 56000 + }, + { + "epoch": 2.49, + "learning_rate": 1.692905215096801e-07, + "loss": 0.0556, + "step": 56064 + }, + { + "epoch": 2.5, + "learning_rate": 1.6834178303537013e-07, + "loss": 0.1028, + "step": 56128 + }, + { + "epoch": 2.5, + "learning_rate": 1.6739304456106022e-07, + "loss": 0.0673, + "step": 56192 + }, + { + "epoch": 2.5, + "learning_rate": 1.6644430608675029e-07, + "loss": 0.0826, + "step": 56256 + }, + { + "epoch": 2.5, + "learning_rate": 1.6549556761244032e-07, + "loss": 0.0579, + "step": 56320 + }, + { + "epoch": 2.51, + "learning_rate": 1.645468291381304e-07, + "loss": 0.0895, + "step": 56384 + }, + { + "epoch": 2.51, + "learning_rate": 1.6359809066382042e-07, + "loss": 0.1035, + "step": 56448 + }, + { + "epoch": 2.51, + "learning_rate": 1.6264935218951052e-07, + "loss": 0.158, + "step": 56512 + }, + { + "epoch": 2.52, + "learning_rate": 1.6170061371520055e-07, + "loss": 0.0721, + "step": 56576 + }, + { + "epoch": 2.52, + "learning_rate": 1.6075187524089062e-07, + "loss": 0.0803, + "step": 56640 + }, + { + "epoch": 2.52, + "learning_rate": 1.5980313676658068e-07, + "loss": 0.1033, + "step": 56704 + }, + { + "epoch": 2.52, + "learning_rate": 1.5885439829227075e-07, + "loss": 0.1123, + "step": 56768 + }, + { + "epoch": 2.53, + "learning_rate": 1.579056598179608e-07, + "loss": 0.0409, + "step": 56832 + }, + { + "epoch": 2.53, + "learning_rate": 1.5695692134365085e-07, + "loss": 0.0909, + "step": 56896 + }, + { + "epoch": 2.53, + "learning_rate": 1.560081828693409e-07, + "loss": 0.0896, + "step": 56960 + }, + { + "epoch": 2.54, + "learning_rate": 1.5505944439503098e-07, + "loss": 0.0479, + "step": 57024 + }, + { + "epoch": 2.54, + "learning_rate": 1.5411070592072104e-07, + "loss": 0.0769, + "step": 57088 + }, + { + "epoch": 2.54, + "learning_rate": 1.531619674464111e-07, + "loss": 0.0643, + "step": 57152 + }, + { + "epoch": 2.54, + "learning_rate": 1.5221322897210114e-07, + "loss": 0.0449, + "step": 57216 + }, + { + "epoch": 2.55, + "learning_rate": 1.5126449049779123e-07, + "loss": 0.0761, + "step": 57280 + }, + { + "epoch": 2.55, + "learning_rate": 1.5031575202348127e-07, + "loss": 0.0619, + "step": 57344 + }, + { + "epoch": 2.55, + "learning_rate": 1.4936701354917133e-07, + "loss": 0.0834, + "step": 57408 + }, + { + "epoch": 2.56, + "learning_rate": 1.4841827507486137e-07, + "loss": 0.0556, + "step": 57472 + }, + { + "epoch": 2.56, + "learning_rate": 1.4746953660055144e-07, + "loss": 0.0629, + "step": 57536 + }, + { + "epoch": 2.56, + "learning_rate": 1.4652079812624153e-07, + "loss": 0.0941, + "step": 57600 + }, + { + "epoch": 2.56, + "learning_rate": 1.4557205965193156e-07, + "loss": 0.0543, + "step": 57664 + }, + { + "epoch": 2.57, + "learning_rate": 1.4462332117762163e-07, + "loss": 0.0463, + "step": 57728 + }, + { + "epoch": 2.57, + "learning_rate": 1.4367458270331167e-07, + "loss": 0.0364, + "step": 57792 + }, + { + "epoch": 2.57, + "learning_rate": 1.4272584422900176e-07, + "loss": 0.0395, + "step": 57856 + }, + { + "epoch": 2.58, + "learning_rate": 1.417771057546918e-07, + "loss": 0.0566, + "step": 57920 + }, + { + "epoch": 2.58, + "learning_rate": 1.4082836728038186e-07, + "loss": 0.0857, + "step": 57984 + }, + { + "epoch": 2.58, + "learning_rate": 1.3987962880607192e-07, + "loss": 0.1735, + "step": 58048 + }, + { + "epoch": 2.58, + "learning_rate": 1.3893089033176199e-07, + "loss": 0.0754, + "step": 58112 + }, + { + "epoch": 2.59, + "learning_rate": 1.3798215185745205e-07, + "loss": 0.0505, + "step": 58176 + }, + { + "epoch": 2.59, + "learning_rate": 1.370334133831421e-07, + "loss": 0.0766, + "step": 58240 + }, + { + "epoch": 2.59, + "learning_rate": 1.3608467490883215e-07, + "loss": 0.0526, + "step": 58304 + }, + { + "epoch": 2.6, + "learning_rate": 1.351359364345222e-07, + "loss": 0.0535, + "step": 58368 + }, + { + "epoch": 2.6, + "learning_rate": 1.3418719796021228e-07, + "loss": 0.0465, + "step": 58432 + }, + { + "epoch": 2.6, + "learning_rate": 1.3323845948590235e-07, + "loss": 0.0652, + "step": 58496 + }, + { + "epoch": 2.6, + "learning_rate": 1.3228972101159238e-07, + "loss": 0.0588, + "step": 58560 + }, + { + "epoch": 2.61, + "learning_rate": 1.3134098253728245e-07, + "loss": 0.06, + "step": 58624 + }, + { + "epoch": 2.61, + "learning_rate": 1.303922440629725e-07, + "loss": 0.0837, + "step": 58688 + }, + { + "epoch": 2.61, + "learning_rate": 1.2944350558866258e-07, + "loss": 0.0505, + "step": 58752 + }, + { + "epoch": 2.62, + "learning_rate": 1.284947671143526e-07, + "loss": 0.0536, + "step": 58816 + }, + { + "epoch": 2.62, + "learning_rate": 1.2754602864004268e-07, + "loss": 0.0765, + "step": 58880 + }, + { + "epoch": 2.62, + "learning_rate": 1.2659729016573277e-07, + "loss": 0.0612, + "step": 58944 + }, + { + "epoch": 2.62, + "learning_rate": 1.256485516914228e-07, + "loss": 0.0564, + "step": 59008 + }, + { + "epoch": 2.63, + "learning_rate": 1.2469981321711287e-07, + "loss": 0.0484, + "step": 59072 + }, + { + "epoch": 2.63, + "learning_rate": 1.2375107474280293e-07, + "loss": 0.1047, + "step": 59136 + }, + { + "epoch": 2.63, + "learning_rate": 1.2280233626849297e-07, + "loss": 0.0873, + "step": 59200 + }, + { + "epoch": 2.64, + "learning_rate": 1.2185359779418304e-07, + "loss": 0.0641, + "step": 59264 + }, + { + "epoch": 2.64, + "learning_rate": 1.209048593198731e-07, + "loss": 0.0654, + "step": 59328 + }, + { + "epoch": 2.64, + "learning_rate": 1.1995612084556316e-07, + "loss": 0.0936, + "step": 59392 + }, + { + "epoch": 2.64, + "learning_rate": 1.1900738237125321e-07, + "loss": 0.0602, + "step": 59456 + }, + { + "epoch": 2.65, + "learning_rate": 1.1805864389694328e-07, + "loss": 0.0533, + "step": 59520 + }, + { + "epoch": 2.65, + "learning_rate": 1.1710990542263334e-07, + "loss": 0.1065, + "step": 59584 + }, + { + "epoch": 2.65, + "learning_rate": 1.161611669483234e-07, + "loss": 0.1187, + "step": 59648 + }, + { + "epoch": 2.66, + "learning_rate": 1.1521242847401346e-07, + "loss": 0.0678, + "step": 59712 + }, + { + "epoch": 2.66, + "learning_rate": 1.1426368999970351e-07, + "loss": 0.0702, + "step": 59776 + }, + { + "epoch": 2.66, + "learning_rate": 1.1331495152539357e-07, + "loss": 0.0872, + "step": 59840 + }, + { + "epoch": 2.66, + "learning_rate": 1.1236621305108364e-07, + "loss": 0.0574, + "step": 59904 + }, + { + "epoch": 2.67, + "learning_rate": 1.114174745767737e-07, + "loss": 0.0689, + "step": 59968 + }, + { + "epoch": 2.67, + "learning_rate": 1.1048356014112484e-07, + "loss": 0.0703, + "step": 60032 + }, + { + "epoch": 2.67, + "learning_rate": 1.095348216668149e-07, + "loss": 0.0578, + "step": 60096 + }, + { + "epoch": 2.68, + "learning_rate": 1.0858608319250497e-07, + "loss": 0.0828, + "step": 60160 + }, + { + "epoch": 2.68, + "learning_rate": 1.0763734471819502e-07, + "loss": 0.0917, + "step": 60224 + }, + { + "epoch": 2.68, + "learning_rate": 1.0668860624388508e-07, + "loss": 0.073, + "step": 60288 + }, + { + "epoch": 2.68, + "learning_rate": 1.0573986776957513e-07, + "loss": 0.0453, + "step": 60352 + }, + { + "epoch": 2.69, + "learning_rate": 1.047911292952652e-07, + "loss": 0.0881, + "step": 60416 + }, + { + "epoch": 2.69, + "learning_rate": 1.0384239082095525e-07, + "loss": 0.0675, + "step": 60480 + }, + { + "epoch": 2.69, + "learning_rate": 1.0289365234664532e-07, + "loss": 0.0965, + "step": 60544 + }, + { + "epoch": 2.7, + "learning_rate": 1.0194491387233538e-07, + "loss": 0.0637, + "step": 60608 + }, + { + "epoch": 2.7, + "learning_rate": 1.0099617539802544e-07, + "loss": 0.033, + "step": 60672 + }, + { + "epoch": 2.7, + "learning_rate": 1.0004743692371549e-07, + "loss": 0.0532, + "step": 60736 + }, + { + "epoch": 2.7, + "learning_rate": 9.909869844940556e-08, + "loss": 0.053, + "step": 60800 + }, + { + "epoch": 2.71, + "learning_rate": 9.81499599750956e-08, + "loss": 0.0748, + "step": 60864 + }, + { + "epoch": 2.71, + "learning_rate": 9.720122150078566e-08, + "loss": 0.1016, + "step": 60928 + }, + { + "epoch": 2.71, + "learning_rate": 9.625248302647573e-08, + "loss": 0.0864, + "step": 60992 + }, + { + "epoch": 2.72, + "learning_rate": 9.530374455216579e-08, + "loss": 0.0621, + "step": 61056 + }, + { + "epoch": 2.72, + "learning_rate": 9.435500607785585e-08, + "loss": 0.053, + "step": 61120 + }, + { + "epoch": 2.72, + "learning_rate": 9.34062676035459e-08, + "loss": 0.0541, + "step": 61184 + }, + { + "epoch": 2.72, + "learning_rate": 9.245752912923596e-08, + "loss": 0.0849, + "step": 61248 + }, + { + "epoch": 2.73, + "learning_rate": 9.150879065492602e-08, + "loss": 0.0488, + "step": 61312 + }, + { + "epoch": 2.73, + "learning_rate": 9.056005218061609e-08, + "loss": 0.0471, + "step": 61376 + }, + { + "epoch": 2.73, + "learning_rate": 8.961131370630614e-08, + "loss": 0.0974, + "step": 61440 + }, + { + "epoch": 2.74, + "learning_rate": 8.866257523199621e-08, + "loss": 0.0871, + "step": 61504 + }, + { + "epoch": 2.74, + "learning_rate": 8.771383675768626e-08, + "loss": 0.0958, + "step": 61568 + }, + { + "epoch": 2.74, + "learning_rate": 8.676509828337632e-08, + "loss": 0.0631, + "step": 61632 + }, + { + "epoch": 2.74, + "learning_rate": 8.581635980906637e-08, + "loss": 0.1262, + "step": 61696 + }, + { + "epoch": 2.75, + "learning_rate": 8.486762133475642e-08, + "loss": 0.0563, + "step": 61760 + }, + { + "epoch": 2.75, + "learning_rate": 8.39188828604465e-08, + "loss": 0.0844, + "step": 61824 + }, + { + "epoch": 2.75, + "learning_rate": 8.297014438613655e-08, + "loss": 0.0916, + "step": 61888 + }, + { + "epoch": 2.76, + "learning_rate": 8.202140591182662e-08, + "loss": 0.0569, + "step": 61952 + }, + { + "epoch": 2.76, + "learning_rate": 8.107266743751667e-08, + "loss": 0.1387, + "step": 62016 + }, + { + "epoch": 2.76, + "learning_rate": 8.012392896320673e-08, + "loss": 0.0627, + "step": 62080 + }, + { + "epoch": 2.76, + "learning_rate": 7.917519048889678e-08, + "loss": 0.0815, + "step": 62144 + }, + { + "epoch": 2.77, + "learning_rate": 7.822645201458685e-08, + "loss": 0.1097, + "step": 62208 + }, + { + "epoch": 2.77, + "learning_rate": 7.727771354027691e-08, + "loss": 0.094, + "step": 62272 + }, + { + "epoch": 2.77, + "learning_rate": 7.632897506596698e-08, + "loss": 0.0843, + "step": 62336 + }, + { + "epoch": 2.78, + "learning_rate": 7.538023659165703e-08, + "loss": 0.0687, + "step": 62400 + }, + { + "epoch": 2.78, + "learning_rate": 7.443149811734709e-08, + "loss": 0.0932, + "step": 62464 + }, + { + "epoch": 2.78, + "learning_rate": 7.348275964303714e-08, + "loss": 0.0515, + "step": 62528 + }, + { + "epoch": 2.78, + "learning_rate": 7.25340211687272e-08, + "loss": 0.0626, + "step": 62592 + }, + { + "epoch": 2.79, + "learning_rate": 7.158528269441727e-08, + "loss": 0.1467, + "step": 62656 + }, + { + "epoch": 2.79, + "learning_rate": 7.063654422010733e-08, + "loss": 0.0827, + "step": 62720 + }, + { + "epoch": 2.79, + "learning_rate": 6.968780574579738e-08, + "loss": 0.0736, + "step": 62784 + }, + { + "epoch": 2.79, + "learning_rate": 6.873906727148744e-08, + "loss": 0.0955, + "step": 62848 + }, + { + "epoch": 2.8, + "learning_rate": 6.77903287971775e-08, + "loss": 0.0599, + "step": 62912 + }, + { + "epoch": 2.8, + "learning_rate": 6.684159032286755e-08, + "loss": 0.0315, + "step": 62976 + }, + { + "epoch": 2.8, + "learning_rate": 6.589285184855762e-08, + "loss": 0.0659, + "step": 63040 + }, + { + "epoch": 2.81, + "learning_rate": 6.494411337424768e-08, + "loss": 0.0614, + "step": 63104 + }, + { + "epoch": 2.81, + "learning_rate": 6.399537489993774e-08, + "loss": 0.0426, + "step": 63168 + }, + { + "epoch": 2.81, + "learning_rate": 6.306146046428888e-08, + "loss": 0.116, + "step": 63232 + }, + { + "epoch": 2.81, + "learning_rate": 6.211272198997894e-08, + "loss": 0.1004, + "step": 63296 + }, + { + "epoch": 2.82, + "learning_rate": 6.116398351566901e-08, + "loss": 0.0469, + "step": 63360 + }, + { + "epoch": 2.82, + "learning_rate": 6.021524504135907e-08, + "loss": 0.0403, + "step": 63424 + }, + { + "epoch": 2.82, + "learning_rate": 5.9266506567049123e-08, + "loss": 0.0604, + "step": 63488 + }, + { + "epoch": 2.83, + "learning_rate": 5.831776809273918e-08, + "loss": 0.0871, + "step": 63552 + }, + { + "epoch": 2.83, + "learning_rate": 5.7369029618429245e-08, + "loss": 0.0703, + "step": 63616 + }, + { + "epoch": 2.83, + "learning_rate": 5.64202911441193e-08, + "loss": 0.0564, + "step": 63680 + }, + { + "epoch": 2.83, + "learning_rate": 5.547155266980936e-08, + "loss": 0.0806, + "step": 63744 + }, + { + "epoch": 2.84, + "learning_rate": 5.4522814195499424e-08, + "loss": 0.031, + "step": 63808 + }, + { + "epoch": 2.84, + "learning_rate": 5.357407572118948e-08, + "loss": 0.071, + "step": 63872 + }, + { + "epoch": 2.84, + "learning_rate": 5.262533724687954e-08, + "loss": 0.0471, + "step": 63936 + }, + { + "epoch": 2.85, + "learning_rate": 5.167659877256959e-08, + "loss": 0.0629, + "step": 64000 + }, + { + "epoch": 2.85, + "learning_rate": 5.0727860298259654e-08, + "loss": 0.0759, + "step": 64064 + }, + { + "epoch": 2.85, + "learning_rate": 4.977912182394971e-08, + "loss": 0.0494, + "step": 64128 + }, + { + "epoch": 2.85, + "learning_rate": 4.883038334963977e-08, + "loss": 0.0805, + "step": 64192 + }, + { + "epoch": 2.86, + "learning_rate": 4.7881644875329834e-08, + "loss": 0.0371, + "step": 64256 + }, + { + "epoch": 2.86, + "learning_rate": 4.693290640101989e-08, + "loss": 0.0471, + "step": 64320 + }, + { + "epoch": 2.86, + "learning_rate": 4.598416792670995e-08, + "loss": 0.0223, + "step": 64384 + }, + { + "epoch": 2.87, + "learning_rate": 4.503542945240001e-08, + "loss": 0.1177, + "step": 64448 + }, + { + "epoch": 2.87, + "learning_rate": 4.408669097809007e-08, + "loss": 0.0777, + "step": 64512 + }, + { + "epoch": 2.87, + "learning_rate": 4.313795250378013e-08, + "loss": 0.0444, + "step": 64576 + }, + { + "epoch": 2.87, + "learning_rate": 4.2189214029470186e-08, + "loss": 0.1089, + "step": 64640 + }, + { + "epoch": 2.88, + "learning_rate": 4.124047555516025e-08, + "loss": 0.0793, + "step": 64704 + }, + { + "epoch": 2.88, + "learning_rate": 4.029173708085031e-08, + "loss": 0.0732, + "step": 64768 + }, + { + "epoch": 2.88, + "learning_rate": 3.9342998606540365e-08, + "loss": 0.0384, + "step": 64832 + }, + { + "epoch": 2.89, + "learning_rate": 3.839426013223043e-08, + "loss": 0.0462, + "step": 64896 + }, + { + "epoch": 2.89, + "learning_rate": 3.744552165792048e-08, + "loss": 0.0671, + "step": 64960 + }, + { + "epoch": 2.89, + "learning_rate": 3.649678318361054e-08, + "loss": 0.0454, + "step": 65024 + }, + { + "epoch": 2.89, + "learning_rate": 3.55480447093006e-08, + "loss": 0.0829, + "step": 65088 + }, + { + "epoch": 2.9, + "learning_rate": 3.459930623499066e-08, + "loss": 0.0956, + "step": 65152 + }, + { + "epoch": 2.9, + "learning_rate": 3.3650567760680717e-08, + "loss": 0.0478, + "step": 65216 + }, + { + "epoch": 2.9, + "learning_rate": 3.2701829286370774e-08, + "loss": 0.0558, + "step": 65280 + }, + { + "epoch": 2.91, + "learning_rate": 3.175309081206084e-08, + "loss": 0.0913, + "step": 65344 + }, + { + "epoch": 2.91, + "learning_rate": 3.0804352337750896e-08, + "loss": 0.0834, + "step": 65408 + }, + { + "epoch": 2.91, + "learning_rate": 2.9855613863440953e-08, + "loss": 0.0448, + "step": 65472 + }, + { + "epoch": 2.91, + "learning_rate": 2.8906875389131014e-08, + "loss": 0.0597, + "step": 65536 + }, + { + "epoch": 2.92, + "learning_rate": 2.7958136914821072e-08, + "loss": 0.1018, + "step": 65600 + }, + { + "epoch": 2.92, + "learning_rate": 2.7024222479172225e-08, + "loss": 0.0871, + "step": 65664 + }, + { + "epoch": 2.92, + "learning_rate": 2.6075484004862282e-08, + "loss": 0.0386, + "step": 65728 + }, + { + "epoch": 2.93, + "learning_rate": 2.5126745530552343e-08, + "loss": 0.1053, + "step": 65792 + }, + { + "epoch": 2.93, + "learning_rate": 2.41780070562424e-08, + "loss": 0.0801, + "step": 65856 + }, + { + "epoch": 2.93, + "learning_rate": 2.3229268581932462e-08, + "loss": 0.0605, + "step": 65920 + }, + { + "epoch": 2.93, + "learning_rate": 2.228053010762252e-08, + "loss": 0.0583, + "step": 65984 + }, + { + "epoch": 2.94, + "learning_rate": 2.1331791633312577e-08, + "loss": 0.0541, + "step": 66048 + }, + { + "epoch": 2.94, + "learning_rate": 2.0383053159002638e-08, + "loss": 0.0358, + "step": 66112 + }, + { + "epoch": 2.94, + "learning_rate": 1.9434314684692695e-08, + "loss": 0.0385, + "step": 66176 + }, + { + "epoch": 2.95, + "learning_rate": 1.8485576210382756e-08, + "loss": 0.0723, + "step": 66240 + }, + { + "epoch": 2.95, + "learning_rate": 1.7536837736072817e-08, + "loss": 0.059, + "step": 66304 + }, + { + "epoch": 2.95, + "learning_rate": 1.6588099261762874e-08, + "loss": 0.0381, + "step": 66368 + }, + { + "epoch": 2.95, + "learning_rate": 1.5639360787452932e-08, + "loss": 0.0644, + "step": 66432 + }, + { + "epoch": 2.96, + "learning_rate": 1.4690622313142993e-08, + "loss": 0.0506, + "step": 66496 + }, + { + "epoch": 2.96, + "learning_rate": 1.374188383883305e-08, + "loss": 0.0423, + "step": 66560 + }, + { + "epoch": 2.96, + "learning_rate": 1.279314536452311e-08, + "loss": 0.0629, + "step": 66624 + }, + { + "epoch": 2.97, + "learning_rate": 1.1844406890213169e-08, + "loss": 0.0264, + "step": 66688 + }, + { + "epoch": 2.97, + "learning_rate": 1.089566841590323e-08, + "loss": 0.0475, + "step": 66752 + }, + { + "epoch": 2.97, + "learning_rate": 9.946929941593287e-09, + "loss": 0.0624, + "step": 66816 + }, + { + "epoch": 2.97, + "learning_rate": 8.998191467283346e-09, + "loss": 0.0693, + "step": 66880 + }, + { + "epoch": 2.98, + "learning_rate": 8.049452992973405e-09, + "loss": 0.1005, + "step": 66944 + }, + { + "epoch": 2.98, + "learning_rate": 7.1007145186634646e-09, + "loss": 0.0561, + "step": 67008 + }, + { + "epoch": 2.98, + "learning_rate": 6.151976044353523e-09, + "loss": 0.1169, + "step": 67072 + }, + { + "epoch": 2.99, + "learning_rate": 5.203237570043583e-09, + "loss": 0.059, + "step": 67136 + }, + { + "epoch": 2.99, + "learning_rate": 4.254499095733641e-09, + "loss": 0.0378, + "step": 67200 + }, + { + "epoch": 2.99, + "learning_rate": 3.3057606214237005e-09, + "loss": 0.0409, + "step": 67264 + }, + { + "epoch": 2.99, + "learning_rate": 2.3570221471137597e-09, + "loss": 0.1203, + "step": 67328 + }, + { + "epoch": 3.0, + "learning_rate": 1.4082836728038187e-09, + "loss": 0.097, + "step": 67392 + }, + { + "epoch": 3.0, + "learning_rate": 4.5954519849387766e-10, + "loss": 0.0873, + "step": 67456 + } + ], + "logging_steps": 64, + "max_steps": 67458, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 22486, + "total_flos": 1.432248707211264e+20, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}