|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 67458, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.990957336416734e-07, |
|
"loss": 0.1541, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.981469951673633e-07, |
|
"loss": 0.0788, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.971982566930536e-07, |
|
"loss": 0.2255, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.962495182187434e-07, |
|
"loss": 0.1454, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.953007797444335e-07, |
|
"loss": 0.081, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.943520412701236e-07, |
|
"loss": 0.1782, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.934033027958137e-07, |
|
"loss": 0.1119, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.924545643215036e-07, |
|
"loss": 0.1648, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.915058258471937e-07, |
|
"loss": 0.1536, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.905570873728838e-07, |
|
"loss": 0.1527, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.896083488985739e-07, |
|
"loss": 0.1153, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.88659610424264e-07, |
|
"loss": 0.0826, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.87710871949954e-07, |
|
"loss": 0.135, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.867621334756441e-07, |
|
"loss": 0.1657, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.85813395001334e-07, |
|
"loss": 0.1777, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.848646565270243e-07, |
|
"loss": 0.0975, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.839159180527142e-07, |
|
"loss": 0.1336, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.829671795784043e-07, |
|
"loss": 0.0785, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.820184411040944e-07, |
|
"loss": 0.164, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.810697026297845e-07, |
|
"loss": 0.0864, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.801209641554746e-07, |
|
"loss": 0.128, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.791722256811644e-07, |
|
"loss": 0.0897, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.782234872068545e-07, |
|
"loss": 0.0989, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.772747487325446e-07, |
|
"loss": 0.1309, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.763408342968958e-07, |
|
"loss": 0.2144, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.75392095822586e-07, |
|
"loss": 0.0691, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.744433573482758e-07, |
|
"loss": 0.1476, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.73494618873966e-07, |
|
"loss": 0.1541, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.72545880399656e-07, |
|
"loss": 0.1108, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.71597141925346e-07, |
|
"loss": 0.112, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.706484034510362e-07, |
|
"loss": 0.1901, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.696996649767262e-07, |
|
"loss": 0.1207, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.687509265024163e-07, |
|
"loss": 0.128, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.678021880281062e-07, |
|
"loss": 0.2029, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.668534495537965e-07, |
|
"loss": 0.1629, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.659047110794864e-07, |
|
"loss": 0.1662, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.649559726051765e-07, |
|
"loss": 0.1392, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.640072341308666e-07, |
|
"loss": 0.081, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.630584956565567e-07, |
|
"loss": 0.0894, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.621097571822468e-07, |
|
"loss": 0.1017, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.611610187079368e-07, |
|
"loss": 0.1476, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.602122802336267e-07, |
|
"loss": 0.1592, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.592635417593168e-07, |
|
"loss": 0.146, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.58314803285007e-07, |
|
"loss": 0.1552, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.57366064810697e-07, |
|
"loss": 0.0498, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.56417326336387e-07, |
|
"loss": 0.0999, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.55468587862077e-07, |
|
"loss": 0.1196, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.545198493877673e-07, |
|
"loss": 0.1172, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.535711109134573e-07, |
|
"loss": 0.1107, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.526223724391472e-07, |
|
"loss": 0.182, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.516736339648373e-07, |
|
"loss": 0.0687, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.507248954905273e-07, |
|
"loss": 0.1274, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.497761570162175e-07, |
|
"loss": 0.1374, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.488274185419075e-07, |
|
"loss": 0.159, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.478786800675975e-07, |
|
"loss": 0.0687, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.469299415932877e-07, |
|
"loss": 0.1446, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.459812031189777e-07, |
|
"loss": 0.1047, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.450324646446678e-07, |
|
"loss": 0.1445, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.440837261703578e-07, |
|
"loss": 0.1033, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.431349876960478e-07, |
|
"loss": 0.1514, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.421862492217379e-07, |
|
"loss": 0.1245, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.41237510747428e-07, |
|
"loss": 0.2032, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.402887722731181e-07, |
|
"loss": 0.0935, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.393400337988081e-07, |
|
"loss": 0.1492, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.383912953244981e-07, |
|
"loss": 0.1011, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.374425568501883e-07, |
|
"loss": 0.1279, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.364938183758783e-07, |
|
"loss": 0.0842, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.355599039402295e-07, |
|
"loss": 0.1482, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.346111654659194e-07, |
|
"loss": 0.1444, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.336624269916095e-07, |
|
"loss": 0.1124, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.327285125559607e-07, |
|
"loss": 0.1552, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.317797740816507e-07, |
|
"loss": 0.113, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.308310356073408e-07, |
|
"loss": 0.1406, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.298822971330309e-07, |
|
"loss": 0.1464, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.289335586587209e-07, |
|
"loss": 0.0889, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.279848201844111e-07, |
|
"loss": 0.1028, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.270360817101011e-07, |
|
"loss": 0.1816, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.26087343235791e-07, |
|
"loss": 0.0867, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.251386047614812e-07, |
|
"loss": 0.1426, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.241898662871712e-07, |
|
"loss": 0.1404, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.232411278128613e-07, |
|
"loss": 0.0599, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.222923893385513e-07, |
|
"loss": 0.0753, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.213436508642414e-07, |
|
"loss": 0.1109, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.203949123899315e-07, |
|
"loss": 0.1836, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.194461739156215e-07, |
|
"loss": 0.0633, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.184974354413117e-07, |
|
"loss": 0.1392, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.175486969670017e-07, |
|
"loss": 0.11, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.165999584926916e-07, |
|
"loss": 0.1901, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.156512200183818e-07, |
|
"loss": 0.1399, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.147024815440718e-07, |
|
"loss": 0.1312, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.137537430697619e-07, |
|
"loss": 0.0884, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.12805004595452e-07, |
|
"loss": 0.1663, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.11856266121142e-07, |
|
"loss": 0.0728, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.109075276468321e-07, |
|
"loss": 0.1709, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.099587891725221e-07, |
|
"loss": 0.1476, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.090100506982122e-07, |
|
"loss": 0.0778, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.080613122239022e-07, |
|
"loss": 0.1081, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.071125737495922e-07, |
|
"loss": 0.1059, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.061638352752824e-07, |
|
"loss": 0.1143, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.052150968009724e-07, |
|
"loss": 0.1205, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.042663583266624e-07, |
|
"loss": 0.1479, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.033176198523526e-07, |
|
"loss": 0.0817, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.023688813780426e-07, |
|
"loss": 0.1398, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.014201429037327e-07, |
|
"loss": 0.1608, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.004714044294228e-07, |
|
"loss": 0.1345, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.995226659551127e-07, |
|
"loss": 0.1773, |
|
"step": 6784 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.985739274808028e-07, |
|
"loss": 0.1769, |
|
"step": 6848 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.976251890064928e-07, |
|
"loss": 0.1345, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.96676450532183e-07, |
|
"loss": 0.1293, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.95727712057873e-07, |
|
"loss": 0.1388, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.94778973583563e-07, |
|
"loss": 0.1004, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.938302351092532e-07, |
|
"loss": 0.1363, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.928814966349432e-07, |
|
"loss": 0.0933, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.919327581606333e-07, |
|
"loss": 0.1617, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.909840196863233e-07, |
|
"loss": 0.1085, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.900352812120133e-07, |
|
"loss": 0.1836, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.890865427377034e-07, |
|
"loss": 0.1464, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.881378042633935e-07, |
|
"loss": 0.1166, |
|
"step": 7552 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.871890657890836e-07, |
|
"loss": 0.1071, |
|
"step": 7616 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.862403273147736e-07, |
|
"loss": 0.1564, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.852915888404636e-07, |
|
"loss": 0.0937, |
|
"step": 7744 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.843428503661538e-07, |
|
"loss": 0.1095, |
|
"step": 7808 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.833941118918438e-07, |
|
"loss": 0.1313, |
|
"step": 7872 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.824453734175337e-07, |
|
"loss": 0.0819, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.814966349432239e-07, |
|
"loss": 0.1273, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.805478964689139e-07, |
|
"loss": 0.1079, |
|
"step": 8064 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.79599157994604e-07, |
|
"loss": 0.0566, |
|
"step": 8128 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.786504195202941e-07, |
|
"loss": 0.0882, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.777016810459841e-07, |
|
"loss": 0.1801, |
|
"step": 8256 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.767529425716742e-07, |
|
"loss": 0.1006, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.758042040973643e-07, |
|
"loss": 0.1033, |
|
"step": 8384 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.748554656230544e-07, |
|
"loss": 0.0907, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.739067271487443e-07, |
|
"loss": 0.0548, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.729579886744343e-07, |
|
"loss": 0.1173, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.720092502001245e-07, |
|
"loss": 0.1415, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.710605117258145e-07, |
|
"loss": 0.1035, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.701117732515046e-07, |
|
"loss": 0.1016, |
|
"step": 8768 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.691630347771947e-07, |
|
"loss": 0.111, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.682291203415458e-07, |
|
"loss": 0.1427, |
|
"step": 8896 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.672803818672359e-07, |
|
"loss": 0.1181, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.66331643392926e-07, |
|
"loss": 0.097, |
|
"step": 9024 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.65382904918616e-07, |
|
"loss": 0.1367, |
|
"step": 9088 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.64434166444306e-07, |
|
"loss": 0.1212, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.634854279699961e-07, |
|
"loss": 0.1393, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.625366894956861e-07, |
|
"loss": 0.1322, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.615879510213763e-07, |
|
"loss": 0.0786, |
|
"step": 9344 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.606392125470663e-07, |
|
"loss": 0.1263, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.596904740727563e-07, |
|
"loss": 0.1537, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.587417355984465e-07, |
|
"loss": 0.1267, |
|
"step": 9536 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.577929971241365e-07, |
|
"loss": 0.1087, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.568442586498266e-07, |
|
"loss": 0.1097, |
|
"step": 9664 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.558955201755165e-07, |
|
"loss": 0.1423, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.549467817012066e-07, |
|
"loss": 0.0911, |
|
"step": 9792 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.539980432268967e-07, |
|
"loss": 0.0748, |
|
"step": 9856 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.530493047525867e-07, |
|
"loss": 0.112, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.521005662782769e-07, |
|
"loss": 0.1765, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.511518278039669e-07, |
|
"loss": 0.1539, |
|
"step": 10048 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.502030893296569e-07, |
|
"loss": 0.1382, |
|
"step": 10112 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.492543508553471e-07, |
|
"loss": 0.1146, |
|
"step": 10176 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.48305612381037e-07, |
|
"loss": 0.1589, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.473568739067271e-07, |
|
"loss": 0.1184, |
|
"step": 10304 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.464081354324172e-07, |
|
"loss": 0.0892, |
|
"step": 10368 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.454593969581072e-07, |
|
"loss": 0.1883, |
|
"step": 10432 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.445106584837973e-07, |
|
"loss": 0.1441, |
|
"step": 10496 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.435619200094873e-07, |
|
"loss": 0.1233, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.426131815351774e-07, |
|
"loss": 0.1376, |
|
"step": 10624 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.416644430608675e-07, |
|
"loss": 0.057, |
|
"step": 10688 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.407157045865575e-07, |
|
"loss": 0.0966, |
|
"step": 10752 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.397669661122477e-07, |
|
"loss": 0.1026, |
|
"step": 10816 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.388182276379376e-07, |
|
"loss": 0.1161, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.378694891636276e-07, |
|
"loss": 0.1889, |
|
"step": 10944 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.369207506893178e-07, |
|
"loss": 0.0808, |
|
"step": 11008 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.359868362536689e-07, |
|
"loss": 0.8512, |
|
"step": 11072 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.35038097779359e-07, |
|
"loss": 0.1572, |
|
"step": 11136 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.34089359305049e-07, |
|
"loss": 0.1883, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.331406208307391e-07, |
|
"loss": 0.0772, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.321918823564291e-07, |
|
"loss": 0.0924, |
|
"step": 11328 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.312431438821193e-07, |
|
"loss": 0.1308, |
|
"step": 11392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.302944054078093e-07, |
|
"loss": 0.2317, |
|
"step": 11456 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.293456669334992e-07, |
|
"loss": 0.1581, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.283969284591894e-07, |
|
"loss": 0.1068, |
|
"step": 11584 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.274481899848794e-07, |
|
"loss": 0.0793, |
|
"step": 11648 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.264994515105695e-07, |
|
"loss": 0.1407, |
|
"step": 11712 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.255507130362596e-07, |
|
"loss": 0.2219, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.246019745619496e-07, |
|
"loss": 0.1364, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.236532360876397e-07, |
|
"loss": 0.0842, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.227193216519909e-07, |
|
"loss": 0.0893, |
|
"step": 11968 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.217705831776809e-07, |
|
"loss": 0.1798, |
|
"step": 12032 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.20821844703371e-07, |
|
"loss": 0.1263, |
|
"step": 12096 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.19873106229061e-07, |
|
"loss": 0.1289, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.18924367754751e-07, |
|
"loss": 0.1712, |
|
"step": 12224 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.179756292804412e-07, |
|
"loss": 0.2207, |
|
"step": 12288 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.170268908061312e-07, |
|
"loss": 0.1219, |
|
"step": 12352 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.160781523318212e-07, |
|
"loss": 0.1446, |
|
"step": 12416 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.151294138575113e-07, |
|
"loss": 0.076, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.141806753832014e-07, |
|
"loss": 0.1448, |
|
"step": 12544 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.132319369088915e-07, |
|
"loss": 0.1613, |
|
"step": 12608 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.122831984345814e-07, |
|
"loss": 0.1241, |
|
"step": 12672 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.113344599602715e-07, |
|
"loss": 0.1833, |
|
"step": 12736 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.103857214859616e-07, |
|
"loss": 0.2071, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.094369830116516e-07, |
|
"loss": 0.1467, |
|
"step": 12864 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.084882445373418e-07, |
|
"loss": 0.0889, |
|
"step": 12928 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.075395060630318e-07, |
|
"loss": 0.141, |
|
"step": 12992 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.065907675887218e-07, |
|
"loss": 0.106, |
|
"step": 13056 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.05642029114412e-07, |
|
"loss": 0.1359, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.04693290640102e-07, |
|
"loss": 0.2273, |
|
"step": 13184 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.03744552165792e-07, |
|
"loss": 0.1071, |
|
"step": 13248 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.02795813691482e-07, |
|
"loss": 0.1106, |
|
"step": 13312 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.018470752171721e-07, |
|
"loss": 0.1035, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.008983367428622e-07, |
|
"loss": 0.1459, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.999495982685522e-07, |
|
"loss": 0.1739, |
|
"step": 13504 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.990008597942423e-07, |
|
"loss": 0.116, |
|
"step": 13568 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.980521213199324e-07, |
|
"loss": 0.1017, |
|
"step": 13632 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.971033828456224e-07, |
|
"loss": 0.1414, |
|
"step": 13696 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.961546443713126e-07, |
|
"loss": 0.2619, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.952059058970026e-07, |
|
"loss": 0.1166, |
|
"step": 13824 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.942571674226925e-07, |
|
"loss": 0.142, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.933084289483827e-07, |
|
"loss": 0.2068, |
|
"step": 13952 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.923596904740727e-07, |
|
"loss": 0.1119, |
|
"step": 14016 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.914109519997628e-07, |
|
"loss": 0.143, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.904770375641139e-07, |
|
"loss": 0.1718, |
|
"step": 14144 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.89528299089804e-07, |
|
"loss": 0.0871, |
|
"step": 14208 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.88579560615494e-07, |
|
"loss": 0.1059, |
|
"step": 14272 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.876308221411842e-07, |
|
"loss": 0.1276, |
|
"step": 14336 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.866820836668742e-07, |
|
"loss": 0.1503, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.857333451925641e-07, |
|
"loss": 0.2201, |
|
"step": 14464 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.847846067182543e-07, |
|
"loss": 0.1737, |
|
"step": 14528 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.838358682439443e-07, |
|
"loss": 0.1133, |
|
"step": 14592 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.828871297696344e-07, |
|
"loss": 0.1378, |
|
"step": 14656 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.819383912953245e-07, |
|
"loss": 0.166, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.809896528210145e-07, |
|
"loss": 0.2001, |
|
"step": 14784 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.800409143467046e-07, |
|
"loss": 0.158, |
|
"step": 14848 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.790921758723946e-07, |
|
"loss": 0.1009, |
|
"step": 14912 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.781434373980848e-07, |
|
"loss": 0.2009, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.771946989237747e-07, |
|
"loss": 0.1437, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.762459604494647e-07, |
|
"loss": 0.1653, |
|
"step": 15104 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.752972219751549e-07, |
|
"loss": 0.1736, |
|
"step": 15168 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.743484835008449e-07, |
|
"loss": 0.2137, |
|
"step": 15232 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.73399745026535e-07, |
|
"loss": 0.1282, |
|
"step": 15296 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.724510065522251e-07, |
|
"loss": 0.1766, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.715022680779151e-07, |
|
"loss": 0.0876, |
|
"step": 15424 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.705535296036052e-07, |
|
"loss": 0.1338, |
|
"step": 15488 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.696047911292953e-07, |
|
"loss": 0.2006, |
|
"step": 15552 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.686560526549854e-07, |
|
"loss": 0.1505, |
|
"step": 15616 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.677073141806753e-07, |
|
"loss": 0.1223, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.667585757063653e-07, |
|
"loss": 0.1693, |
|
"step": 15744 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.658098372320555e-07, |
|
"loss": 0.1872, |
|
"step": 15808 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.648610987577455e-07, |
|
"loss": 0.1109, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.639123602834355e-07, |
|
"loss": 0.1351, |
|
"step": 15936 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.629636218091257e-07, |
|
"loss": 0.1336, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.620148833348157e-07, |
|
"loss": 0.2153, |
|
"step": 16064 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.610661448605058e-07, |
|
"loss": 0.2274, |
|
"step": 16128 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.601174063861959e-07, |
|
"loss": 0.105, |
|
"step": 16192 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.591686679118858e-07, |
|
"loss": 0.1016, |
|
"step": 16256 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.582199294375759e-07, |
|
"loss": 0.116, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.57271190963266e-07, |
|
"loss": 0.1086, |
|
"step": 16384 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.563224524889561e-07, |
|
"loss": 0.1153, |
|
"step": 16448 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.553737140146461e-07, |
|
"loss": 0.1044, |
|
"step": 16512 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.544249755403361e-07, |
|
"loss": 0.2228, |
|
"step": 16576 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.534762370660263e-07, |
|
"loss": 0.1002, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.525274985917163e-07, |
|
"loss": 0.1919, |
|
"step": 16704 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.515787601174063e-07, |
|
"loss": 0.13, |
|
"step": 16768 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.506300216430964e-07, |
|
"loss": 0.1165, |
|
"step": 16832 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.496812831687864e-07, |
|
"loss": 0.1385, |
|
"step": 16896 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.487325446944765e-07, |
|
"loss": 0.1836, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.477838062201666e-07, |
|
"loss": 0.1852, |
|
"step": 17024 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.468498917845177e-07, |
|
"loss": 0.1721, |
|
"step": 17088 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.459011533102078e-07, |
|
"loss": 0.1164, |
|
"step": 17152 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.449524148358979e-07, |
|
"loss": 0.0989, |
|
"step": 17216 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.440036763615879e-07, |
|
"loss": 0.1524, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.430549378872781e-07, |
|
"loss": 0.174, |
|
"step": 17344 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.42106199412968e-07, |
|
"loss": 0.1509, |
|
"step": 17408 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.41157460938658e-07, |
|
"loss": 0.1264, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.402087224643482e-07, |
|
"loss": 0.1148, |
|
"step": 17536 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.392599839900382e-07, |
|
"loss": 0.1227, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.383112455157283e-07, |
|
"loss": 0.1462, |
|
"step": 17664 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.373625070414183e-07, |
|
"loss": 0.1673, |
|
"step": 17728 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.364137685671084e-07, |
|
"loss": 0.1274, |
|
"step": 17792 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.354650300927985e-07, |
|
"loss": 0.162, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.345162916184885e-07, |
|
"loss": 0.148, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.335675531441787e-07, |
|
"loss": 0.157, |
|
"step": 17984 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.326188146698686e-07, |
|
"loss": 0.1419, |
|
"step": 18048 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.316700761955586e-07, |
|
"loss": 0.1916, |
|
"step": 18112 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.307213377212488e-07, |
|
"loss": 0.1772, |
|
"step": 18176 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.297725992469388e-07, |
|
"loss": 0.1243, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.288238607726288e-07, |
|
"loss": 0.1738, |
|
"step": 18304 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.27875122298319e-07, |
|
"loss": 0.165, |
|
"step": 18368 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.26926383824009e-07, |
|
"loss": 0.0869, |
|
"step": 18432 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.259776453496991e-07, |
|
"loss": 0.1613, |
|
"step": 18496 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.25028906875389e-07, |
|
"loss": 0.1646, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.240801684010791e-07, |
|
"loss": 0.1232, |
|
"step": 18624 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.231314299267692e-07, |
|
"loss": 0.1695, |
|
"step": 18688 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.221826914524592e-07, |
|
"loss": 0.1305, |
|
"step": 18752 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.212339529781494e-07, |
|
"loss": 0.2047, |
|
"step": 18816 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.202852145038394e-07, |
|
"loss": 0.1881, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.193364760295294e-07, |
|
"loss": 0.1513, |
|
"step": 18944 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.183877375552196e-07, |
|
"loss": 0.1355, |
|
"step": 19008 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.174389990809096e-07, |
|
"loss": 0.1733, |
|
"step": 19072 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.164902606065996e-07, |
|
"loss": 0.1647, |
|
"step": 19136 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.155415221322897e-07, |
|
"loss": 0.132, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.145927836579797e-07, |
|
"loss": 0.2464, |
|
"step": 19264 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.136440451836698e-07, |
|
"loss": 0.1954, |
|
"step": 19328 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.126953067093598e-07, |
|
"loss": 0.1751, |
|
"step": 19392 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.1174656823505e-07, |
|
"loss": 0.1182, |
|
"step": 19456 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.1079782976074e-07, |
|
"loss": 0.0678, |
|
"step": 19520 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.0984909128643e-07, |
|
"loss": 0.1656, |
|
"step": 19584 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.089003528121202e-07, |
|
"loss": 0.219, |
|
"step": 19648 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.079516143378101e-07, |
|
"loss": 0.15, |
|
"step": 19712 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.070028758635002e-07, |
|
"loss": 0.1845, |
|
"step": 19776 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.060541373891903e-07, |
|
"loss": 0.1766, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.051053989148803e-07, |
|
"loss": 0.1355, |
|
"step": 19904 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.041566604405704e-07, |
|
"loss": 0.1268, |
|
"step": 19968 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.032079219662605e-07, |
|
"loss": 0.191, |
|
"step": 20032 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.022591834919506e-07, |
|
"loss": 0.2032, |
|
"step": 20096 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.013104450176406e-07, |
|
"loss": 0.2632, |
|
"step": 20160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.003617065433306e-07, |
|
"loss": 0.2028, |
|
"step": 20224 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.994129680690208e-07, |
|
"loss": 0.135, |
|
"step": 20288 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.984642295947107e-07, |
|
"loss": 0.1635, |
|
"step": 20352 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.975154911204007e-07, |
|
"loss": 0.1986, |
|
"step": 20416 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.965667526460909e-07, |
|
"loss": 0.1786, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.956180141717809e-07, |
|
"loss": 0.116, |
|
"step": 20544 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.94669275697471e-07, |
|
"loss": 0.2206, |
|
"step": 20608 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.937205372231611e-07, |
|
"loss": 0.0995, |
|
"step": 20672 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.927717987488511e-07, |
|
"loss": 0.1634, |
|
"step": 20736 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.918230602745412e-07, |
|
"loss": 0.1429, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.908743218002312e-07, |
|
"loss": 0.2244, |
|
"step": 20864 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.899255833259213e-07, |
|
"loss": 0.1666, |
|
"step": 20928 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.889768448516113e-07, |
|
"loss": 0.1413, |
|
"step": 20992 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.880281063773013e-07, |
|
"loss": 0.1824, |
|
"step": 21056 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.870941919416525e-07, |
|
"loss": 0.1737, |
|
"step": 21120 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.861454534673426e-07, |
|
"loss": 0.171, |
|
"step": 21184 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.851967149930327e-07, |
|
"loss": 0.1571, |
|
"step": 21248 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.842479765187227e-07, |
|
"loss": 0.115, |
|
"step": 21312 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.832992380444128e-07, |
|
"loss": 0.2192, |
|
"step": 21376 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.823504995701029e-07, |
|
"loss": 0.146, |
|
"step": 21440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.81401761095793e-07, |
|
"loss": 0.1586, |
|
"step": 21504 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.804530226214829e-07, |
|
"loss": 0.1035, |
|
"step": 21568 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.795191081858341e-07, |
|
"loss": 0.1898, |
|
"step": 21632 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.785703697115241e-07, |
|
"loss": 0.2901, |
|
"step": 21696 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.776216312372143e-07, |
|
"loss": 0.1986, |
|
"step": 21760 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.766728927629043e-07, |
|
"loss": 0.1774, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.757241542885943e-07, |
|
"loss": 0.1945, |
|
"step": 21888 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.747754158142845e-07, |
|
"loss": 0.1453, |
|
"step": 21952 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.738266773399745e-07, |
|
"loss": 0.1882, |
|
"step": 22016 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.728779388656646e-07, |
|
"loss": 0.2266, |
|
"step": 22080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.719292003913545e-07, |
|
"loss": 0.2192, |
|
"step": 22144 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.709804619170446e-07, |
|
"loss": 0.1191, |
|
"step": 22208 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.700317234427347e-07, |
|
"loss": 0.1323, |
|
"step": 22272 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.690829849684247e-07, |
|
"loss": 0.2345, |
|
"step": 22336 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.681342464941149e-07, |
|
"loss": 0.122, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.671855080198049e-07, |
|
"loss": 0.1643, |
|
"step": 22464 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.662367695454949e-07, |
|
"loss": 0.1756, |
|
"step": 22528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.653028551098461e-07, |
|
"loss": 0.0794, |
|
"step": 22592 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.643541166355362e-07, |
|
"loss": 0.0636, |
|
"step": 22656 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.634053781612263e-07, |
|
"loss": 0.1252, |
|
"step": 22720 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.624566396869162e-07, |
|
"loss": 0.1114, |
|
"step": 22784 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.615079012126063e-07, |
|
"loss": 0.1799, |
|
"step": 22848 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.605591627382963e-07, |
|
"loss": 0.1088, |
|
"step": 22912 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.596104242639865e-07, |
|
"loss": 0.0468, |
|
"step": 22976 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.586616857896765e-07, |
|
"loss": 0.0891, |
|
"step": 23040 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.577129473153665e-07, |
|
"loss": 0.0967, |
|
"step": 23104 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.567642088410567e-07, |
|
"loss": 0.102, |
|
"step": 23168 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.558154703667467e-07, |
|
"loss": 0.0713, |
|
"step": 23232 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.548667318924368e-07, |
|
"loss": 0.0656, |
|
"step": 23296 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.539179934181268e-07, |
|
"loss": 0.1185, |
|
"step": 23360 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.529692549438168e-07, |
|
"loss": 0.0787, |
|
"step": 23424 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.520205164695069e-07, |
|
"loss": 0.0442, |
|
"step": 23488 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.51071777995197e-07, |
|
"loss": 0.0793, |
|
"step": 23552 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.501230395208871e-07, |
|
"loss": 0.1479, |
|
"step": 23616 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.491743010465771e-07, |
|
"loss": 0.0891, |
|
"step": 23680 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.482255625722671e-07, |
|
"loss": 0.0879, |
|
"step": 23744 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.472768240979573e-07, |
|
"loss": 0.0888, |
|
"step": 23808 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.463429096623084e-07, |
|
"loss": 0.1321, |
|
"step": 23872 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.453941711879985e-07, |
|
"loss": 0.0842, |
|
"step": 23936 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.444454327136884e-07, |
|
"loss": 0.1128, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.434966942393785e-07, |
|
"loss": 0.1089, |
|
"step": 24064 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.425479557650686e-07, |
|
"loss": 0.078, |
|
"step": 24128 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.415992172907586e-07, |
|
"loss": 0.1481, |
|
"step": 24192 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.406504788164487e-07, |
|
"loss": 0.0639, |
|
"step": 24256 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.397017403421388e-07, |
|
"loss": 0.1182, |
|
"step": 24320 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.387530018678289e-07, |
|
"loss": 0.0864, |
|
"step": 24384 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.378042633935189e-07, |
|
"loss": 0.0626, |
|
"step": 24448 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.36855524919209e-07, |
|
"loss": 0.0763, |
|
"step": 24512 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.35906786444899e-07, |
|
"loss": 0.1031, |
|
"step": 24576 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.34958047970589e-07, |
|
"loss": 0.0772, |
|
"step": 24640 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.340093094962791e-07, |
|
"loss": 0.1173, |
|
"step": 24704 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.330605710219692e-07, |
|
"loss": 0.1254, |
|
"step": 24768 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.321118325476592e-07, |
|
"loss": 0.1646, |
|
"step": 24832 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.311630940733493e-07, |
|
"loss": 0.1065, |
|
"step": 24896 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.302143555990394e-07, |
|
"loss": 0.0916, |
|
"step": 24960 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.292656171247295e-07, |
|
"loss": 0.0769, |
|
"step": 25024 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.283168786504195e-07, |
|
"loss": 0.05, |
|
"step": 25088 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.273681401761095e-07, |
|
"loss": 0.0748, |
|
"step": 25152 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.264194017017996e-07, |
|
"loss": 0.089, |
|
"step": 25216 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.254706632274896e-07, |
|
"loss": 0.0951, |
|
"step": 25280 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.245219247531798e-07, |
|
"loss": 0.0955, |
|
"step": 25344 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.235731862788698e-07, |
|
"loss": 0.1024, |
|
"step": 25408 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.226244478045598e-07, |
|
"loss": 0.0898, |
|
"step": 25472 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.216757093302499e-07, |
|
"loss": 0.1351, |
|
"step": 25536 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.2072697085594e-07, |
|
"loss": 0.0797, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.197782323816301e-07, |
|
"loss": 0.0459, |
|
"step": 25664 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.1882949390732e-07, |
|
"loss": 0.0463, |
|
"step": 25728 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.178807554330101e-07, |
|
"loss": 0.0487, |
|
"step": 25792 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.169320169587002e-07, |
|
"loss": 0.0971, |
|
"step": 25856 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.159832784843902e-07, |
|
"loss": 0.0793, |
|
"step": 25920 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.150345400100804e-07, |
|
"loss": 0.0507, |
|
"step": 25984 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.140858015357704e-07, |
|
"loss": 0.1244, |
|
"step": 26048 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.131370630614604e-07, |
|
"loss": 0.1191, |
|
"step": 26112 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.121883245871506e-07, |
|
"loss": 0.1716, |
|
"step": 26176 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.112395861128406e-07, |
|
"loss": 0.1052, |
|
"step": 26240 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.102908476385305e-07, |
|
"loss": 0.1227, |
|
"step": 26304 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.093421091642206e-07, |
|
"loss": 0.0686, |
|
"step": 26368 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.083933706899107e-07, |
|
"loss": 0.1174, |
|
"step": 26432 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.074446322156008e-07, |
|
"loss": 0.0606, |
|
"step": 26496 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.064958937412908e-07, |
|
"loss": 0.1176, |
|
"step": 26560 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.055471552669809e-07, |
|
"loss": 0.0385, |
|
"step": 26624 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.04598416792671e-07, |
|
"loss": 0.0468, |
|
"step": 26688 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.03649678318361e-07, |
|
"loss": 0.1169, |
|
"step": 26752 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.027009398440512e-07, |
|
"loss": 0.1115, |
|
"step": 26816 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.017522013697411e-07, |
|
"loss": 0.1258, |
|
"step": 26880 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.008034628954311e-07, |
|
"loss": 0.1025, |
|
"step": 26944 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5.998547244211213e-07, |
|
"loss": 0.0834, |
|
"step": 27008 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5.989059859468113e-07, |
|
"loss": 0.0421, |
|
"step": 27072 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5.979572474725014e-07, |
|
"loss": 0.0936, |
|
"step": 27136 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5.970085089981914e-07, |
|
"loss": 0.1253, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5.960597705238815e-07, |
|
"loss": 0.1037, |
|
"step": 27264 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.951110320495716e-07, |
|
"loss": 0.0553, |
|
"step": 27328 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.941622935752616e-07, |
|
"loss": 0.0932, |
|
"step": 27392 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.932135551009518e-07, |
|
"loss": 0.0359, |
|
"step": 27456 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.922648166266417e-07, |
|
"loss": 0.0564, |
|
"step": 27520 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.913160781523317e-07, |
|
"loss": 0.0475, |
|
"step": 27584 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.903673396780219e-07, |
|
"loss": 0.1187, |
|
"step": 27648 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.894186012037119e-07, |
|
"loss": 0.102, |
|
"step": 27712 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.88469862729402e-07, |
|
"loss": 0.1048, |
|
"step": 27776 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.875211242550921e-07, |
|
"loss": 0.1066, |
|
"step": 27840 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.865723857807821e-07, |
|
"loss": 0.1039, |
|
"step": 27904 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.856236473064722e-07, |
|
"loss": 0.1027, |
|
"step": 27968 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.846749088321621e-07, |
|
"loss": 0.0535, |
|
"step": 28032 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.837261703578523e-07, |
|
"loss": 0.0737, |
|
"step": 28096 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.827774318835423e-07, |
|
"loss": 0.094, |
|
"step": 28160 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.818286934092323e-07, |
|
"loss": 0.1002, |
|
"step": 28224 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.808799549349225e-07, |
|
"loss": 0.0618, |
|
"step": 28288 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.799312164606125e-07, |
|
"loss": 0.1356, |
|
"step": 28352 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.789824779863025e-07, |
|
"loss": 0.0802, |
|
"step": 28416 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.780337395119927e-07, |
|
"loss": 0.1333, |
|
"step": 28480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.770850010376827e-07, |
|
"loss": 0.0687, |
|
"step": 28544 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.761362625633727e-07, |
|
"loss": 0.0562, |
|
"step": 28608 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.751875240890628e-07, |
|
"loss": 0.0955, |
|
"step": 28672 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.742387856147528e-07, |
|
"loss": 0.0915, |
|
"step": 28736 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.732900471404429e-07, |
|
"loss": 0.0632, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.723413086661329e-07, |
|
"loss": 0.1047, |
|
"step": 28864 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.713925701918231e-07, |
|
"loss": 0.0981, |
|
"step": 28928 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.704438317175131e-07, |
|
"loss": 0.0954, |
|
"step": 28992 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.694950932432031e-07, |
|
"loss": 0.0572, |
|
"step": 29056 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.685463547688933e-07, |
|
"loss": 0.1177, |
|
"step": 29120 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.675976162945832e-07, |
|
"loss": 0.0748, |
|
"step": 29184 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.666488778202733e-07, |
|
"loss": 0.0783, |
|
"step": 29248 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.657001393459634e-07, |
|
"loss": 0.0556, |
|
"step": 29312 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.647514008716534e-07, |
|
"loss": 0.0592, |
|
"step": 29376 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.638026623973435e-07, |
|
"loss": 0.0917, |
|
"step": 29440 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.628539239230336e-07, |
|
"loss": 0.0847, |
|
"step": 29504 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.619051854487237e-07, |
|
"loss": 0.1253, |
|
"step": 29568 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.609564469744137e-07, |
|
"loss": 0.1362, |
|
"step": 29632 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.600077085001037e-07, |
|
"loss": 0.0799, |
|
"step": 29696 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.590589700257938e-07, |
|
"loss": 0.1235, |
|
"step": 29760 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.581102315514838e-07, |
|
"loss": 0.056, |
|
"step": 29824 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.571614930771739e-07, |
|
"loss": 0.0952, |
|
"step": 29888 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.56212754602864e-07, |
|
"loss": 0.102, |
|
"step": 29952 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.55264016128554e-07, |
|
"loss": 0.105, |
|
"step": 30016 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.543152776542441e-07, |
|
"loss": 0.1049, |
|
"step": 30080 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.533665391799342e-07, |
|
"loss": 0.0722, |
|
"step": 30144 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.524178007056243e-07, |
|
"loss": 0.0389, |
|
"step": 30208 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.514690622313143e-07, |
|
"loss": 0.0589, |
|
"step": 30272 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.505203237570043e-07, |
|
"loss": 0.1049, |
|
"step": 30336 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.495715852826944e-07, |
|
"loss": 0.1013, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.486228468083844e-07, |
|
"loss": 0.0866, |
|
"step": 30464 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.476741083340744e-07, |
|
"loss": 0.1195, |
|
"step": 30528 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.467253698597646e-07, |
|
"loss": 0.049, |
|
"step": 30592 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.457766313854546e-07, |
|
"loss": 0.0521, |
|
"step": 30656 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.448278929111447e-07, |
|
"loss": 0.0658, |
|
"step": 30720 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.438791544368348e-07, |
|
"loss": 0.0735, |
|
"step": 30784 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.429304159625248e-07, |
|
"loss": 0.1463, |
|
"step": 30848 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.419816774882148e-07, |
|
"loss": 0.1333, |
|
"step": 30912 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.410329390139049e-07, |
|
"loss": 0.0795, |
|
"step": 30976 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.40084200539595e-07, |
|
"loss": 0.0599, |
|
"step": 31040 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.39135462065285e-07, |
|
"loss": 0.1246, |
|
"step": 31104 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.381867235909751e-07, |
|
"loss": 0.0802, |
|
"step": 31168 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.372379851166652e-07, |
|
"loss": 0.0565, |
|
"step": 31232 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.362892466423552e-07, |
|
"loss": 0.0646, |
|
"step": 31296 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.353405081680454e-07, |
|
"loss": 0.0712, |
|
"step": 31360 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.343917696937354e-07, |
|
"loss": 0.1214, |
|
"step": 31424 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.334430312194253e-07, |
|
"loss": 0.0654, |
|
"step": 31488 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.324942927451154e-07, |
|
"loss": 0.1136, |
|
"step": 31552 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.315455542708055e-07, |
|
"loss": 0.0907, |
|
"step": 31616 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.305968157964956e-07, |
|
"loss": 0.0797, |
|
"step": 31680 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.296480773221856e-07, |
|
"loss": 0.0739, |
|
"step": 31744 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.286993388478757e-07, |
|
"loss": 0.0763, |
|
"step": 31808 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.277654244122268e-07, |
|
"loss": 0.0724, |
|
"step": 31872 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.26816685937917e-07, |
|
"loss": 0.0855, |
|
"step": 31936 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.25867947463607e-07, |
|
"loss": 0.0873, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.24919208989297e-07, |
|
"loss": 0.0688, |
|
"step": 32064 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.239704705149871e-07, |
|
"loss": 0.0945, |
|
"step": 32128 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.230217320406771e-07, |
|
"loss": 0.1052, |
|
"step": 32192 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.220729935663672e-07, |
|
"loss": 0.057, |
|
"step": 32256 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.211242550920573e-07, |
|
"loss": 0.1135, |
|
"step": 32320 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.201755166177473e-07, |
|
"loss": 0.0806, |
|
"step": 32384 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.192267781434374e-07, |
|
"loss": 0.0931, |
|
"step": 32448 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.182780396691274e-07, |
|
"loss": 0.0863, |
|
"step": 32512 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.173293011948176e-07, |
|
"loss": 0.0623, |
|
"step": 32576 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.163805627205076e-07, |
|
"loss": 0.0643, |
|
"step": 32640 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.154318242461975e-07, |
|
"loss": 0.0693, |
|
"step": 32704 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.144830857718877e-07, |
|
"loss": 0.0433, |
|
"step": 32768 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.135343472975777e-07, |
|
"loss": 0.0808, |
|
"step": 32832 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.125856088232677e-07, |
|
"loss": 0.0939, |
|
"step": 32896 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.116368703489579e-07, |
|
"loss": 0.0852, |
|
"step": 32960 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.106881318746479e-07, |
|
"loss": 0.155, |
|
"step": 33024 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.09739393400338e-07, |
|
"loss": 0.127, |
|
"step": 33088 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.087906549260281e-07, |
|
"loss": 0.0432, |
|
"step": 33152 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.078419164517181e-07, |
|
"loss": 0.1161, |
|
"step": 33216 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.068931779774081e-07, |
|
"loss": 0.0877, |
|
"step": 33280 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.059444395030981e-07, |
|
"loss": 0.0579, |
|
"step": 33344 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.049957010287883e-07, |
|
"loss": 0.0408, |
|
"step": 33408 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.040469625544783e-07, |
|
"loss": 0.062, |
|
"step": 33472 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.030982240801683e-07, |
|
"loss": 0.1147, |
|
"step": 33536 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.021494856058585e-07, |
|
"loss": 0.1064, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.012007471315485e-07, |
|
"loss": 0.1084, |
|
"step": 33664 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.002520086572386e-07, |
|
"loss": 0.1422, |
|
"step": 33728 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.993032701829287e-07, |
|
"loss": 0.1086, |
|
"step": 33792 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.983545317086186e-07, |
|
"loss": 0.0744, |
|
"step": 33856 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.974057932343087e-07, |
|
"loss": 0.1086, |
|
"step": 33920 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.964570547599988e-07, |
|
"loss": 0.1106, |
|
"step": 33984 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.955083162856888e-07, |
|
"loss": 0.0761, |
|
"step": 34048 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.945595778113789e-07, |
|
"loss": 0.1153, |
|
"step": 34112 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.93610839337069e-07, |
|
"loss": 0.0666, |
|
"step": 34176 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.92662100862759e-07, |
|
"loss": 0.095, |
|
"step": 34240 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.917133623884491e-07, |
|
"loss": 0.1187, |
|
"step": 34304 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.907646239141392e-07, |
|
"loss": 0.0593, |
|
"step": 34368 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.898158854398292e-07, |
|
"loss": 0.0832, |
|
"step": 34432 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.888671469655192e-07, |
|
"loss": 0.0724, |
|
"step": 34496 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.879184084912093e-07, |
|
"loss": 0.0552, |
|
"step": 34560 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.869696700168994e-07, |
|
"loss": 0.0912, |
|
"step": 34624 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.860209315425894e-07, |
|
"loss": 0.0644, |
|
"step": 34688 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.850721930682795e-07, |
|
"loss": 0.0813, |
|
"step": 34752 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.841234545939696e-07, |
|
"loss": 0.1482, |
|
"step": 34816 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.831747161196596e-07, |
|
"loss": 0.1192, |
|
"step": 34880 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.822259776453497e-07, |
|
"loss": 0.09, |
|
"step": 34944 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.812772391710397e-07, |
|
"loss": 0.0527, |
|
"step": 35008 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.803285006967298e-07, |
|
"loss": 0.0867, |
|
"step": 35072 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.793797622224199e-07, |
|
"loss": 0.0483, |
|
"step": 35136 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.784310237481099e-07, |
|
"loss": 0.1053, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.774822852738e-07, |
|
"loss": 0.1272, |
|
"step": 35264 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.7653354679949004e-07, |
|
"loss": 0.1117, |
|
"step": 35328 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.7558480832518013e-07, |
|
"loss": 0.1487, |
|
"step": 35392 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.746360698508701e-07, |
|
"loss": 0.0671, |
|
"step": 35456 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.736873313765602e-07, |
|
"loss": 0.0657, |
|
"step": 35520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.7273859290225024e-07, |
|
"loss": 0.1076, |
|
"step": 35584 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.7178985442794033e-07, |
|
"loss": 0.1425, |
|
"step": 35648 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.708411159536304e-07, |
|
"loss": 0.088, |
|
"step": 35712 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.698923774793204e-07, |
|
"loss": 0.1325, |
|
"step": 35776 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.689436390050105e-07, |
|
"loss": 0.1102, |
|
"step": 35840 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.679949005307006e-07, |
|
"loss": 0.126, |
|
"step": 35904 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.6704616205639063e-07, |
|
"loss": 0.074, |
|
"step": 35968 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.661122476207417e-07, |
|
"loss": 0.1062, |
|
"step": 36032 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.651635091464318e-07, |
|
"loss": 0.0812, |
|
"step": 36096 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.642147706721219e-07, |
|
"loss": 0.1062, |
|
"step": 36160 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.6326603219781194e-07, |
|
"loss": 0.0861, |
|
"step": 36224 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.6231729372350203e-07, |
|
"loss": 0.0879, |
|
"step": 36288 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.6136855524919207e-07, |
|
"loss": 0.0649, |
|
"step": 36352 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.604198167748821e-07, |
|
"loss": 0.0695, |
|
"step": 36416 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.594710783005722e-07, |
|
"loss": 0.0705, |
|
"step": 36480 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.5852233982626224e-07, |
|
"loss": 0.1642, |
|
"step": 36544 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.5757360135195233e-07, |
|
"loss": 0.0944, |
|
"step": 36608 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.5662486287764237e-07, |
|
"loss": 0.0872, |
|
"step": 36672 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.556909484419935e-07, |
|
"loss": 0.105, |
|
"step": 36736 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.547422099676836e-07, |
|
"loss": 0.0825, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.5379347149337364e-07, |
|
"loss": 0.1063, |
|
"step": 36864 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.528447330190637e-07, |
|
"loss": 0.0384, |
|
"step": 36928 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.518959945447537e-07, |
|
"loss": 0.0765, |
|
"step": 36992 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.509472560704438e-07, |
|
"loss": 0.1533, |
|
"step": 37056 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.499985175961339e-07, |
|
"loss": 0.0634, |
|
"step": 37120 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.490497791218239e-07, |
|
"loss": 0.074, |
|
"step": 37184 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.48101040647514e-07, |
|
"loss": 0.0641, |
|
"step": 37248 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.4715230217320407e-07, |
|
"loss": 0.0927, |
|
"step": 37312 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.462035636988941e-07, |
|
"loss": 0.0837, |
|
"step": 37376 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.452548252245842e-07, |
|
"loss": 0.1142, |
|
"step": 37440 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.443060867502742e-07, |
|
"loss": 0.0672, |
|
"step": 37504 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.4335734827596427e-07, |
|
"loss": 0.0726, |
|
"step": 37568 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.4240860980165436e-07, |
|
"loss": 0.1104, |
|
"step": 37632 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.414598713273444e-07, |
|
"loss": 0.1157, |
|
"step": 37696 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.405111328530345e-07, |
|
"loss": 0.0694, |
|
"step": 37760 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.3956239437872447e-07, |
|
"loss": 0.0905, |
|
"step": 37824 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.3861365590441456e-07, |
|
"loss": 0.134, |
|
"step": 37888 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.3766491743010465e-07, |
|
"loss": 0.0478, |
|
"step": 37952 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.367161789557947e-07, |
|
"loss": 0.1028, |
|
"step": 38016 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.357674404814848e-07, |
|
"loss": 0.1023, |
|
"step": 38080 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.348187020071748e-07, |
|
"loss": 0.1258, |
|
"step": 38144 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.3386996353286486e-07, |
|
"loss": 0.0619, |
|
"step": 38208 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.3292122505855495e-07, |
|
"loss": 0.1135, |
|
"step": 38272 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.31972486584245e-07, |
|
"loss": 0.062, |
|
"step": 38336 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.31023748109935e-07, |
|
"loss": 0.0513, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.300750096356251e-07, |
|
"loss": 0.0451, |
|
"step": 38464 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.2912627116131515e-07, |
|
"loss": 0.1072, |
|
"step": 38528 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.2817753268700524e-07, |
|
"loss": 0.0962, |
|
"step": 38592 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.2722879421269533e-07, |
|
"loss": 0.0748, |
|
"step": 38656 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.262800557383853e-07, |
|
"loss": 0.1036, |
|
"step": 38720 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.253313172640754e-07, |
|
"loss": 0.0929, |
|
"step": 38784 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.2438257878976545e-07, |
|
"loss": 0.145, |
|
"step": 38848 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.2343384031545554e-07, |
|
"loss": 0.0449, |
|
"step": 38912 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.2248510184114563e-07, |
|
"loss": 0.1341, |
|
"step": 38976 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.215363633668356e-07, |
|
"loss": 0.1309, |
|
"step": 39040 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.205876248925257e-07, |
|
"loss": 0.0446, |
|
"step": 39104 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.1963888641821574e-07, |
|
"loss": 0.0743, |
|
"step": 39168 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.1870497198256694e-07, |
|
"loss": 0.0653, |
|
"step": 39232 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.1775623350825693e-07, |
|
"loss": 0.0822, |
|
"step": 39296 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.16807495033947e-07, |
|
"loss": 0.0777, |
|
"step": 39360 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.158587565596371e-07, |
|
"loss": 0.126, |
|
"step": 39424 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.1491001808532715e-07, |
|
"loss": 0.0348, |
|
"step": 39488 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.1396127961101724e-07, |
|
"loss": 0.0783, |
|
"step": 39552 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.130125411367072e-07, |
|
"loss": 0.0846, |
|
"step": 39616 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.120638026623973e-07, |
|
"loss": 0.0792, |
|
"step": 39680 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.111150641880874e-07, |
|
"loss": 0.0684, |
|
"step": 39744 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.1016632571377744e-07, |
|
"loss": 0.0712, |
|
"step": 39808 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.092175872394675e-07, |
|
"loss": 0.0962, |
|
"step": 39872 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.0826884876515757e-07, |
|
"loss": 0.0995, |
|
"step": 39936 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.073201102908476e-07, |
|
"loss": 0.0408, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.063713718165377e-07, |
|
"loss": 0.0742, |
|
"step": 40064 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.0542263334222774e-07, |
|
"loss": 0.0876, |
|
"step": 40128 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.044738948679178e-07, |
|
"loss": 0.0912, |
|
"step": 40192 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.0352515639360786e-07, |
|
"loss": 0.0635, |
|
"step": 40256 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.025764179192979e-07, |
|
"loss": 0.0641, |
|
"step": 40320 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.01627679444988e-07, |
|
"loss": 0.1015, |
|
"step": 40384 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.006789409706781e-07, |
|
"loss": 0.0551, |
|
"step": 40448 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.9973020249636807e-07, |
|
"loss": 0.1139, |
|
"step": 40512 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.9878146402205816e-07, |
|
"loss": 0.1007, |
|
"step": 40576 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.978475495864093e-07, |
|
"loss": 0.1153, |
|
"step": 40640 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.969136351507604e-07, |
|
"loss": 0.0426, |
|
"step": 40704 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.959648966764505e-07, |
|
"loss": 0.1175, |
|
"step": 40768 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.950161582021406e-07, |
|
"loss": 0.0451, |
|
"step": 40832 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.940674197278306e-07, |
|
"loss": 0.1535, |
|
"step": 40896 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.931186812535207e-07, |
|
"loss": 0.0703, |
|
"step": 40960 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.9216994277921075e-07, |
|
"loss": 0.1161, |
|
"step": 41024 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.912212043049008e-07, |
|
"loss": 0.0631, |
|
"step": 41088 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.902724658305909e-07, |
|
"loss": 0.086, |
|
"step": 41152 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.893237273562809e-07, |
|
"loss": 0.0811, |
|
"step": 41216 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.88374988881971e-07, |
|
"loss": 0.097, |
|
"step": 41280 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.8742625040766105e-07, |
|
"loss": 0.0747, |
|
"step": 41344 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.864775119333511e-07, |
|
"loss": 0.0843, |
|
"step": 41408 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.855287734590412e-07, |
|
"loss": 0.0942, |
|
"step": 41472 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.845800349847312e-07, |
|
"loss": 0.1175, |
|
"step": 41536 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.836312965104213e-07, |
|
"loss": 0.0899, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.8268255803611134e-07, |
|
"loss": 0.1071, |
|
"step": 41664 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.817338195618014e-07, |
|
"loss": 0.0559, |
|
"step": 41728 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.8078508108749147e-07, |
|
"loss": 0.1248, |
|
"step": 41792 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.7983634261318156e-07, |
|
"loss": 0.0601, |
|
"step": 41856 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.7888760413887155e-07, |
|
"loss": 0.049, |
|
"step": 41920 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.7793886566456164e-07, |
|
"loss": 0.0611, |
|
"step": 41984 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.769901271902517e-07, |
|
"loss": 0.0884, |
|
"step": 42048 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.7604138871594176e-07, |
|
"loss": 0.0759, |
|
"step": 42112 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.7509265024163185e-07, |
|
"loss": 0.1484, |
|
"step": 42176 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.7414391176732184e-07, |
|
"loss": 0.0447, |
|
"step": 42240 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.7319517329301193e-07, |
|
"loss": 0.0491, |
|
"step": 42304 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.7224643481870197e-07, |
|
"loss": 0.053, |
|
"step": 42368 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.7129769634439206e-07, |
|
"loss": 0.0832, |
|
"step": 42432 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.7034895787008215e-07, |
|
"loss": 0.0677, |
|
"step": 42496 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.6940021939577213e-07, |
|
"loss": 0.0599, |
|
"step": 42560 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.684514809214622e-07, |
|
"loss": 0.1093, |
|
"step": 42624 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.675027424471523e-07, |
|
"loss": 0.116, |
|
"step": 42688 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.6655400397284235e-07, |
|
"loss": 0.1072, |
|
"step": 42752 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.656052654985324e-07, |
|
"loss": 0.1406, |
|
"step": 42816 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.6465652702422243e-07, |
|
"loss": 0.0339, |
|
"step": 42880 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.637077885499125e-07, |
|
"loss": 0.1058, |
|
"step": 42944 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.627590500756026e-07, |
|
"loss": 0.1187, |
|
"step": 43008 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.6181031160129265e-07, |
|
"loss": 0.0695, |
|
"step": 43072 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.608615731269827e-07, |
|
"loss": 0.0938, |
|
"step": 43136 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.599128346526727e-07, |
|
"loss": 0.08, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.589640961783628e-07, |
|
"loss": 0.0911, |
|
"step": 43264 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.580153577040529e-07, |
|
"loss": 0.0983, |
|
"step": 43328 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.5706661922974294e-07, |
|
"loss": 0.115, |
|
"step": 43392 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.56117880755433e-07, |
|
"loss": 0.0913, |
|
"step": 43456 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.5516914228112307e-07, |
|
"loss": 0.0739, |
|
"step": 43520 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.542204038068131e-07, |
|
"loss": 0.1011, |
|
"step": 43584 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.532716653325032e-07, |
|
"loss": 0.0595, |
|
"step": 43648 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.523229268581932e-07, |
|
"loss": 0.1025, |
|
"step": 43712 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.5137418838388327e-07, |
|
"loss": 0.1022, |
|
"step": 43776 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.5042544990957336e-07, |
|
"loss": 0.0641, |
|
"step": 43840 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.494767114352634e-07, |
|
"loss": 0.1397, |
|
"step": 43904 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.485279729609535e-07, |
|
"loss": 0.136, |
|
"step": 43968 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.475792344866435e-07, |
|
"loss": 0.0946, |
|
"step": 44032 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.4663049601233357e-07, |
|
"loss": 0.0963, |
|
"step": 44096 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.4568175753802366e-07, |
|
"loss": 0.0511, |
|
"step": 44160 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.447330190637137e-07, |
|
"loss": 0.0665, |
|
"step": 44224 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.437842805894038e-07, |
|
"loss": 0.0735, |
|
"step": 44288 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.428355421150938e-07, |
|
"loss": 0.0443, |
|
"step": 44352 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.4188680364078386e-07, |
|
"loss": 0.1191, |
|
"step": 44416 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.4093806516647395e-07, |
|
"loss": 0.0886, |
|
"step": 44480 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.39989326692164e-07, |
|
"loss": 0.0917, |
|
"step": 44544 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.390405882178541e-07, |
|
"loss": 0.0751, |
|
"step": 44608 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.380918497435441e-07, |
|
"loss": 0.0692, |
|
"step": 44672 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.3714311126923416e-07, |
|
"loss": 0.1101, |
|
"step": 44736 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.3619437279492425e-07, |
|
"loss": 0.1307, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.3524563432061434e-07, |
|
"loss": 0.1512, |
|
"step": 44864 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.342968958463043e-07, |
|
"loss": 0.0731, |
|
"step": 44928 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.333481573719944e-07, |
|
"loss": 0.0688, |
|
"step": 44992 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.3239941889768445e-07, |
|
"loss": 0.0476, |
|
"step": 45056 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.3145068042337454e-07, |
|
"loss": 0.0511, |
|
"step": 45120 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.3050194194906463e-07, |
|
"loss": 0.0658, |
|
"step": 45184 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.295532034747546e-07, |
|
"loss": 0.0714, |
|
"step": 45248 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.286044650004447e-07, |
|
"loss": 0.0491, |
|
"step": 45312 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.2765572652613474e-07, |
|
"loss": 0.0355, |
|
"step": 45376 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.2670698805182484e-07, |
|
"loss": 0.0322, |
|
"step": 45440 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.257582495775149e-07, |
|
"loss": 0.034, |
|
"step": 45504 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.248095111032049e-07, |
|
"loss": 0.0448, |
|
"step": 45568 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.23860772628895e-07, |
|
"loss": 0.0289, |
|
"step": 45632 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.229120341545851e-07, |
|
"loss": 0.0324, |
|
"step": 45696 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2196329568027513e-07, |
|
"loss": 0.0802, |
|
"step": 45760 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2101455720596517e-07, |
|
"loss": 0.0237, |
|
"step": 45824 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.200806427703163e-07, |
|
"loss": 0.0423, |
|
"step": 45888 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.191319042960064e-07, |
|
"loss": 0.0968, |
|
"step": 45952 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.1818316582169645e-07, |
|
"loss": 0.0359, |
|
"step": 46016 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.1723442734738654e-07, |
|
"loss": 0.0361, |
|
"step": 46080 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.162856888730766e-07, |
|
"loss": 0.0676, |
|
"step": 46144 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.153369503987666e-07, |
|
"loss": 0.0388, |
|
"step": 46208 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.143882119244567e-07, |
|
"loss": 0.0332, |
|
"step": 46272 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.1343947345014674e-07, |
|
"loss": 0.0941, |
|
"step": 46336 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.124907349758368e-07, |
|
"loss": 0.0518, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.1154199650152687e-07, |
|
"loss": 0.0624, |
|
"step": 46464 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.105932580272169e-07, |
|
"loss": 0.0516, |
|
"step": 46528 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.09644519552907e-07, |
|
"loss": 0.0347, |
|
"step": 46592 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.086957810785971e-07, |
|
"loss": 0.0452, |
|
"step": 46656 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.0774704260428707e-07, |
|
"loss": 0.063, |
|
"step": 46720 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.0679830412997716e-07, |
|
"loss": 0.0623, |
|
"step": 46784 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.058495656556672e-07, |
|
"loss": 0.054, |
|
"step": 46848 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.049008271813573e-07, |
|
"loss": 0.0659, |
|
"step": 46912 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.039520887070474e-07, |
|
"loss": 0.0178, |
|
"step": 46976 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.0300335023273737e-07, |
|
"loss": 0.0412, |
|
"step": 47040 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.0205461175842746e-07, |
|
"loss": 0.0401, |
|
"step": 47104 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.011058732841175e-07, |
|
"loss": 0.0287, |
|
"step": 47168 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.001571348098076e-07, |
|
"loss": 0.0171, |
|
"step": 47232 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.992083963354976e-07, |
|
"loss": 0.0606, |
|
"step": 47296 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.9825965786118766e-07, |
|
"loss": 0.0343, |
|
"step": 47360 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.9731091938687775e-07, |
|
"loss": 0.0276, |
|
"step": 47424 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.9636218091256784e-07, |
|
"loss": 0.0567, |
|
"step": 47488 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.954134424382579e-07, |
|
"loss": 0.0347, |
|
"step": 47552 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.944647039639479e-07, |
|
"loss": 0.0634, |
|
"step": 47616 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.9351596548963795e-07, |
|
"loss": 0.0949, |
|
"step": 47680 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.9256722701532805e-07, |
|
"loss": 0.0485, |
|
"step": 47744 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.9161848854101814e-07, |
|
"loss": 0.0969, |
|
"step": 47808 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.906697500667082e-07, |
|
"loss": 0.0709, |
|
"step": 47872 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.897210115923982e-07, |
|
"loss": 0.046, |
|
"step": 47936 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.8877227311808825e-07, |
|
"loss": 0.0567, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.8782353464377834e-07, |
|
"loss": 0.0712, |
|
"step": 48064 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.8687479616946843e-07, |
|
"loss": 0.0472, |
|
"step": 48128 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.859260576951584e-07, |
|
"loss": 0.0687, |
|
"step": 48192 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.849773192208485e-07, |
|
"loss": 0.0295, |
|
"step": 48256 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.840285807465386e-07, |
|
"loss": 0.0404, |
|
"step": 48320 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8309466631088975e-07, |
|
"loss": 0.0719, |
|
"step": 48384 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8214592783657984e-07, |
|
"loss": 0.0129, |
|
"step": 48448 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.811971893622698e-07, |
|
"loss": 0.0262, |
|
"step": 48512 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.802484508879599e-07, |
|
"loss": 0.0499, |
|
"step": 48576 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.7929971241364995e-07, |
|
"loss": 0.0229, |
|
"step": 48640 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.7835097393934004e-07, |
|
"loss": 0.0338, |
|
"step": 48704 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.774022354650301e-07, |
|
"loss": 0.0614, |
|
"step": 48768 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.764534969907201e-07, |
|
"loss": 0.0913, |
|
"step": 48832 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.755047585164102e-07, |
|
"loss": 0.0481, |
|
"step": 48896 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.7455602004210024e-07, |
|
"loss": 0.0549, |
|
"step": 48960 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.7360728156779033e-07, |
|
"loss": 0.0532, |
|
"step": 49024 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.7265854309348037e-07, |
|
"loss": 0.0379, |
|
"step": 49088 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.717098046191704e-07, |
|
"loss": 0.0579, |
|
"step": 49152 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.707610661448605e-07, |
|
"loss": 0.0431, |
|
"step": 49216 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.698123276705506e-07, |
|
"loss": 0.0223, |
|
"step": 49280 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.6886358919624063e-07, |
|
"loss": 0.0339, |
|
"step": 49344 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.6791485072193067e-07, |
|
"loss": 0.0396, |
|
"step": 49408 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.669661122476207e-07, |
|
"loss": 0.0539, |
|
"step": 49472 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.660173737733108e-07, |
|
"loss": 0.0415, |
|
"step": 49536 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.650686352990009e-07, |
|
"loss": 0.083, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.6411989682469087e-07, |
|
"loss": 0.0333, |
|
"step": 49664 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.6317115835038096e-07, |
|
"loss": 0.0592, |
|
"step": 49728 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.62222419876071e-07, |
|
"loss": 0.0342, |
|
"step": 49792 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.612736814017611e-07, |
|
"loss": 0.0314, |
|
"step": 49856 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.603249429274512e-07, |
|
"loss": 0.0285, |
|
"step": 49920 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.5937620445314116e-07, |
|
"loss": 0.0208, |
|
"step": 49984 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.5842746597883126e-07, |
|
"loss": 0.0813, |
|
"step": 50048 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.5747872750452135e-07, |
|
"loss": 0.0699, |
|
"step": 50112 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.565299890302114e-07, |
|
"loss": 0.0617, |
|
"step": 50176 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.555812505559015e-07, |
|
"loss": 0.0531, |
|
"step": 50240 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.5463251208159146e-07, |
|
"loss": 0.0577, |
|
"step": 50304 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.5368377360728155e-07, |
|
"loss": 0.0538, |
|
"step": 50368 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.5273503513297164e-07, |
|
"loss": 0.0789, |
|
"step": 50432 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.517862966586617e-07, |
|
"loss": 0.025, |
|
"step": 50496 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.5083755818435177e-07, |
|
"loss": 0.0663, |
|
"step": 50560 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.498888197100418e-07, |
|
"loss": 0.0497, |
|
"step": 50624 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.4894008123573184e-07, |
|
"loss": 0.0207, |
|
"step": 50688 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.4799134276142193e-07, |
|
"loss": 0.0381, |
|
"step": 50752 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.4704260428711197e-07, |
|
"loss": 0.0441, |
|
"step": 50816 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.46093865812802e-07, |
|
"loss": 0.0447, |
|
"step": 50880 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.451451273384921e-07, |
|
"loss": 0.105, |
|
"step": 50944 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.4419638886418214e-07, |
|
"loss": 0.0551, |
|
"step": 51008 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.4324765038987223e-07, |
|
"loss": 0.0489, |
|
"step": 51072 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.4229891191556227e-07, |
|
"loss": 0.0243, |
|
"step": 51136 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.413501734412523e-07, |
|
"loss": 0.0633, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.404014349669424e-07, |
|
"loss": 0.0378, |
|
"step": 51264 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.3945269649263243e-07, |
|
"loss": 0.0696, |
|
"step": 51328 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3850395801832247e-07, |
|
"loss": 0.0525, |
|
"step": 51392 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3755521954401256e-07, |
|
"loss": 0.012, |
|
"step": 51456 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3660648106970262e-07, |
|
"loss": 0.0391, |
|
"step": 51520 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3565774259539266e-07, |
|
"loss": 0.0417, |
|
"step": 51584 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3470900412108275e-07, |
|
"loss": 0.0322, |
|
"step": 51648 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.337602656467728e-07, |
|
"loss": 0.0429, |
|
"step": 51712 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3281152717246285e-07, |
|
"loss": 0.0403, |
|
"step": 51776 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.3186278869815292e-07, |
|
"loss": 0.0978, |
|
"step": 51840 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.3091405022384298e-07, |
|
"loss": 0.0637, |
|
"step": 51904 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.2996531174953305e-07, |
|
"loss": 0.0582, |
|
"step": 51968 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.2901657327522309e-07, |
|
"loss": 0.0633, |
|
"step": 52032 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.2806783480091315e-07, |
|
"loss": 0.0419, |
|
"step": 52096 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.271190963266032e-07, |
|
"loss": 0.065, |
|
"step": 52160 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.2617035785229328e-07, |
|
"loss": 0.0501, |
|
"step": 52224 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2522161937798334e-07, |
|
"loss": 0.0296, |
|
"step": 52288 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2427288090367338e-07, |
|
"loss": 0.0098, |
|
"step": 52352 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2332414242936344e-07, |
|
"loss": 0.0349, |
|
"step": 52416 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.223754039550535e-07, |
|
"loss": 0.041, |
|
"step": 52480 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.2142666548074357e-07, |
|
"loss": 0.0507, |
|
"step": 52544 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.204779270064336e-07, |
|
"loss": 0.0495, |
|
"step": 52608 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.1952918853212367e-07, |
|
"loss": 0.0757, |
|
"step": 52672 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.1858045005781376e-07, |
|
"loss": 0.0352, |
|
"step": 52736 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.176317115835038e-07, |
|
"loss": 0.0602, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.1668297310919387e-07, |
|
"loss": 0.0309, |
|
"step": 52864 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.157342346348839e-07, |
|
"loss": 0.0931, |
|
"step": 52928 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.14785496160574e-07, |
|
"loss": 0.0305, |
|
"step": 52992 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1383675768626403e-07, |
|
"loss": 0.0377, |
|
"step": 53056 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.128880192119541e-07, |
|
"loss": 0.0263, |
|
"step": 53120 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.1193928073764416e-07, |
|
"loss": 0.0304, |
|
"step": 53184 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.109905422633342e-07, |
|
"loss": 0.0397, |
|
"step": 53248 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.100418037890243e-07, |
|
"loss": 0.0294, |
|
"step": 53312 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.0909306531471433e-07, |
|
"loss": 0.0738, |
|
"step": 53376 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.081443268404044e-07, |
|
"loss": 0.0298, |
|
"step": 53440 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.0719558836609443e-07, |
|
"loss": 0.046, |
|
"step": 53504 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.0624684989178452e-07, |
|
"loss": 0.024, |
|
"step": 53568 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.0529811141747458e-07, |
|
"loss": 0.0338, |
|
"step": 53632 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.0434937294316462e-07, |
|
"loss": 0.056, |
|
"step": 53696 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.0340063446885468e-07, |
|
"loss": 0.0483, |
|
"step": 53760 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.0245189599454472e-07, |
|
"loss": 0.0351, |
|
"step": 53824 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.015031575202348e-07, |
|
"loss": 0.1004, |
|
"step": 53888 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0055441904592485e-07, |
|
"loss": 0.0775, |
|
"step": 53952 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.9960568057161492e-07, |
|
"loss": 0.0388, |
|
"step": 54016 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.9865694209730498e-07, |
|
"loss": 0.0591, |
|
"step": 54080 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.9770820362299504e-07, |
|
"loss": 0.0416, |
|
"step": 54144 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.967594651486851e-07, |
|
"loss": 0.0373, |
|
"step": 54208 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.9581072667437515e-07, |
|
"loss": 0.0544, |
|
"step": 54272 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.948619882000652e-07, |
|
"loss": 0.0256, |
|
"step": 54336 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9391324972575527e-07, |
|
"loss": 0.0852, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9296451125144534e-07, |
|
"loss": 0.0333, |
|
"step": 54464 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.920157727771354e-07, |
|
"loss": 0.0327, |
|
"step": 54528 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.9106703430282544e-07, |
|
"loss": 0.0311, |
|
"step": 54592 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.9011829582851553e-07, |
|
"loss": 0.0676, |
|
"step": 54656 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.8916955735420557e-07, |
|
"loss": 0.098, |
|
"step": 54720 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8823564291855672e-07, |
|
"loss": 0.0668, |
|
"step": 54784 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8728690444424678e-07, |
|
"loss": 0.0498, |
|
"step": 54848 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8633816596993685e-07, |
|
"loss": 0.0681, |
|
"step": 54912 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8538942749562688e-07, |
|
"loss": 0.0353, |
|
"step": 54976 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8444068902131695e-07, |
|
"loss": 0.0776, |
|
"step": 55040 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8349195054700704e-07, |
|
"loss": 0.0636, |
|
"step": 55104 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8254321207269708e-07, |
|
"loss": 0.0415, |
|
"step": 55168 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.8159447359838714e-07, |
|
"loss": 0.0646, |
|
"step": 55232 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.8064573512407718e-07, |
|
"loss": 0.0218, |
|
"step": 55296 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.7971182068842835e-07, |
|
"loss": 0.0818, |
|
"step": 55360 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.787630822141184e-07, |
|
"loss": 0.0541, |
|
"step": 55424 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.7781434373980848e-07, |
|
"loss": 0.0667, |
|
"step": 55488 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.768804293041596e-07, |
|
"loss": 0.0717, |
|
"step": 55552 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.7593169082984967e-07, |
|
"loss": 0.035, |
|
"step": 55616 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.7498295235553973e-07, |
|
"loss": 0.0863, |
|
"step": 55680 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.740342138812298e-07, |
|
"loss": 0.0839, |
|
"step": 55744 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.7308547540691984e-07, |
|
"loss": 0.0298, |
|
"step": 55808 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.721367369326099e-07, |
|
"loss": 0.0739, |
|
"step": 55872 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.7120282249696105e-07, |
|
"loss": 0.0557, |
|
"step": 55936 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.7025408402265111e-07, |
|
"loss": 0.0717, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.693053455483412e-07, |
|
"loss": 0.0297, |
|
"step": 56064 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6835660707403124e-07, |
|
"loss": 0.073, |
|
"step": 56128 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.674078685997213e-07, |
|
"loss": 0.0389, |
|
"step": 56192 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6645913012541134e-07, |
|
"loss": 0.0722, |
|
"step": 56256 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6551039165110144e-07, |
|
"loss": 0.0544, |
|
"step": 56320 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6456165317679147e-07, |
|
"loss": 0.0641, |
|
"step": 56384 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6361291470248154e-07, |
|
"loss": 0.0791, |
|
"step": 56448 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.626641762281716e-07, |
|
"loss": 0.1186, |
|
"step": 56512 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.6171543775386164e-07, |
|
"loss": 0.038, |
|
"step": 56576 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.6076669927955173e-07, |
|
"loss": 0.0557, |
|
"step": 56640 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.5981796080524177e-07, |
|
"loss": 0.0722, |
|
"step": 56704 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.5886922233093183e-07, |
|
"loss": 0.0837, |
|
"step": 56768 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5792048385662187e-07, |
|
"loss": 0.0399, |
|
"step": 56832 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5697174538231196e-07, |
|
"loss": 0.0971, |
|
"step": 56896 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5602300690800202e-07, |
|
"loss": 0.0811, |
|
"step": 56960 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5507426843369206e-07, |
|
"loss": 0.029, |
|
"step": 57024 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5412552995938213e-07, |
|
"loss": 0.0522, |
|
"step": 57088 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.531767914850722e-07, |
|
"loss": 0.0415, |
|
"step": 57152 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5222805301076225e-07, |
|
"loss": 0.0377, |
|
"step": 57216 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.512793145364523e-07, |
|
"loss": 0.0351, |
|
"step": 57280 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.5033057606214236e-07, |
|
"loss": 0.0392, |
|
"step": 57344 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.4938183758783242e-07, |
|
"loss": 0.0584, |
|
"step": 57408 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.4843309911352248e-07, |
|
"loss": 0.0573, |
|
"step": 57472 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.4748436063921255e-07, |
|
"loss": 0.0395, |
|
"step": 57536 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.4653562216490259e-07, |
|
"loss": 0.068, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.4558688369059265e-07, |
|
"loss": 0.0393, |
|
"step": 57664 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4463814521628274e-07, |
|
"loss": 0.0338, |
|
"step": 57728 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4368940674197278e-07, |
|
"loss": 0.0328, |
|
"step": 57792 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4274066826766284e-07, |
|
"loss": 0.0261, |
|
"step": 57856 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4179192979335288e-07, |
|
"loss": 0.0446, |
|
"step": 57920 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4084319131904297e-07, |
|
"loss": 0.0593, |
|
"step": 57984 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.39894452844733e-07, |
|
"loss": 0.1374, |
|
"step": 58048 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.3894571437042307e-07, |
|
"loss": 0.04, |
|
"step": 58112 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3799697589611314e-07, |
|
"loss": 0.0511, |
|
"step": 58176 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3704823742180317e-07, |
|
"loss": 0.0591, |
|
"step": 58240 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3609949894749326e-07, |
|
"loss": 0.0226, |
|
"step": 58304 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.351507604731833e-07, |
|
"loss": 0.034, |
|
"step": 58368 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.3420202199887337e-07, |
|
"loss": 0.0142, |
|
"step": 58432 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.332532835245634e-07, |
|
"loss": 0.0594, |
|
"step": 58496 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.323045450502535e-07, |
|
"loss": 0.0507, |
|
"step": 58560 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.3135580657594356e-07, |
|
"loss": 0.0577, |
|
"step": 58624 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.304070681016336e-07, |
|
"loss": 0.0608, |
|
"step": 58688 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.2945832962732366e-07, |
|
"loss": 0.0248, |
|
"step": 58752 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.2850959115301373e-07, |
|
"loss": 0.0388, |
|
"step": 58816 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.275608526787038e-07, |
|
"loss": 0.0625, |
|
"step": 58880 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.2661211420439383e-07, |
|
"loss": 0.0497, |
|
"step": 58944 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.256633757300839e-07, |
|
"loss": 0.0442, |
|
"step": 59008 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.2471463725577396e-07, |
|
"loss": 0.0489, |
|
"step": 59072 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.2376589878146402e-07, |
|
"loss": 0.0707, |
|
"step": 59136 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.2281716030715406e-07, |
|
"loss": 0.0761, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.2186842183284415e-07, |
|
"loss": 0.0605, |
|
"step": 59264 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.2091968335853419e-07, |
|
"loss": 0.0451, |
|
"step": 59328 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.1997094488422425e-07, |
|
"loss": 0.0617, |
|
"step": 59392 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.1902220640991431e-07, |
|
"loss": 0.0501, |
|
"step": 59456 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.1807346793560436e-07, |
|
"loss": 0.0313, |
|
"step": 59520 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.1712472946129443e-07, |
|
"loss": 0.079, |
|
"step": 59584 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.1617599098698449e-07, |
|
"loss": 0.1015, |
|
"step": 59648 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1522725251267456e-07, |
|
"loss": 0.0585, |
|
"step": 59712 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1427851403836461e-07, |
|
"loss": 0.0521, |
|
"step": 59776 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1332977556405467e-07, |
|
"loss": 0.0504, |
|
"step": 59840 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1238103708974472e-07, |
|
"loss": 0.0409, |
|
"step": 59904 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.1143229861543477e-07, |
|
"loss": 0.047, |
|
"step": 59968 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.1048356014112484e-07, |
|
"loss": 0.064, |
|
"step": 60032 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.095348216668149e-07, |
|
"loss": 0.0465, |
|
"step": 60096 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.0858608319250497e-07, |
|
"loss": 0.0636, |
|
"step": 60160 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.0763734471819502e-07, |
|
"loss": 0.0653, |
|
"step": 60224 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.0668860624388508e-07, |
|
"loss": 0.0682, |
|
"step": 60288 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.0573986776957513e-07, |
|
"loss": 0.0219, |
|
"step": 60352 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.047911292952652e-07, |
|
"loss": 0.0452, |
|
"step": 60416 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.0385721485961635e-07, |
|
"loss": 0.059, |
|
"step": 60480 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.0290847638530641e-07, |
|
"loss": 0.0801, |
|
"step": 60544 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.0195973791099646e-07, |
|
"loss": 0.0582, |
|
"step": 60608 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.0101099943668653e-07, |
|
"loss": 0.0236, |
|
"step": 60672 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.0006226096237659e-07, |
|
"loss": 0.0576, |
|
"step": 60736 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.911352248806665e-08, |
|
"loss": 0.0438, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.81647840137567e-08, |
|
"loss": 0.0709, |
|
"step": 60864 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.721604553944676e-08, |
|
"loss": 0.0626, |
|
"step": 60928 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.626730706513682e-08, |
|
"loss": 0.0581, |
|
"step": 60992 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.531856859082687e-08, |
|
"loss": 0.0422, |
|
"step": 61056 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.436983011651695e-08, |
|
"loss": 0.0403, |
|
"step": 61120 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.3421091642207e-08, |
|
"loss": 0.0469, |
|
"step": 61184 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.247235316789706e-08, |
|
"loss": 0.0607, |
|
"step": 61248 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.152361469358711e-08, |
|
"loss": 0.0208, |
|
"step": 61312 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.057487621927718e-08, |
|
"loss": 0.0468, |
|
"step": 61376 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 8.962613774496723e-08, |
|
"loss": 0.0807, |
|
"step": 61440 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.86773992706573e-08, |
|
"loss": 0.073, |
|
"step": 61504 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.772866079634736e-08, |
|
"loss": 0.073, |
|
"step": 61568 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.677992232203742e-08, |
|
"loss": 0.052, |
|
"step": 61632 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.583118384772747e-08, |
|
"loss": 0.0783, |
|
"step": 61696 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.488244537341752e-08, |
|
"loss": 0.0215, |
|
"step": 61760 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.393370689910759e-08, |
|
"loss": 0.0627, |
|
"step": 61824 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.298496842479764e-08, |
|
"loss": 0.072, |
|
"step": 61888 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.20362299504877e-08, |
|
"loss": 0.0395, |
|
"step": 61952 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.108749147617777e-08, |
|
"loss": 0.1183, |
|
"step": 62016 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.013875300186783e-08, |
|
"loss": 0.0483, |
|
"step": 62080 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.919001452755788e-08, |
|
"loss": 0.0571, |
|
"step": 62144 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.824127605324795e-08, |
|
"loss": 0.0984, |
|
"step": 62208 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.7292537578938e-08, |
|
"loss": 0.0771, |
|
"step": 62272 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.634379910462806e-08, |
|
"loss": 0.0559, |
|
"step": 62336 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.539506063031811e-08, |
|
"loss": 0.0519, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.444632215600819e-08, |
|
"loss": 0.0566, |
|
"step": 62464 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.349758368169824e-08, |
|
"loss": 0.0373, |
|
"step": 62528 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.25488452073883e-08, |
|
"loss": 0.0581, |
|
"step": 62592 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.160010673307836e-08, |
|
"loss": 0.1356, |
|
"step": 62656 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.06513682587684e-08, |
|
"loss": 0.0374, |
|
"step": 62720 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.970262978445847e-08, |
|
"loss": 0.0505, |
|
"step": 62784 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.875389131014853e-08, |
|
"loss": 0.0789, |
|
"step": 62848 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.78051528358386e-08, |
|
"loss": 0.0444, |
|
"step": 62912 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.685641436152865e-08, |
|
"loss": 0.0152, |
|
"step": 62976 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.590767588721871e-08, |
|
"loss": 0.0627, |
|
"step": 63040 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.495893741290877e-08, |
|
"loss": 0.0492, |
|
"step": 63104 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.401019893859883e-08, |
|
"loss": 0.0256, |
|
"step": 63168 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.306146046428888e-08, |
|
"loss": 0.0931, |
|
"step": 63232 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.211272198997894e-08, |
|
"loss": 0.0815, |
|
"step": 63296 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 6.116398351566901e-08, |
|
"loss": 0.0261, |
|
"step": 63360 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 6.021524504135907e-08, |
|
"loss": 0.0329, |
|
"step": 63424 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.9266506567049123e-08, |
|
"loss": 0.0543, |
|
"step": 63488 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.831776809273918e-08, |
|
"loss": 0.0497, |
|
"step": 63552 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.7369029618429245e-08, |
|
"loss": 0.0509, |
|
"step": 63616 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.64202911441193e-08, |
|
"loss": 0.0352, |
|
"step": 63680 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.547155266980936e-08, |
|
"loss": 0.0703, |
|
"step": 63744 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.4522814195499424e-08, |
|
"loss": 0.0203, |
|
"step": 63808 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.357407572118948e-08, |
|
"loss": 0.0462, |
|
"step": 63872 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.262533724687954e-08, |
|
"loss": 0.0323, |
|
"step": 63936 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.167659877256959e-08, |
|
"loss": 0.0481, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.0727860298259654e-08, |
|
"loss": 0.0198, |
|
"step": 64064 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.977912182394971e-08, |
|
"loss": 0.0386, |
|
"step": 64128 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.883038334963977e-08, |
|
"loss": 0.0516, |
|
"step": 64192 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.7881644875329834e-08, |
|
"loss": 0.0183, |
|
"step": 64256 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.693290640101989e-08, |
|
"loss": 0.0362, |
|
"step": 64320 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.598416792670995e-08, |
|
"loss": 0.0174, |
|
"step": 64384 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.503542945240001e-08, |
|
"loss": 0.1017, |
|
"step": 64448 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.408669097809007e-08, |
|
"loss": 0.0516, |
|
"step": 64512 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.313795250378013e-08, |
|
"loss": 0.028, |
|
"step": 64576 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.2189214029470186e-08, |
|
"loss": 0.0999, |
|
"step": 64640 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.124047555516025e-08, |
|
"loss": 0.0623, |
|
"step": 64704 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.029173708085031e-08, |
|
"loss": 0.069, |
|
"step": 64768 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.9342998606540365e-08, |
|
"loss": 0.031, |
|
"step": 64832 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.839426013223043e-08, |
|
"loss": 0.0489, |
|
"step": 64896 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.744552165792048e-08, |
|
"loss": 0.0379, |
|
"step": 64960 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.649678318361054e-08, |
|
"loss": 0.0248, |
|
"step": 65024 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.5562868747961694e-08, |
|
"loss": 0.0731, |
|
"step": 65088 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.461413027365175e-08, |
|
"loss": 0.0469, |
|
"step": 65152 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.366539179934181e-08, |
|
"loss": 0.036, |
|
"step": 65216 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.271665332503187e-08, |
|
"loss": 0.0428, |
|
"step": 65280 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.176791485072193e-08, |
|
"loss": 0.0546, |
|
"step": 65344 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.081917637641199e-08, |
|
"loss": 0.059, |
|
"step": 65408 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.9870437902102046e-08, |
|
"loss": 0.0494, |
|
"step": 65472 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.8921699427792107e-08, |
|
"loss": 0.059, |
|
"step": 65536 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.7972960953482164e-08, |
|
"loss": 0.0715, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.7024222479172225e-08, |
|
"loss": 0.0433, |
|
"step": 65664 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.6075484004862282e-08, |
|
"loss": 0.0305, |
|
"step": 65728 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.5126745530552343e-08, |
|
"loss": 0.0879, |
|
"step": 65792 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.41780070562424e-08, |
|
"loss": 0.0691, |
|
"step": 65856 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.3229268581932462e-08, |
|
"loss": 0.0584, |
|
"step": 65920 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.228053010762252e-08, |
|
"loss": 0.0669, |
|
"step": 65984 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.1331791633312577e-08, |
|
"loss": 0.0379, |
|
"step": 66048 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.0383053159002638e-08, |
|
"loss": 0.0294, |
|
"step": 66112 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.9434314684692695e-08, |
|
"loss": 0.0282, |
|
"step": 66176 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.8485576210382756e-08, |
|
"loss": 0.0697, |
|
"step": 66240 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.7536837736072817e-08, |
|
"loss": 0.0586, |
|
"step": 66304 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.6588099261762874e-08, |
|
"loss": 0.0259, |
|
"step": 66368 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.5639360787452932e-08, |
|
"loss": 0.0476, |
|
"step": 66432 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.4690622313142993e-08, |
|
"loss": 0.0248, |
|
"step": 66496 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.374188383883305e-08, |
|
"loss": 0.0416, |
|
"step": 66560 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.279314536452311e-08, |
|
"loss": 0.0324, |
|
"step": 66624 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.1844406890213169e-08, |
|
"loss": 0.009, |
|
"step": 66688 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.089566841590323e-08, |
|
"loss": 0.0341, |
|
"step": 66752 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 9.946929941593287e-09, |
|
"loss": 0.0234, |
|
"step": 66816 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.998191467283346e-09, |
|
"loss": 0.0638, |
|
"step": 66880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 8.049452992973405e-09, |
|
"loss": 0.0757, |
|
"step": 66944 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.1007145186634646e-09, |
|
"loss": 0.0294, |
|
"step": 67008 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 6.151976044353523e-09, |
|
"loss": 0.0739, |
|
"step": 67072 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.203237570043583e-09, |
|
"loss": 0.0545, |
|
"step": 67136 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.254499095733641e-09, |
|
"loss": 0.0294, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.3057606214237005e-09, |
|
"loss": 0.0236, |
|
"step": 67264 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.3570221471137597e-09, |
|
"loss": 0.0805, |
|
"step": 67328 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.4082836728038187e-09, |
|
"loss": 0.0691, |
|
"step": 67392 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.5954519849387766e-10, |
|
"loss": 0.0591, |
|
"step": 67456 |
|
} |
|
], |
|
"logging_steps": 64, |
|
"max_steps": 67458, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 22486, |
|
"total_flos": 1.432248707211264e+20, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|