|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 22486, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.990957336416734e-07, |
|
"loss": 0.1541, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.981469951673633e-07, |
|
"loss": 0.0788, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.971982566930536e-07, |
|
"loss": 0.2255, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.962495182187434e-07, |
|
"loss": 0.1454, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.953007797444335e-07, |
|
"loss": 0.081, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.943520412701236e-07, |
|
"loss": 0.1782, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.934033027958137e-07, |
|
"loss": 0.1119, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.924545643215036e-07, |
|
"loss": 0.1648, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.915058258471937e-07, |
|
"loss": 0.1536, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.905570873728838e-07, |
|
"loss": 0.1527, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.896083488985739e-07, |
|
"loss": 0.1153, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.88659610424264e-07, |
|
"loss": 0.0826, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.87710871949954e-07, |
|
"loss": 0.135, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.867621334756441e-07, |
|
"loss": 0.1657, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.85813395001334e-07, |
|
"loss": 0.1777, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.848646565270243e-07, |
|
"loss": 0.0975, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.839159180527142e-07, |
|
"loss": 0.1336, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.829671795784043e-07, |
|
"loss": 0.0785, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.820184411040944e-07, |
|
"loss": 0.164, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.810697026297845e-07, |
|
"loss": 0.0864, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.801209641554746e-07, |
|
"loss": 0.128, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.791722256811644e-07, |
|
"loss": 0.0897, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.782234872068545e-07, |
|
"loss": 0.0989, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.772747487325446e-07, |
|
"loss": 0.1309, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.763408342968958e-07, |
|
"loss": 0.2144, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.75392095822586e-07, |
|
"loss": 0.0691, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.744433573482758e-07, |
|
"loss": 0.1476, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.73494618873966e-07, |
|
"loss": 0.1541, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.72545880399656e-07, |
|
"loss": 0.1108, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.71597141925346e-07, |
|
"loss": 0.112, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.706484034510362e-07, |
|
"loss": 0.1901, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.696996649767262e-07, |
|
"loss": 0.1207, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.687509265024163e-07, |
|
"loss": 0.128, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.678021880281062e-07, |
|
"loss": 0.2029, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.668534495537965e-07, |
|
"loss": 0.1629, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.659047110794864e-07, |
|
"loss": 0.1662, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.649559726051765e-07, |
|
"loss": 0.1392, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.640072341308666e-07, |
|
"loss": 0.081, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.630584956565567e-07, |
|
"loss": 0.0894, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.621097571822468e-07, |
|
"loss": 0.1017, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.611610187079368e-07, |
|
"loss": 0.1476, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.602122802336267e-07, |
|
"loss": 0.1592, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.592635417593168e-07, |
|
"loss": 0.146, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.58314803285007e-07, |
|
"loss": 0.1552, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.57366064810697e-07, |
|
"loss": 0.0498, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.56417326336387e-07, |
|
"loss": 0.0999, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.55468587862077e-07, |
|
"loss": 0.1196, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.545198493877673e-07, |
|
"loss": 0.1172, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.535711109134573e-07, |
|
"loss": 0.1107, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.526223724391472e-07, |
|
"loss": 0.182, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.516736339648373e-07, |
|
"loss": 0.0687, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.507248954905273e-07, |
|
"loss": 0.1274, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.497761570162175e-07, |
|
"loss": 0.1374, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.488274185419075e-07, |
|
"loss": 0.159, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.478786800675975e-07, |
|
"loss": 0.0687, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.469299415932877e-07, |
|
"loss": 0.1446, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.459812031189777e-07, |
|
"loss": 0.1047, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.450324646446678e-07, |
|
"loss": 0.1445, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.440837261703578e-07, |
|
"loss": 0.1033, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.431349876960478e-07, |
|
"loss": 0.1514, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.421862492217379e-07, |
|
"loss": 0.1245, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.41237510747428e-07, |
|
"loss": 0.2032, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.402887722731181e-07, |
|
"loss": 0.0935, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.393400337988081e-07, |
|
"loss": 0.1492, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.383912953244981e-07, |
|
"loss": 0.1011, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.374425568501883e-07, |
|
"loss": 0.1279, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.364938183758783e-07, |
|
"loss": 0.0842, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.355599039402295e-07, |
|
"loss": 0.1482, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.346111654659194e-07, |
|
"loss": 0.1444, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.336624269916095e-07, |
|
"loss": 0.1124, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.327285125559607e-07, |
|
"loss": 0.1552, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.317797740816507e-07, |
|
"loss": 0.113, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.308310356073408e-07, |
|
"loss": 0.1406, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.298822971330309e-07, |
|
"loss": 0.1464, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.289335586587209e-07, |
|
"loss": 0.0889, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.279848201844111e-07, |
|
"loss": 0.1028, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.270360817101011e-07, |
|
"loss": 0.1816, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.26087343235791e-07, |
|
"loss": 0.0867, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.251386047614812e-07, |
|
"loss": 0.1426, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.241898662871712e-07, |
|
"loss": 0.1404, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.232411278128613e-07, |
|
"loss": 0.0599, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.222923893385513e-07, |
|
"loss": 0.0753, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.213436508642414e-07, |
|
"loss": 0.1109, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.203949123899315e-07, |
|
"loss": 0.1836, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.194461739156215e-07, |
|
"loss": 0.0633, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.184974354413117e-07, |
|
"loss": 0.1392, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.175486969670017e-07, |
|
"loss": 0.11, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.165999584926916e-07, |
|
"loss": 0.1901, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.156512200183818e-07, |
|
"loss": 0.1399, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.147024815440718e-07, |
|
"loss": 0.1312, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.137537430697619e-07, |
|
"loss": 0.0884, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.12805004595452e-07, |
|
"loss": 0.1663, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.11856266121142e-07, |
|
"loss": 0.0728, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.109075276468321e-07, |
|
"loss": 0.1709, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.099587891725221e-07, |
|
"loss": 0.1476, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.090100506982122e-07, |
|
"loss": 0.0778, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.080613122239022e-07, |
|
"loss": 0.1081, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.071125737495922e-07, |
|
"loss": 0.1059, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.061638352752824e-07, |
|
"loss": 0.1143, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.052150968009724e-07, |
|
"loss": 0.1205, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.042663583266624e-07, |
|
"loss": 0.1479, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.033176198523526e-07, |
|
"loss": 0.0817, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.023688813780426e-07, |
|
"loss": 0.1398, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.014201429037327e-07, |
|
"loss": 0.1608, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.004714044294228e-07, |
|
"loss": 0.1345, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.995226659551127e-07, |
|
"loss": 0.1773, |
|
"step": 6784 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 8.985739274808028e-07, |
|
"loss": 0.1769, |
|
"step": 6848 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.976251890064928e-07, |
|
"loss": 0.1345, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.96676450532183e-07, |
|
"loss": 0.1293, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 8.95727712057873e-07, |
|
"loss": 0.1388, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.94778973583563e-07, |
|
"loss": 0.1004, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.938302351092532e-07, |
|
"loss": 0.1363, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.928814966349432e-07, |
|
"loss": 0.0933, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 8.919327581606333e-07, |
|
"loss": 0.1617, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.909840196863233e-07, |
|
"loss": 0.1085, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.900352812120133e-07, |
|
"loss": 0.1836, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.890865427377034e-07, |
|
"loss": 0.1464, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.881378042633935e-07, |
|
"loss": 0.1166, |
|
"step": 7552 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.871890657890836e-07, |
|
"loss": 0.1071, |
|
"step": 7616 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.862403273147736e-07, |
|
"loss": 0.1564, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.852915888404636e-07, |
|
"loss": 0.0937, |
|
"step": 7744 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.843428503661538e-07, |
|
"loss": 0.1095, |
|
"step": 7808 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.833941118918438e-07, |
|
"loss": 0.1313, |
|
"step": 7872 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.824453734175337e-07, |
|
"loss": 0.0819, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.814966349432239e-07, |
|
"loss": 0.1273, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.805478964689139e-07, |
|
"loss": 0.1079, |
|
"step": 8064 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.79599157994604e-07, |
|
"loss": 0.0566, |
|
"step": 8128 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.786504195202941e-07, |
|
"loss": 0.0882, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.777016810459841e-07, |
|
"loss": 0.1801, |
|
"step": 8256 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.767529425716742e-07, |
|
"loss": 0.1006, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.758042040973643e-07, |
|
"loss": 0.1033, |
|
"step": 8384 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.748554656230544e-07, |
|
"loss": 0.0907, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.739067271487443e-07, |
|
"loss": 0.0548, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.729579886744343e-07, |
|
"loss": 0.1173, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.720092502001245e-07, |
|
"loss": 0.1415, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.710605117258145e-07, |
|
"loss": 0.1035, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.701117732515046e-07, |
|
"loss": 0.1016, |
|
"step": 8768 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.691630347771947e-07, |
|
"loss": 0.111, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.682291203415458e-07, |
|
"loss": 0.1427, |
|
"step": 8896 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.672803818672359e-07, |
|
"loss": 0.1181, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.66331643392926e-07, |
|
"loss": 0.097, |
|
"step": 9024 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.65382904918616e-07, |
|
"loss": 0.1367, |
|
"step": 9088 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.64434166444306e-07, |
|
"loss": 0.1212, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.634854279699961e-07, |
|
"loss": 0.1393, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.625366894956861e-07, |
|
"loss": 0.1322, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.615879510213763e-07, |
|
"loss": 0.0786, |
|
"step": 9344 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.606392125470663e-07, |
|
"loss": 0.1263, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.596904740727563e-07, |
|
"loss": 0.1537, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.587417355984465e-07, |
|
"loss": 0.1267, |
|
"step": 9536 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.577929971241365e-07, |
|
"loss": 0.1087, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.568442586498266e-07, |
|
"loss": 0.1097, |
|
"step": 9664 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.558955201755165e-07, |
|
"loss": 0.1423, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.549467817012066e-07, |
|
"loss": 0.0911, |
|
"step": 9792 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.539980432268967e-07, |
|
"loss": 0.0748, |
|
"step": 9856 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.530493047525867e-07, |
|
"loss": 0.112, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.521005662782769e-07, |
|
"loss": 0.1765, |
|
"step": 9984 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.511518278039669e-07, |
|
"loss": 0.1539, |
|
"step": 10048 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.502030893296569e-07, |
|
"loss": 0.1382, |
|
"step": 10112 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.492543508553471e-07, |
|
"loss": 0.1146, |
|
"step": 10176 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.48305612381037e-07, |
|
"loss": 0.1589, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.473568739067271e-07, |
|
"loss": 0.1184, |
|
"step": 10304 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.464081354324172e-07, |
|
"loss": 0.0892, |
|
"step": 10368 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.454593969581072e-07, |
|
"loss": 0.1883, |
|
"step": 10432 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.445106584837973e-07, |
|
"loss": 0.1441, |
|
"step": 10496 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.435619200094873e-07, |
|
"loss": 0.1233, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.426131815351774e-07, |
|
"loss": 0.1376, |
|
"step": 10624 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.416644430608675e-07, |
|
"loss": 0.057, |
|
"step": 10688 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.407157045865575e-07, |
|
"loss": 0.0966, |
|
"step": 10752 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.397669661122477e-07, |
|
"loss": 0.1026, |
|
"step": 10816 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.388182276379376e-07, |
|
"loss": 0.1161, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.378694891636276e-07, |
|
"loss": 0.1889, |
|
"step": 10944 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.369207506893178e-07, |
|
"loss": 0.0808, |
|
"step": 11008 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.359868362536689e-07, |
|
"loss": 0.8512, |
|
"step": 11072 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.35038097779359e-07, |
|
"loss": 0.1572, |
|
"step": 11136 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.34089359305049e-07, |
|
"loss": 0.1883, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.331406208307391e-07, |
|
"loss": 0.0772, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.321918823564291e-07, |
|
"loss": 0.0924, |
|
"step": 11328 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.312431438821193e-07, |
|
"loss": 0.1308, |
|
"step": 11392 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.302944054078093e-07, |
|
"loss": 0.2317, |
|
"step": 11456 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.293456669334992e-07, |
|
"loss": 0.1581, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.283969284591894e-07, |
|
"loss": 0.1068, |
|
"step": 11584 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.274481899848794e-07, |
|
"loss": 0.0793, |
|
"step": 11648 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.264994515105695e-07, |
|
"loss": 0.1407, |
|
"step": 11712 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.255507130362596e-07, |
|
"loss": 0.2219, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.246019745619496e-07, |
|
"loss": 0.1364, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.236532360876397e-07, |
|
"loss": 0.0842, |
|
"step": 11904 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.227193216519909e-07, |
|
"loss": 0.0893, |
|
"step": 11968 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.217705831776809e-07, |
|
"loss": 0.1798, |
|
"step": 12032 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.20821844703371e-07, |
|
"loss": 0.1263, |
|
"step": 12096 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.19873106229061e-07, |
|
"loss": 0.1289, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.18924367754751e-07, |
|
"loss": 0.1712, |
|
"step": 12224 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.179756292804412e-07, |
|
"loss": 0.2207, |
|
"step": 12288 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.170268908061312e-07, |
|
"loss": 0.1219, |
|
"step": 12352 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.160781523318212e-07, |
|
"loss": 0.1446, |
|
"step": 12416 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.151294138575113e-07, |
|
"loss": 0.076, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.141806753832014e-07, |
|
"loss": 0.1448, |
|
"step": 12544 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.132319369088915e-07, |
|
"loss": 0.1613, |
|
"step": 12608 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.122831984345814e-07, |
|
"loss": 0.1241, |
|
"step": 12672 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.113344599602715e-07, |
|
"loss": 0.1833, |
|
"step": 12736 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.103857214859616e-07, |
|
"loss": 0.2071, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.094369830116516e-07, |
|
"loss": 0.1467, |
|
"step": 12864 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.084882445373418e-07, |
|
"loss": 0.0889, |
|
"step": 12928 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.075395060630318e-07, |
|
"loss": 0.141, |
|
"step": 12992 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.065907675887218e-07, |
|
"loss": 0.106, |
|
"step": 13056 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.05642029114412e-07, |
|
"loss": 0.1359, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.04693290640102e-07, |
|
"loss": 0.2273, |
|
"step": 13184 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.03744552165792e-07, |
|
"loss": 0.1071, |
|
"step": 13248 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.02795813691482e-07, |
|
"loss": 0.1106, |
|
"step": 13312 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.018470752171721e-07, |
|
"loss": 0.1035, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.008983367428622e-07, |
|
"loss": 0.1459, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.999495982685522e-07, |
|
"loss": 0.1739, |
|
"step": 13504 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.990008597942423e-07, |
|
"loss": 0.116, |
|
"step": 13568 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.980521213199324e-07, |
|
"loss": 0.1017, |
|
"step": 13632 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.971033828456224e-07, |
|
"loss": 0.1414, |
|
"step": 13696 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.961546443713126e-07, |
|
"loss": 0.2619, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.952059058970026e-07, |
|
"loss": 0.1166, |
|
"step": 13824 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.942571674226925e-07, |
|
"loss": 0.142, |
|
"step": 13888 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.933084289483827e-07, |
|
"loss": 0.2068, |
|
"step": 13952 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.923596904740727e-07, |
|
"loss": 0.1119, |
|
"step": 14016 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.914109519997628e-07, |
|
"loss": 0.143, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.904770375641139e-07, |
|
"loss": 0.1718, |
|
"step": 14144 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.89528299089804e-07, |
|
"loss": 0.0871, |
|
"step": 14208 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.88579560615494e-07, |
|
"loss": 0.1059, |
|
"step": 14272 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.876308221411842e-07, |
|
"loss": 0.1276, |
|
"step": 14336 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.866820836668742e-07, |
|
"loss": 0.1503, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.857333451925641e-07, |
|
"loss": 0.2201, |
|
"step": 14464 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.847846067182543e-07, |
|
"loss": 0.1737, |
|
"step": 14528 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.838358682439443e-07, |
|
"loss": 0.1133, |
|
"step": 14592 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.828871297696344e-07, |
|
"loss": 0.1378, |
|
"step": 14656 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.819383912953245e-07, |
|
"loss": 0.166, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.809896528210145e-07, |
|
"loss": 0.2001, |
|
"step": 14784 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.800409143467046e-07, |
|
"loss": 0.158, |
|
"step": 14848 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.790921758723946e-07, |
|
"loss": 0.1009, |
|
"step": 14912 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.781434373980848e-07, |
|
"loss": 0.2009, |
|
"step": 14976 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.771946989237747e-07, |
|
"loss": 0.1437, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.762459604494647e-07, |
|
"loss": 0.1653, |
|
"step": 15104 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.752972219751549e-07, |
|
"loss": 0.1736, |
|
"step": 15168 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.743484835008449e-07, |
|
"loss": 0.2137, |
|
"step": 15232 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.73399745026535e-07, |
|
"loss": 0.1282, |
|
"step": 15296 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.724510065522251e-07, |
|
"loss": 0.1766, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.715022680779151e-07, |
|
"loss": 0.0876, |
|
"step": 15424 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.705535296036052e-07, |
|
"loss": 0.1338, |
|
"step": 15488 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.696047911292953e-07, |
|
"loss": 0.2006, |
|
"step": 15552 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.686560526549854e-07, |
|
"loss": 0.1505, |
|
"step": 15616 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.677073141806753e-07, |
|
"loss": 0.1223, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.667585757063653e-07, |
|
"loss": 0.1693, |
|
"step": 15744 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.658098372320555e-07, |
|
"loss": 0.1872, |
|
"step": 15808 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.648610987577455e-07, |
|
"loss": 0.1109, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.639123602834355e-07, |
|
"loss": 0.1351, |
|
"step": 15936 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.629636218091257e-07, |
|
"loss": 0.1336, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.620148833348157e-07, |
|
"loss": 0.2153, |
|
"step": 16064 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.610661448605058e-07, |
|
"loss": 0.2274, |
|
"step": 16128 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.601174063861959e-07, |
|
"loss": 0.105, |
|
"step": 16192 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.591686679118858e-07, |
|
"loss": 0.1016, |
|
"step": 16256 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.582199294375759e-07, |
|
"loss": 0.116, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.57271190963266e-07, |
|
"loss": 0.1086, |
|
"step": 16384 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.563224524889561e-07, |
|
"loss": 0.1153, |
|
"step": 16448 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.553737140146461e-07, |
|
"loss": 0.1044, |
|
"step": 16512 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.544249755403361e-07, |
|
"loss": 0.2228, |
|
"step": 16576 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.534762370660263e-07, |
|
"loss": 0.1002, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.525274985917163e-07, |
|
"loss": 0.1919, |
|
"step": 16704 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.515787601174063e-07, |
|
"loss": 0.13, |
|
"step": 16768 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.506300216430964e-07, |
|
"loss": 0.1165, |
|
"step": 16832 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.496812831687864e-07, |
|
"loss": 0.1385, |
|
"step": 16896 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.487325446944765e-07, |
|
"loss": 0.1836, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.477838062201666e-07, |
|
"loss": 0.1852, |
|
"step": 17024 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.468498917845177e-07, |
|
"loss": 0.1721, |
|
"step": 17088 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.459011533102078e-07, |
|
"loss": 0.1164, |
|
"step": 17152 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.449524148358979e-07, |
|
"loss": 0.0989, |
|
"step": 17216 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.440036763615879e-07, |
|
"loss": 0.1524, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.430549378872781e-07, |
|
"loss": 0.174, |
|
"step": 17344 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.42106199412968e-07, |
|
"loss": 0.1509, |
|
"step": 17408 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.41157460938658e-07, |
|
"loss": 0.1264, |
|
"step": 17472 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.402087224643482e-07, |
|
"loss": 0.1148, |
|
"step": 17536 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.392599839900382e-07, |
|
"loss": 0.1227, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.383112455157283e-07, |
|
"loss": 0.1462, |
|
"step": 17664 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.373625070414183e-07, |
|
"loss": 0.1673, |
|
"step": 17728 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.364137685671084e-07, |
|
"loss": 0.1274, |
|
"step": 17792 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.354650300927985e-07, |
|
"loss": 0.162, |
|
"step": 17856 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.345162916184885e-07, |
|
"loss": 0.148, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.335675531441787e-07, |
|
"loss": 0.157, |
|
"step": 17984 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.326188146698686e-07, |
|
"loss": 0.1419, |
|
"step": 18048 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.316700761955586e-07, |
|
"loss": 0.1916, |
|
"step": 18112 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.307213377212488e-07, |
|
"loss": 0.1772, |
|
"step": 18176 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.297725992469388e-07, |
|
"loss": 0.1243, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.288238607726288e-07, |
|
"loss": 0.1738, |
|
"step": 18304 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.27875122298319e-07, |
|
"loss": 0.165, |
|
"step": 18368 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.26926383824009e-07, |
|
"loss": 0.0869, |
|
"step": 18432 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.259776453496991e-07, |
|
"loss": 0.1613, |
|
"step": 18496 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.25028906875389e-07, |
|
"loss": 0.1646, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.240801684010791e-07, |
|
"loss": 0.1232, |
|
"step": 18624 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.231314299267692e-07, |
|
"loss": 0.1695, |
|
"step": 18688 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.221826914524592e-07, |
|
"loss": 0.1305, |
|
"step": 18752 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.212339529781494e-07, |
|
"loss": 0.2047, |
|
"step": 18816 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.202852145038394e-07, |
|
"loss": 0.1881, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.193364760295294e-07, |
|
"loss": 0.1513, |
|
"step": 18944 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.183877375552196e-07, |
|
"loss": 0.1355, |
|
"step": 19008 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.174389990809096e-07, |
|
"loss": 0.1733, |
|
"step": 19072 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.164902606065996e-07, |
|
"loss": 0.1647, |
|
"step": 19136 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.155415221322897e-07, |
|
"loss": 0.132, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.145927836579797e-07, |
|
"loss": 0.2464, |
|
"step": 19264 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.136440451836698e-07, |
|
"loss": 0.1954, |
|
"step": 19328 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.126953067093598e-07, |
|
"loss": 0.1751, |
|
"step": 19392 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.1174656823505e-07, |
|
"loss": 0.1182, |
|
"step": 19456 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.1079782976074e-07, |
|
"loss": 0.0678, |
|
"step": 19520 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.0984909128643e-07, |
|
"loss": 0.1656, |
|
"step": 19584 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.089003528121202e-07, |
|
"loss": 0.219, |
|
"step": 19648 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.079516143378101e-07, |
|
"loss": 0.15, |
|
"step": 19712 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.070028758635002e-07, |
|
"loss": 0.1845, |
|
"step": 19776 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.060541373891903e-07, |
|
"loss": 0.1766, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.051053989148803e-07, |
|
"loss": 0.1355, |
|
"step": 19904 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.041566604405704e-07, |
|
"loss": 0.1268, |
|
"step": 19968 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.032079219662605e-07, |
|
"loss": 0.191, |
|
"step": 20032 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.022591834919506e-07, |
|
"loss": 0.2032, |
|
"step": 20096 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.013104450176406e-07, |
|
"loss": 0.2632, |
|
"step": 20160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.003617065433306e-07, |
|
"loss": 0.2028, |
|
"step": 20224 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.994129680690208e-07, |
|
"loss": 0.135, |
|
"step": 20288 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.984642295947107e-07, |
|
"loss": 0.1635, |
|
"step": 20352 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.975154911204007e-07, |
|
"loss": 0.1986, |
|
"step": 20416 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.965667526460909e-07, |
|
"loss": 0.1786, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.956180141717809e-07, |
|
"loss": 0.116, |
|
"step": 20544 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.94669275697471e-07, |
|
"loss": 0.2206, |
|
"step": 20608 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.937205372231611e-07, |
|
"loss": 0.0995, |
|
"step": 20672 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.927717987488511e-07, |
|
"loss": 0.1634, |
|
"step": 20736 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.918230602745412e-07, |
|
"loss": 0.1429, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.908743218002312e-07, |
|
"loss": 0.2244, |
|
"step": 20864 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.899255833259213e-07, |
|
"loss": 0.1666, |
|
"step": 20928 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.889768448516113e-07, |
|
"loss": 0.1413, |
|
"step": 20992 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.880281063773013e-07, |
|
"loss": 0.1824, |
|
"step": 21056 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.870941919416525e-07, |
|
"loss": 0.1737, |
|
"step": 21120 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.861454534673426e-07, |
|
"loss": 0.171, |
|
"step": 21184 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.851967149930327e-07, |
|
"loss": 0.1571, |
|
"step": 21248 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.842479765187227e-07, |
|
"loss": 0.115, |
|
"step": 21312 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.832992380444128e-07, |
|
"loss": 0.2192, |
|
"step": 21376 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.823504995701029e-07, |
|
"loss": 0.146, |
|
"step": 21440 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.81401761095793e-07, |
|
"loss": 0.1586, |
|
"step": 21504 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.804530226214829e-07, |
|
"loss": 0.1035, |
|
"step": 21568 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.795191081858341e-07, |
|
"loss": 0.1898, |
|
"step": 21632 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.785703697115241e-07, |
|
"loss": 0.2901, |
|
"step": 21696 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.776216312372143e-07, |
|
"loss": 0.1986, |
|
"step": 21760 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.766728927629043e-07, |
|
"loss": 0.1774, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.757241542885943e-07, |
|
"loss": 0.1945, |
|
"step": 21888 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.747754158142845e-07, |
|
"loss": 0.1453, |
|
"step": 21952 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.738266773399745e-07, |
|
"loss": 0.1882, |
|
"step": 22016 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.728779388656646e-07, |
|
"loss": 0.2266, |
|
"step": 22080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.719292003913545e-07, |
|
"loss": 0.2192, |
|
"step": 22144 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.709804619170446e-07, |
|
"loss": 0.1191, |
|
"step": 22208 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.700317234427347e-07, |
|
"loss": 0.1323, |
|
"step": 22272 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.690829849684247e-07, |
|
"loss": 0.2345, |
|
"step": 22336 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.681342464941149e-07, |
|
"loss": 0.122, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.671855080198049e-07, |
|
"loss": 0.1643, |
|
"step": 22464 |
|
} |
|
], |
|
"logging_steps": 64, |
|
"max_steps": 67458, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 22486, |
|
"total_flos": 4.77416235737088e+19, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|