|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999898011218766, |
|
"eval_steps": 500, |
|
"global_step": 9804, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7619047619047613e-08, |
|
"loss": 5.1575, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.0204081632653061e-07, |
|
"loss": 4.7952, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.564625850340136e-07, |
|
"loss": 4.5513, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.0918367346938776e-07, |
|
"loss": 3.9771, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.619047619047619e-07, |
|
"loss": 3.2318, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.163265306122449e-07, |
|
"loss": 2.508, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.707482993197279e-07, |
|
"loss": 2.0866, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.2517006802721085e-07, |
|
"loss": 1.8228, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.795918367346938e-07, |
|
"loss": 1.6854, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.340136054421769e-07, |
|
"loss": 1.4892, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.884353741496599e-07, |
|
"loss": 1.2407, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.428571428571429e-07, |
|
"loss": 1.0248, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.95578231292517e-07, |
|
"loss": 0.577, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.401, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.04421768707483e-07, |
|
"loss": 0.3855, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.58843537414966e-07, |
|
"loss": 0.3437, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.132653061224489e-07, |
|
"loss": 0.3528, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.67687074829932e-07, |
|
"loss": 0.4408, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.985895627644569e-07, |
|
"loss": 0.4262, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.951177172615818e-07, |
|
"loss": 0.3917, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.916458717587067e-07, |
|
"loss": 0.4139, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.881740262558316e-07, |
|
"loss": 0.3101, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.847021807529563e-07, |
|
"loss": 0.4566, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.812303352500815e-07, |
|
"loss": 0.2914, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.777584897472062e-07, |
|
"loss": 0.4062, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.74286644244331e-07, |
|
"loss": 0.3887, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.70814798741456e-07, |
|
"loss": 0.3655, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.67342953238581e-07, |
|
"loss": 0.3668, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.638711077357056e-07, |
|
"loss": 0.3761, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.603992622328305e-07, |
|
"loss": 0.3528, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.569274167299554e-07, |
|
"loss": 0.2583, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.534555712270804e-07, |
|
"loss": 0.3145, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.499837257242053e-07, |
|
"loss": 0.2842, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.465118802213301e-07, |
|
"loss": 0.2259, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.430400347184549e-07, |
|
"loss": 0.3493, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.395681892155799e-07, |
|
"loss": 0.3421, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.360963437127047e-07, |
|
"loss": 0.4288, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.326244982098295e-07, |
|
"loss": 0.2881, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.291526527069546e-07, |
|
"loss": 0.3361, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.256808072040794e-07, |
|
"loss": 0.2797, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.222089617012042e-07, |
|
"loss": 0.2531, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.187371161983291e-07, |
|
"loss": 0.282, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.15265270695454e-07, |
|
"loss": 0.3408, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.117934251925788e-07, |
|
"loss": 0.2354, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.083215796897038e-07, |
|
"loss": 0.3474, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.048497341868287e-07, |
|
"loss": 0.3546, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.013778886839535e-07, |
|
"loss": 0.3966, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.979060431810784e-07, |
|
"loss": 0.3376, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.944341976782033e-07, |
|
"loss": 0.3938, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.909623521753281e-07, |
|
"loss": 0.3343, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.87490506672453e-07, |
|
"loss": 0.3399, |
|
"step": 1632 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.840186611695779e-07, |
|
"loss": 0.3072, |
|
"step": 1664 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 8.805468156667028e-07, |
|
"loss": 0.262, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.770749701638277e-07, |
|
"loss": 0.2995, |
|
"step": 1728 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.736031246609525e-07, |
|
"loss": 0.2804, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 8.701312791580774e-07, |
|
"loss": 0.3693, |
|
"step": 1792 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.666594336552023e-07, |
|
"loss": 0.2589, |
|
"step": 1824 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.631875881523272e-07, |
|
"loss": 0.2638, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 8.59715742649452e-07, |
|
"loss": 0.3516, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.56243897146577e-07, |
|
"loss": 0.369, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.527720516437018e-07, |
|
"loss": 0.3453, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 8.493002061408266e-07, |
|
"loss": 0.3813, |
|
"step": 1984 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.458283606379516e-07, |
|
"loss": 0.2657, |
|
"step": 2016 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.423565151350764e-07, |
|
"loss": 0.3514, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 8.388846696322013e-07, |
|
"loss": 0.2214, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.354128241293263e-07, |
|
"loss": 0.4351, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.319409786264511e-07, |
|
"loss": 0.2986, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 8.284691331235759e-07, |
|
"loss": 0.2211, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.249972876207008e-07, |
|
"loss": 0.2291, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.215254421178257e-07, |
|
"loss": 0.3048, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.180535966149505e-07, |
|
"loss": 0.3369, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.145817511120755e-07, |
|
"loss": 0.2413, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.111099056092004e-07, |
|
"loss": 0.2656, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.076380601063252e-07, |
|
"loss": 0.2661, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 8.041662146034501e-07, |
|
"loss": 0.3069, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 8.00694369100575e-07, |
|
"loss": 0.2357, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.972225235976998e-07, |
|
"loss": 0.3374, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 7.937506780948248e-07, |
|
"loss": 0.2678, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.902788325919496e-07, |
|
"loss": 0.2813, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.868069870890745e-07, |
|
"loss": 0.386, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 7.833351415861994e-07, |
|
"loss": 0.2247, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.798632960833242e-07, |
|
"loss": 0.3458, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.763914505804491e-07, |
|
"loss": 0.2967, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.72919605077574e-07, |
|
"loss": 0.277, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.694477595746989e-07, |
|
"loss": 0.2149, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.659759140718237e-07, |
|
"loss": 0.3624, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.625040685689487e-07, |
|
"loss": 0.3259, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.590322230660735e-07, |
|
"loss": 0.2905, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.555603775631984e-07, |
|
"loss": 0.2397, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.520885320603233e-07, |
|
"loss": 0.4047, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.486166865574481e-07, |
|
"loss": 0.258, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.451448410545731e-07, |
|
"loss": 0.3667, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.41672995551698e-07, |
|
"loss": 0.3022, |
|
"step": 2976 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.382011500488228e-07, |
|
"loss": 0.2435, |
|
"step": 3008 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.347293045459477e-07, |
|
"loss": 0.3491, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.312574590430725e-07, |
|
"loss": 0.2885, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.277856135401974e-07, |
|
"loss": 0.3822, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.243137680373224e-07, |
|
"loss": 0.3265, |
|
"step": 3136 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.208419225344472e-07, |
|
"loss": 0.2879, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.173700770315721e-07, |
|
"loss": 0.3678, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.13898231528697e-07, |
|
"loss": 0.2696, |
|
"step": 3232 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.104263860258218e-07, |
|
"loss": 0.3353, |
|
"step": 3264 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.069545405229467e-07, |
|
"loss": 0.3552, |
|
"step": 3296 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.034826950200716e-07, |
|
"loss": 0.2246, |
|
"step": 3328 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.000108495171965e-07, |
|
"loss": 0.2622, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.965390040143213e-07, |
|
"loss": 0.286, |
|
"step": 3392 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.930671585114463e-07, |
|
"loss": 0.2755, |
|
"step": 3424 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.895953130085711e-07, |
|
"loss": 0.3111, |
|
"step": 3456 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.861234675056959e-07, |
|
"loss": 0.2431, |
|
"step": 3488 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.826516220028209e-07, |
|
"loss": 0.2097, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 6.791797764999457e-07, |
|
"loss": 0.3134, |
|
"step": 3552 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.757079309970706e-07, |
|
"loss": 0.2535, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.722360854941956e-07, |
|
"loss": 0.3329, |
|
"step": 3616 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 6.687642399913204e-07, |
|
"loss": 0.2857, |
|
"step": 3648 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.652923944884452e-07, |
|
"loss": 0.2697, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.618205489855701e-07, |
|
"loss": 0.2815, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.58348703482695e-07, |
|
"loss": 0.2856, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.548768579798198e-07, |
|
"loss": 0.2912, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.514050124769448e-07, |
|
"loss": 0.2916, |
|
"step": 3808 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.479331669740697e-07, |
|
"loss": 0.308, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.444613214711945e-07, |
|
"loss": 0.2934, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.409894759683194e-07, |
|
"loss": 0.2869, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.375176304654442e-07, |
|
"loss": 0.3232, |
|
"step": 3936 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 6.340457849625691e-07, |
|
"loss": 0.2486, |
|
"step": 3968 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.305739394596941e-07, |
|
"loss": 0.3123, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.271020939568189e-07, |
|
"loss": 0.3183, |
|
"step": 4032 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.236302484539438e-07, |
|
"loss": 0.3688, |
|
"step": 4064 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.201584029510687e-07, |
|
"loss": 0.3236, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.166865574481935e-07, |
|
"loss": 0.2206, |
|
"step": 4128 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.132147119453183e-07, |
|
"loss": 0.2519, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.097428664424433e-07, |
|
"loss": 0.2467, |
|
"step": 4192 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.062710209395682e-07, |
|
"loss": 0.2555, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6.02799175436693e-07, |
|
"loss": 0.2391, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.99327329933818e-07, |
|
"loss": 0.3167, |
|
"step": 4288 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.958554844309428e-07, |
|
"loss": 0.2342, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5.923836389280676e-07, |
|
"loss": 0.1986, |
|
"step": 4352 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.889117934251926e-07, |
|
"loss": 0.2359, |
|
"step": 4384 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.854399479223174e-07, |
|
"loss": 0.2453, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5.819681024194423e-07, |
|
"loss": 0.3077, |
|
"step": 4448 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.784962569165672e-07, |
|
"loss": 0.2052, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.750244114136921e-07, |
|
"loss": 0.2313, |
|
"step": 4512 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.715525659108169e-07, |
|
"loss": 0.3616, |
|
"step": 4544 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.680807204079418e-07, |
|
"loss": 0.2848, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.646088749050667e-07, |
|
"loss": 0.2016, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 5.611370294021915e-07, |
|
"loss": 0.2467, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.576651838993165e-07, |
|
"loss": 0.3043, |
|
"step": 4672 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.541933383964414e-07, |
|
"loss": 0.2283, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5.507214928935662e-07, |
|
"loss": 0.1914, |
|
"step": 4736 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.472496473906911e-07, |
|
"loss": 0.1776, |
|
"step": 4768 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.437778018878159e-07, |
|
"loss": 0.2395, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.403059563849408e-07, |
|
"loss": 0.3197, |
|
"step": 4832 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.368341108820658e-07, |
|
"loss": 0.2135, |
|
"step": 4864 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.333622653791906e-07, |
|
"loss": 0.2104, |
|
"step": 4896 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5.298904198763155e-07, |
|
"loss": 0.2355, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.264185743734404e-07, |
|
"loss": 0.34, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.229467288705652e-07, |
|
"loss": 0.3004, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5.1947488336769e-07, |
|
"loss": 0.3025, |
|
"step": 5024 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.16003037864815e-07, |
|
"loss": 0.2561, |
|
"step": 5056 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.125311923619399e-07, |
|
"loss": 0.2891, |
|
"step": 5088 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.090593468590647e-07, |
|
"loss": 0.2668, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.055875013561897e-07, |
|
"loss": 0.2539, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.021156558533145e-07, |
|
"loss": 0.2294, |
|
"step": 5184 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.986438103504394e-07, |
|
"loss": 0.2286, |
|
"step": 5216 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.951719648475642e-07, |
|
"loss": 0.2987, |
|
"step": 5248 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.917001193446891e-07, |
|
"loss": 0.2871, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.882282738418141e-07, |
|
"loss": 0.3253, |
|
"step": 5312 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.847564283389389e-07, |
|
"loss": 0.2106, |
|
"step": 5344 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.812845828360638e-07, |
|
"loss": 0.2428, |
|
"step": 5376 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.778127373331887e-07, |
|
"loss": 0.1568, |
|
"step": 5408 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.743408918303135e-07, |
|
"loss": 0.2879, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.708690463274384e-07, |
|
"loss": 0.2221, |
|
"step": 5472 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.673972008245633e-07, |
|
"loss": 0.3336, |
|
"step": 5504 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.639253553216881e-07, |
|
"loss": 0.2265, |
|
"step": 5536 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.6045350981881303e-07, |
|
"loss": 0.2408, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5698166431593795e-07, |
|
"loss": 0.3562, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5350981881306276e-07, |
|
"loss": 0.1839, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.500379733101877e-07, |
|
"loss": 0.2341, |
|
"step": 5664 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4656612780731254e-07, |
|
"loss": 0.2683, |
|
"step": 5696 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.430942823044374e-07, |
|
"loss": 0.3978, |
|
"step": 5728 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.396224368015623e-07, |
|
"loss": 0.2493, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.361505912986872e-07, |
|
"loss": 0.28, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.3267874579581205e-07, |
|
"loss": 0.2263, |
|
"step": 5824 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.292069002929369e-07, |
|
"loss": 0.3391, |
|
"step": 5856 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.2573505479006183e-07, |
|
"loss": 0.3003, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.2226320928718675e-07, |
|
"loss": 0.2635, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.1879136378431156e-07, |
|
"loss": 0.2997, |
|
"step": 5952 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.153195182814365e-07, |
|
"loss": 0.2135, |
|
"step": 5984 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.1184767277856134e-07, |
|
"loss": 0.2651, |
|
"step": 6016 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.083758272756862e-07, |
|
"loss": 0.1867, |
|
"step": 6048 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.049039817728111e-07, |
|
"loss": 0.1986, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.01432136269936e-07, |
|
"loss": 0.2905, |
|
"step": 6112 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9796029076706085e-07, |
|
"loss": 0.2226, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.944884452641857e-07, |
|
"loss": 0.2472, |
|
"step": 6176 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9101659976131063e-07, |
|
"loss": 0.2103, |
|
"step": 6208 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.8754475425843544e-07, |
|
"loss": 0.2976, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.8407290875556036e-07, |
|
"loss": 0.2777, |
|
"step": 6272 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.806010632526853e-07, |
|
"loss": 0.2452, |
|
"step": 6304 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.771292177498101e-07, |
|
"loss": 0.2243, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.73657372246935e-07, |
|
"loss": 0.3171, |
|
"step": 6368 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.7018552674405987e-07, |
|
"loss": 0.2472, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.6671368124118474e-07, |
|
"loss": 0.2778, |
|
"step": 6432 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.6335033091027445e-07, |
|
"loss": 0.2834, |
|
"step": 6464 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.5987848540739937e-07, |
|
"loss": 0.23, |
|
"step": 6496 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.5640663990452423e-07, |
|
"loss": 0.2185, |
|
"step": 6528 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.529347944016491e-07, |
|
"loss": 0.282, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.4946294889877396e-07, |
|
"loss": 0.1885, |
|
"step": 6592 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.459911033958989e-07, |
|
"loss": 0.263, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.425192578930238e-07, |
|
"loss": 0.2431, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.390474123901486e-07, |
|
"loss": 0.2472, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.355755668872735e-07, |
|
"loss": 0.284, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.321037213843984e-07, |
|
"loss": 0.2947, |
|
"step": 6752 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.2863187588152325e-07, |
|
"loss": 0.2448, |
|
"step": 6784 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.251600303786481e-07, |
|
"loss": 0.3035, |
|
"step": 6816 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.2168818487577303e-07, |
|
"loss": 0.3378, |
|
"step": 6848 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.182163393728979e-07, |
|
"loss": 0.2878, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.1474449387002276e-07, |
|
"loss": 0.1969, |
|
"step": 6912 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.112726483671477e-07, |
|
"loss": 0.2282, |
|
"step": 6944 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.078008028642725e-07, |
|
"loss": 0.2472, |
|
"step": 6976 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.043289573613974e-07, |
|
"loss": 0.3127, |
|
"step": 7008 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.008571118585223e-07, |
|
"loss": 0.3757, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9738526635564714e-07, |
|
"loss": 0.2801, |
|
"step": 7072 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.9402191602473685e-07, |
|
"loss": 0.3094, |
|
"step": 7104 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.9055007052186177e-07, |
|
"loss": 0.3006, |
|
"step": 7136 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.8707822501898663e-07, |
|
"loss": 0.2431, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.836063795161115e-07, |
|
"loss": 0.217, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.801345340132364e-07, |
|
"loss": 0.1981, |
|
"step": 7232 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.766626885103613e-07, |
|
"loss": 0.242, |
|
"step": 7264 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.7319084300748614e-07, |
|
"loss": 0.2426, |
|
"step": 7296 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.69718997504611e-07, |
|
"loss": 0.2771, |
|
"step": 7328 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.662471520017359e-07, |
|
"loss": 0.2508, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.6277530649886084e-07, |
|
"loss": 0.2495, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5930346099598565e-07, |
|
"loss": 0.2353, |
|
"step": 7424 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5583161549311057e-07, |
|
"loss": 0.2047, |
|
"step": 7456 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.5235976999023543e-07, |
|
"loss": 0.36, |
|
"step": 7488 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.488879244873603e-07, |
|
"loss": 0.2685, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.4541607898448516e-07, |
|
"loss": 0.2505, |
|
"step": 7552 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.419442334816101e-07, |
|
"loss": 0.2223, |
|
"step": 7584 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.3847238797873494e-07, |
|
"loss": 0.2466, |
|
"step": 7616 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.350005424758598e-07, |
|
"loss": 0.2872, |
|
"step": 7648 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.315286969729847e-07, |
|
"loss": 0.2257, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2805685147010956e-07, |
|
"loss": 0.2488, |
|
"step": 7712 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2458500596723445e-07, |
|
"loss": 0.3667, |
|
"step": 7744 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2111316046435932e-07, |
|
"loss": 0.2617, |
|
"step": 7776 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1764131496148418e-07, |
|
"loss": 0.1686, |
|
"step": 7808 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.141694694586091e-07, |
|
"loss": 0.2216, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.1069762395573396e-07, |
|
"loss": 0.2418, |
|
"step": 7872 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0722577845285885e-07, |
|
"loss": 0.2023, |
|
"step": 7904 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0375393294998372e-07, |
|
"loss": 0.3171, |
|
"step": 7936 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.0028208744710858e-07, |
|
"loss": 0.2518, |
|
"step": 7968 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.9681024194423347e-07, |
|
"loss": 0.2388, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.9333839644135836e-07, |
|
"loss": 0.2612, |
|
"step": 8032 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.8986655093848323e-07, |
|
"loss": 0.3058, |
|
"step": 8064 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.8639470543560812e-07, |
|
"loss": 0.2292, |
|
"step": 8096 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.8292285993273298e-07, |
|
"loss": 0.3214, |
|
"step": 8128 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.7945101442985785e-07, |
|
"loss": 0.2081, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.7597916892698276e-07, |
|
"loss": 0.3473, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.7250732342410763e-07, |
|
"loss": 0.2174, |
|
"step": 8224 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.690354779212325e-07, |
|
"loss": 0.2021, |
|
"step": 8256 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6556363241835738e-07, |
|
"loss": 0.2808, |
|
"step": 8288 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6209178691548225e-07, |
|
"loss": 0.2753, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.586199414126071e-07, |
|
"loss": 0.1965, |
|
"step": 8352 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.5514809590973203e-07, |
|
"loss": 0.2076, |
|
"step": 8384 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.516762504068569e-07, |
|
"loss": 0.2856, |
|
"step": 8416 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4820440490398176e-07, |
|
"loss": 0.2812, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4473255940110665e-07, |
|
"loss": 0.28, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.412607138982315e-07, |
|
"loss": 0.2871, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.377888683953564e-07, |
|
"loss": 0.2063, |
|
"step": 8544 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.343170228924813e-07, |
|
"loss": 0.2929, |
|
"step": 8576 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.3084517738960616e-07, |
|
"loss": 0.2784, |
|
"step": 8608 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2737333188673102e-07, |
|
"loss": 0.2889, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.239014863838559e-07, |
|
"loss": 0.2367, |
|
"step": 8672 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.2042964088098078e-07, |
|
"loss": 0.2572, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1695779537810567e-07, |
|
"loss": 0.2167, |
|
"step": 8736 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1348594987523056e-07, |
|
"loss": 0.2448, |
|
"step": 8768 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1001410437235542e-07, |
|
"loss": 0.2384, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.065422588694803e-07, |
|
"loss": 0.2742, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0307041336660519e-07, |
|
"loss": 0.2615, |
|
"step": 8864 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.959856786373005e-08, |
|
"loss": 0.2511, |
|
"step": 8896 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.612672236085493e-08, |
|
"loss": 0.218, |
|
"step": 8928 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.265487685797982e-08, |
|
"loss": 0.1846, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.918303135510468e-08, |
|
"loss": 0.2521, |
|
"step": 8992 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.571118585222958e-08, |
|
"loss": 0.1836, |
|
"step": 9024 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.223934034935445e-08, |
|
"loss": 0.2421, |
|
"step": 9056 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.876749484647933e-08, |
|
"loss": 0.2631, |
|
"step": 9088 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.529564934360421e-08, |
|
"loss": 0.2602, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.182380384072909e-08, |
|
"loss": 0.2896, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.835195833785396e-08, |
|
"loss": 0.2901, |
|
"step": 9184 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.488011283497884e-08, |
|
"loss": 0.2488, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.140826733210372e-08, |
|
"loss": 0.2209, |
|
"step": 9248 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.7936421829228595e-08, |
|
"loss": 0.2494, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.446457632635348e-08, |
|
"loss": 0.251, |
|
"step": 9312 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 5.099273082347835e-08, |
|
"loss": 0.2243, |
|
"step": 9344 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.752088532060323e-08, |
|
"loss": 0.259, |
|
"step": 9376 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.404903981772811e-08, |
|
"loss": 0.267, |
|
"step": 9408 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.057719431485299e-08, |
|
"loss": 0.2674, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7105348811977866e-08, |
|
"loss": 0.2383, |
|
"step": 9472 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.363350330910274e-08, |
|
"loss": 0.2545, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.016165780622762e-08, |
|
"loss": 0.2904, |
|
"step": 9536 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.6689812303352498e-08, |
|
"loss": 0.2211, |
|
"step": 9568 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.321796680047738e-08, |
|
"loss": 0.2802, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.9746121297602256e-08, |
|
"loss": 0.2471, |
|
"step": 9632 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.6274275794727136e-08, |
|
"loss": 0.2593, |
|
"step": 9664 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2802430291852012e-08, |
|
"loss": 0.1877, |
|
"step": 9696 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.33058478897689e-09, |
|
"loss": 0.2619, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.858739286101768e-09, |
|
"loss": 0.1832, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.3868937832266464e-09, |
|
"loss": 0.1479, |
|
"step": 9792 |
|
} |
|
], |
|
"logging_steps": 32, |
|
"max_steps": 9805, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9804, |
|
"total_flos": 4.16311373758464e+19, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|