|
{ |
|
"best_metric": 4.34278678894043, |
|
"best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/binding-case/lstm/2/checkpoints/checkpoint-228960", |
|
"epoch": 0.025000606015738065, |
|
"eval_steps": 10, |
|
"global_step": 228960, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.999998362119627e-05, |
|
"loss": 10.8202, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.999161405248948e-05, |
|
"loss": 7.5553, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.998322810497896e-05, |
|
"loss": 7.0623, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.997484215746844e-05, |
|
"loss": 6.9941, |
|
"step": 1536 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.996645620995792e-05, |
|
"loss": 6.9472, |
|
"step": 2048 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.99580702624474e-05, |
|
"loss": 6.9029, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.994968431493688e-05, |
|
"loss": 6.7368, |
|
"step": 3072 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.994129836742636e-05, |
|
"loss": 6.6252, |
|
"step": 3584 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.993291241991584e-05, |
|
"loss": 6.5249, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.992452647240532e-05, |
|
"loss": 6.4532, |
|
"step": 4608 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.99161405248948e-05, |
|
"loss": 6.3903, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.990775457738428e-05, |
|
"loss": 6.3296, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.989936862987376e-05, |
|
"loss": 6.2678, |
|
"step": 6144 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.989098268236324e-05, |
|
"loss": 6.2035, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.988259673485272e-05, |
|
"loss": 6.1464, |
|
"step": 7168 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.98742107873422e-05, |
|
"loss": 6.0874, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.986582483983168e-05, |
|
"loss": 6.0408, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.985743889232116e-05, |
|
"loss": 5.9952, |
|
"step": 8704 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.984905294481064e-05, |
|
"loss": 5.9558, |
|
"step": 9216 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.984066699730012e-05, |
|
"loss": 5.9191, |
|
"step": 9728 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.983229742859333e-05, |
|
"loss": 5.8856, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.982391148108281e-05, |
|
"loss": 5.8505, |
|
"step": 10752 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.981552553357229e-05, |
|
"loss": 5.8194, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.980713958606178e-05, |
|
"loss": 5.7915, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9798770017354986e-05, |
|
"loss": 5.7743, |
|
"step": 12288 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9790384069844466e-05, |
|
"loss": 5.7353, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9781998122333946e-05, |
|
"loss": 5.7141, |
|
"step": 13312 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9773612174823426e-05, |
|
"loss": 5.6887, |
|
"step": 13824 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9765242606116635e-05, |
|
"loss": 5.6688, |
|
"step": 14336 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9756856658606115e-05, |
|
"loss": 5.6429, |
|
"step": 14848 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9748470711095595e-05, |
|
"loss": 5.6182, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.974008476358507e-05, |
|
"loss": 5.6126, |
|
"step": 15872 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.973169881607455e-05, |
|
"loss": 5.5839, |
|
"step": 16384 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.972331286856403e-05, |
|
"loss": 5.5749, |
|
"step": 16896 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9714926921053515e-05, |
|
"loss": 5.5602, |
|
"step": 17408 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9706540973542995e-05, |
|
"loss": 5.5357, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9698171404836204e-05, |
|
"loss": 5.5246, |
|
"step": 18432 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.968980183612942e-05, |
|
"loss": 5.4853, |
|
"step": 18944 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.96814158886189e-05, |
|
"loss": 5.4867, |
|
"step": 19456 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.967302994110837e-05, |
|
"loss": 5.4507, |
|
"step": 19968 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.966464399359785e-05, |
|
"loss": 5.4514, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.965627442489107e-05, |
|
"loss": 5.4323, |
|
"step": 20992 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.964788847738054e-05, |
|
"loss": 5.4321, |
|
"step": 21504 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.963950252987002e-05, |
|
"loss": 5.4055, |
|
"step": 22016 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.96311165823595e-05, |
|
"loss": 5.3987, |
|
"step": 22528 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.962274701365272e-05, |
|
"loss": 5.3903, |
|
"step": 23040 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.96143610661422e-05, |
|
"loss": 5.3724, |
|
"step": 23552 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.960597511863168e-05, |
|
"loss": 5.3771, |
|
"step": 24064 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.959758917112116e-05, |
|
"loss": 5.3481, |
|
"step": 24576 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.958920322361064e-05, |
|
"loss": 5.3317, |
|
"step": 25088 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.958083365490385e-05, |
|
"loss": 5.3419, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.957244770739333e-05, |
|
"loss": 5.336, |
|
"step": 26112 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.956406175988281e-05, |
|
"loss": 5.3088, |
|
"step": 26624 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.955567581237229e-05, |
|
"loss": 5.2988, |
|
"step": 27136 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.954728986486177e-05, |
|
"loss": 5.2973, |
|
"step": 27648 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.953890391735125e-05, |
|
"loss": 5.2805, |
|
"step": 28160 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9530517969840727e-05, |
|
"loss": 5.295, |
|
"step": 28672 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9522132022330207e-05, |
|
"loss": 5.2557, |
|
"step": 29184 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.951376245362342e-05, |
|
"loss": 5.2554, |
|
"step": 29696 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.950539288491663e-05, |
|
"loss": 5.248, |
|
"step": 30208 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.949700693740611e-05, |
|
"loss": 5.236, |
|
"step": 30720 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.948862098989559e-05, |
|
"loss": 5.2383, |
|
"step": 31232 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.948023504238507e-05, |
|
"loss": 5.2159, |
|
"step": 31744 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.947184909487455e-05, |
|
"loss": 5.2093, |
|
"step": 32256 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.946347952616776e-05, |
|
"loss": 5.2024, |
|
"step": 32768 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.945509357865724e-05, |
|
"loss": 5.2154, |
|
"step": 33280 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.944670763114672e-05, |
|
"loss": 5.1879, |
|
"step": 33792 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.94383216836362e-05, |
|
"loss": 5.1856, |
|
"step": 34304 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.942995211492941e-05, |
|
"loss": 5.1594, |
|
"step": 34816 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.942156616741889e-05, |
|
"loss": 5.167, |
|
"step": 35328 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.941318021990837e-05, |
|
"loss": 5.1745, |
|
"step": 35840 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9404794272397856e-05, |
|
"loss": 5.1674, |
|
"step": 36352 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9396408324887336e-05, |
|
"loss": 5.1439, |
|
"step": 36864 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9388038756180545e-05, |
|
"loss": 5.1575, |
|
"step": 37376 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9379652808670025e-05, |
|
"loss": 5.1534, |
|
"step": 37888 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9371266861159505e-05, |
|
"loss": 5.1289, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9362880913648985e-05, |
|
"loss": 5.1265, |
|
"step": 38912 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9354511344942194e-05, |
|
"loss": 5.1098, |
|
"step": 39424 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9346125397431674e-05, |
|
"loss": 5.0966, |
|
"step": 39936 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9337739449921154e-05, |
|
"loss": 5.0944, |
|
"step": 40448 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9329353502410634e-05, |
|
"loss": 5.0978, |
|
"step": 40960 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9320967554900114e-05, |
|
"loss": 5.0972, |
|
"step": 41472 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9312581607389594e-05, |
|
"loss": 5.103, |
|
"step": 41984 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.930421203868281e-05, |
|
"loss": 5.0877, |
|
"step": 42496 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.929582609117229e-05, |
|
"loss": 5.0679, |
|
"step": 43008 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.928744014366177e-05, |
|
"loss": 5.0684, |
|
"step": 43520 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.927905419615125e-05, |
|
"loss": 5.0691, |
|
"step": 44032 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.927066824864073e-05, |
|
"loss": 5.0567, |
|
"step": 44544 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.926229867993394e-05, |
|
"loss": 5.049, |
|
"step": 45056 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.925392911122715e-05, |
|
"loss": 5.0376, |
|
"step": 45568 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.924554316371663e-05, |
|
"loss": 5.0369, |
|
"step": 46080 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.923715721620611e-05, |
|
"loss": 5.039, |
|
"step": 46592 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.922877126869559e-05, |
|
"loss": 5.0247, |
|
"step": 47104 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.92204016999888e-05, |
|
"loss": 5.0286, |
|
"step": 47616 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.921201575247828e-05, |
|
"loss": 5.015, |
|
"step": 48128 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.920362980496776e-05, |
|
"loss": 5.0109, |
|
"step": 48640 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.919524385745724e-05, |
|
"loss": 4.9984, |
|
"step": 49152 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.918685790994672e-05, |
|
"loss": 4.996, |
|
"step": 49664 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.91784719624362e-05, |
|
"loss": 4.9887, |
|
"step": 50176 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9170086014925676e-05, |
|
"loss": 4.9917, |
|
"step": 50688 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9161700067415156e-05, |
|
"loss": 4.9933, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.915333049870837e-05, |
|
"loss": 4.9709, |
|
"step": 51712 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.914494455119785e-05, |
|
"loss": 4.9769, |
|
"step": 52224 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9136558603687325e-05, |
|
"loss": 4.9642, |
|
"step": 52736 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.912818903498054e-05, |
|
"loss": 4.9613, |
|
"step": 53248 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.911980308747002e-05, |
|
"loss": 4.9545, |
|
"step": 53760 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.91114171399595e-05, |
|
"loss": 4.9482, |
|
"step": 54272 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.910303119244898e-05, |
|
"loss": 4.9476, |
|
"step": 54784 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.909464524493846e-05, |
|
"loss": 4.9395, |
|
"step": 55296 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.908625929742794e-05, |
|
"loss": 4.9365, |
|
"step": 55808 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.907787334991742e-05, |
|
"loss": 4.9292, |
|
"step": 56320 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.90694874024069e-05, |
|
"loss": 4.9193, |
|
"step": 56832 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.906111783370011e-05, |
|
"loss": 4.9308, |
|
"step": 57344 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.905273188618959e-05, |
|
"loss": 4.9186, |
|
"step": 57856 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.904434593867907e-05, |
|
"loss": 4.9197, |
|
"step": 58368 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.903595999116855e-05, |
|
"loss": 4.9151, |
|
"step": 58880 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.902759042246176e-05, |
|
"loss": 4.916, |
|
"step": 59392 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.901920447495124e-05, |
|
"loss": 4.9066, |
|
"step": 59904 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.901081852744072e-05, |
|
"loss": 4.8912, |
|
"step": 60416 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.90024325799302e-05, |
|
"loss": 4.9003, |
|
"step": 60928 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8994046632419686e-05, |
|
"loss": 4.9049, |
|
"step": 61440 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8985660684909166e-05, |
|
"loss": 4.8902, |
|
"step": 61952 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8977291116202375e-05, |
|
"loss": 4.883, |
|
"step": 62464 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8968905168691855e-05, |
|
"loss": 4.8822, |
|
"step": 62976 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8960519221181335e-05, |
|
"loss": 4.8787, |
|
"step": 63488 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8952133273670815e-05, |
|
"loss": 4.8699, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8943747326160294e-05, |
|
"loss": 4.8723, |
|
"step": 64512 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8935377757453504e-05, |
|
"loss": 4.8611, |
|
"step": 65024 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.892700818874671e-05, |
|
"loss": 4.8637, |
|
"step": 65536 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.891862224123619e-05, |
|
"loss": 4.8533, |
|
"step": 66048 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.891023629372567e-05, |
|
"loss": 4.871, |
|
"step": 66560 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.890185034621515e-05, |
|
"loss": 4.8541, |
|
"step": 67072 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.889346439870464e-05, |
|
"loss": 4.8452, |
|
"step": 67584 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.888507845119412e-05, |
|
"loss": 4.8415, |
|
"step": 68096 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.88766925036836e-05, |
|
"loss": 4.8438, |
|
"step": 68608 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.886830655617308e-05, |
|
"loss": 4.8436, |
|
"step": 69120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.885993698746629e-05, |
|
"loss": 4.8439, |
|
"step": 69632 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.885155103995577e-05, |
|
"loss": 4.8322, |
|
"step": 70144 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.884316509244525e-05, |
|
"loss": 4.833, |
|
"step": 70656 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.883477914493473e-05, |
|
"loss": 4.8285, |
|
"step": 71168 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.882640957622794e-05, |
|
"loss": 4.8268, |
|
"step": 71680 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.881802362871742e-05, |
|
"loss": 4.8145, |
|
"step": 72192 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.88096376812069e-05, |
|
"loss": 4.8258, |
|
"step": 72704 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.880125173369638e-05, |
|
"loss": 4.8122, |
|
"step": 73216 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8792882164989586e-05, |
|
"loss": 4.7994, |
|
"step": 73728 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.878449621747907e-05, |
|
"loss": 4.813, |
|
"step": 74240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.877611026996855e-05, |
|
"loss": 4.8029, |
|
"step": 74752 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.876772432245803e-05, |
|
"loss": 4.8033, |
|
"step": 75264 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.875933837494751e-05, |
|
"loss": 4.8089, |
|
"step": 75776 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8750952427436986e-05, |
|
"loss": 4.7896, |
|
"step": 76288 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 4.756977081298828, |
|
"eval_runtime": 297.0801, |
|
"eval_samples_per_second": 1284.472, |
|
"eval_steps_per_second": 40.141, |
|
"step": 76320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8742566479926466e-05, |
|
"loss": 4.7824, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8734180532415946e-05, |
|
"loss": 4.785, |
|
"step": 77312 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.872581096370916e-05, |
|
"loss": 4.7887, |
|
"step": 77824 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.871744139500237e-05, |
|
"loss": 4.7825, |
|
"step": 78336 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.870905544749185e-05, |
|
"loss": 4.7872, |
|
"step": 78848 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.870066949998133e-05, |
|
"loss": 4.7708, |
|
"step": 79360 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.869228355247081e-05, |
|
"loss": 4.7773, |
|
"step": 79872 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.868389760496029e-05, |
|
"loss": 4.7645, |
|
"step": 80384 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.867551165744977e-05, |
|
"loss": 4.773, |
|
"step": 80896 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.866712570993925e-05, |
|
"loss": 4.7612, |
|
"step": 81408 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.865875614123246e-05, |
|
"loss": 4.7583, |
|
"step": 81920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.865037019372194e-05, |
|
"loss": 4.7589, |
|
"step": 82432 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8642000625015155e-05, |
|
"loss": 4.753, |
|
"step": 82944 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8633614677504635e-05, |
|
"loss": 4.7484, |
|
"step": 83456 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.862522872999411e-05, |
|
"loss": 4.7442, |
|
"step": 83968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.861684278248359e-05, |
|
"loss": 4.7312, |
|
"step": 84480 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.860845683497307e-05, |
|
"loss": 4.7409, |
|
"step": 84992 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.860007088746255e-05, |
|
"loss": 4.735, |
|
"step": 85504 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.859168493995203e-05, |
|
"loss": 4.7282, |
|
"step": 86016 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.858329899244151e-05, |
|
"loss": 4.7471, |
|
"step": 86528 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8574913044930995e-05, |
|
"loss": 4.7299, |
|
"step": 87040 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8566527097420475e-05, |
|
"loss": 4.733, |
|
"step": 87552 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8558141149909955e-05, |
|
"loss": 4.7264, |
|
"step": 88064 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.8549771581203164e-05, |
|
"loss": 4.7431, |
|
"step": 88576 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.854140201249637e-05, |
|
"loss": 4.7129, |
|
"step": 89088 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.853301606498585e-05, |
|
"loss": 4.7131, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.852463011747533e-05, |
|
"loss": 4.7177, |
|
"step": 90112 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.851624416996481e-05, |
|
"loss": 4.7192, |
|
"step": 90624 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.850785822245429e-05, |
|
"loss": 4.7046, |
|
"step": 91136 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.849947227494377e-05, |
|
"loss": 4.7045, |
|
"step": 91648 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.849108632743325e-05, |
|
"loss": 4.7123, |
|
"step": 92160 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.848270037992273e-05, |
|
"loss": 4.7097, |
|
"step": 92672 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.847433081121595e-05, |
|
"loss": 4.711, |
|
"step": 93184 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.846594486370543e-05, |
|
"loss": 4.7071, |
|
"step": 93696 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.845755891619491e-05, |
|
"loss": 4.6983, |
|
"step": 94208 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.844917296868439e-05, |
|
"loss": 4.7047, |
|
"step": 94720 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.84408033999776e-05, |
|
"loss": 4.6755, |
|
"step": 95232 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.843241745246708e-05, |
|
"loss": 4.6972, |
|
"step": 95744 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.842403150495656e-05, |
|
"loss": 4.6709, |
|
"step": 96256 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.841564555744604e-05, |
|
"loss": 4.6846, |
|
"step": 96768 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.840725960993552e-05, |
|
"loss": 4.6764, |
|
"step": 97280 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8398873662425e-05, |
|
"loss": 4.6888, |
|
"step": 97792 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.839048771491447e-05, |
|
"loss": 4.6775, |
|
"step": 98304 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.838210176740395e-05, |
|
"loss": 4.6735, |
|
"step": 98816 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8373732198697167e-05, |
|
"loss": 4.6763, |
|
"step": 99328 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8365346251186646e-05, |
|
"loss": 4.6684, |
|
"step": 99840 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8356960303676126e-05, |
|
"loss": 4.682, |
|
"step": 100352 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8348574356165606e-05, |
|
"loss": 4.6582, |
|
"step": 100864 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8340188408655086e-05, |
|
"loss": 4.6516, |
|
"step": 101376 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8331802461144566e-05, |
|
"loss": 4.6727, |
|
"step": 101888 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8323416513634046e-05, |
|
"loss": 4.6695, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8315046944927255e-05, |
|
"loss": 4.6538, |
|
"step": 102912 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8306660997416735e-05, |
|
"loss": 4.6483, |
|
"step": 103424 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8298275049906215e-05, |
|
"loss": 4.656, |
|
"step": 103936 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8289905481199424e-05, |
|
"loss": 4.6427, |
|
"step": 104448 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8281519533688904e-05, |
|
"loss": 4.6653, |
|
"step": 104960 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8273133586178384e-05, |
|
"loss": 4.6398, |
|
"step": 105472 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.826474763866787e-05, |
|
"loss": 4.6462, |
|
"step": 105984 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.825636169115735e-05, |
|
"loss": 4.6412, |
|
"step": 106496 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.824797574364683e-05, |
|
"loss": 4.634, |
|
"step": 107008 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.823960617494004e-05, |
|
"loss": 4.6454, |
|
"step": 107520 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.823122022742952e-05, |
|
"loss": 4.6314, |
|
"step": 108032 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8222834279919e-05, |
|
"loss": 4.6268, |
|
"step": 108544 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.821444833240848e-05, |
|
"loss": 4.6282, |
|
"step": 109056 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.820606238489796e-05, |
|
"loss": 4.6439, |
|
"step": 109568 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.819767643738744e-05, |
|
"loss": 4.6249, |
|
"step": 110080 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.818929048987692e-05, |
|
"loss": 4.6211, |
|
"step": 110592 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.81809045423664e-05, |
|
"loss": 4.6151, |
|
"step": 111104 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.817255135246334e-05, |
|
"loss": 4.618, |
|
"step": 111616 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.816416540495282e-05, |
|
"loss": 4.6373, |
|
"step": 112128 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8155779457442305e-05, |
|
"loss": 4.6324, |
|
"step": 112640 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8147393509931785e-05, |
|
"loss": 4.6143, |
|
"step": 113152 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8139007562421265e-05, |
|
"loss": 4.6287, |
|
"step": 113664 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8130621614910745e-05, |
|
"loss": 4.632, |
|
"step": 114176 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8122252046203954e-05, |
|
"loss": 4.6175, |
|
"step": 114688 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8113866098693434e-05, |
|
"loss": 4.6175, |
|
"step": 115200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8105480151182914e-05, |
|
"loss": 4.6072, |
|
"step": 115712 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8097094203672393e-05, |
|
"loss": 4.5947, |
|
"step": 116224 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8088708256161873e-05, |
|
"loss": 4.5987, |
|
"step": 116736 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.808032230865135e-05, |
|
"loss": 4.6039, |
|
"step": 117248 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8071936361140827e-05, |
|
"loss": 4.6066, |
|
"step": 117760 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.8063550413630307e-05, |
|
"loss": 4.6237, |
|
"step": 118272 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.805518084492352e-05, |
|
"loss": 4.609, |
|
"step": 118784 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.804681127621674e-05, |
|
"loss": 4.59, |
|
"step": 119296 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.803842532870622e-05, |
|
"loss": 4.5987, |
|
"step": 119808 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.80300393811957e-05, |
|
"loss": 4.6011, |
|
"step": 120320 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.802165343368518e-05, |
|
"loss": 4.5985, |
|
"step": 120832 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.801326748617466e-05, |
|
"loss": 4.5894, |
|
"step": 121344 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.800488153866413e-05, |
|
"loss": 4.5868, |
|
"step": 121856 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.799651196995735e-05, |
|
"loss": 4.5881, |
|
"step": 122368 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.798812602244683e-05, |
|
"loss": 4.5923, |
|
"step": 122880 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.79797400749363e-05, |
|
"loss": 4.5833, |
|
"step": 123392 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.797135412742578e-05, |
|
"loss": 4.5894, |
|
"step": 123904 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7963000937522725e-05, |
|
"loss": 4.5841, |
|
"step": 124416 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.795461499001221e-05, |
|
"loss": 4.582, |
|
"step": 124928 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.794622904250169e-05, |
|
"loss": 4.5716, |
|
"step": 125440 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.793784309499117e-05, |
|
"loss": 4.5778, |
|
"step": 125952 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.792945714748065e-05, |
|
"loss": 4.5731, |
|
"step": 126464 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.792107119997013e-05, |
|
"loss": 4.5754, |
|
"step": 126976 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7912685252459605e-05, |
|
"loss": 4.5787, |
|
"step": 127488 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7904299304949085e-05, |
|
"loss": 4.5653, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7895913357438565e-05, |
|
"loss": 4.5691, |
|
"step": 128512 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7887527409928045e-05, |
|
"loss": 4.5652, |
|
"step": 129024 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7879141462417525e-05, |
|
"loss": 4.5641, |
|
"step": 129536 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7870755514907005e-05, |
|
"loss": 4.5573, |
|
"step": 130048 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.786240232500395e-05, |
|
"loss": 4.5585, |
|
"step": 130560 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.785401637749343e-05, |
|
"loss": 4.5627, |
|
"step": 131072 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.784563042998291e-05, |
|
"loss": 4.5528, |
|
"step": 131584 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.783724448247239e-05, |
|
"loss": 4.5567, |
|
"step": 132096 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.782885853496187e-05, |
|
"loss": 4.5498, |
|
"step": 132608 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.782047258745135e-05, |
|
"loss": 4.5415, |
|
"step": 133120 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.781208663994083e-05, |
|
"loss": 4.5631, |
|
"step": 133632 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.780370069243031e-05, |
|
"loss": 4.5487, |
|
"step": 134144 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.779533112372352e-05, |
|
"loss": 4.5546, |
|
"step": 134656 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7786945176213e-05, |
|
"loss": 4.549, |
|
"step": 135168 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.777855922870248e-05, |
|
"loss": 4.5603, |
|
"step": 135680 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.777017328119196e-05, |
|
"loss": 4.5498, |
|
"step": 136192 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.776178733368144e-05, |
|
"loss": 4.5354, |
|
"step": 136704 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.775340138617092e-05, |
|
"loss": 4.548, |
|
"step": 137216 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.77450154386604e-05, |
|
"loss": 4.5517, |
|
"step": 137728 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.773666224875734e-05, |
|
"loss": 4.5378, |
|
"step": 138240 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.772829268005055e-05, |
|
"loss": 4.5408, |
|
"step": 138752 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.771990673254003e-05, |
|
"loss": 4.5351, |
|
"step": 139264 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.771152078502951e-05, |
|
"loss": 4.5433, |
|
"step": 139776 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.770313483751899e-05, |
|
"loss": 4.5304, |
|
"step": 140288 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.769474889000847e-05, |
|
"loss": 4.5364, |
|
"step": 140800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.768636294249795e-05, |
|
"loss": 4.5291, |
|
"step": 141312 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.767797699498743e-05, |
|
"loss": 4.5349, |
|
"step": 141824 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.766959104747691e-05, |
|
"loss": 4.5229, |
|
"step": 142336 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.766120509996639e-05, |
|
"loss": 4.5462, |
|
"step": 142848 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.765281915245587e-05, |
|
"loss": 4.5354, |
|
"step": 143360 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.764443320494535e-05, |
|
"loss": 4.5208, |
|
"step": 143872 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.763606363623857e-05, |
|
"loss": 4.5247, |
|
"step": 144384 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.762767768872805e-05, |
|
"loss": 4.5259, |
|
"step": 144896 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.761929174121753e-05, |
|
"loss": 4.5286, |
|
"step": 145408 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.761090579370701e-05, |
|
"loss": 4.5371, |
|
"step": 145920 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.760253622500022e-05, |
|
"loss": 4.5188, |
|
"step": 146432 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.75941502774897e-05, |
|
"loss": 4.5247, |
|
"step": 146944 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7585780708782906e-05, |
|
"loss": 4.5206, |
|
"step": 147456 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7577394761272386e-05, |
|
"loss": 4.5246, |
|
"step": 147968 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7569008813761866e-05, |
|
"loss": 4.5155, |
|
"step": 148480 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7560622866251346e-05, |
|
"loss": 4.524, |
|
"step": 148992 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7552236918740826e-05, |
|
"loss": 4.5126, |
|
"step": 149504 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7543850971230306e-05, |
|
"loss": 4.5083, |
|
"step": 150016 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7535465023719786e-05, |
|
"loss": 4.5211, |
|
"step": 150528 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7527079076209266e-05, |
|
"loss": 4.5121, |
|
"step": 151040 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7518693128698745e-05, |
|
"loss": 4.513, |
|
"step": 151552 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.751032355999196e-05, |
|
"loss": 4.5134, |
|
"step": 152064 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.7501937612481435e-05, |
|
"loss": 4.5056, |
|
"step": 152576 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 4.477574825286865, |
|
"eval_runtime": 297.0621, |
|
"eval_samples_per_second": 1284.549, |
|
"eval_steps_per_second": 40.143, |
|
"step": 152640 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7493551664970914e-05, |
|
"loss": 4.4977, |
|
"step": 153088 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7485165717460394e-05, |
|
"loss": 4.4976, |
|
"step": 153600 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7476779769949874e-05, |
|
"loss": 4.5111, |
|
"step": 154112 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7468393822439354e-05, |
|
"loss": 4.5006, |
|
"step": 154624 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7460007874928834e-05, |
|
"loss": 4.5118, |
|
"step": 155136 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7451621927418314e-05, |
|
"loss": 4.4969, |
|
"step": 155648 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7443235979907794e-05, |
|
"loss": 4.501, |
|
"step": 156160 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7434850032397274e-05, |
|
"loss": 4.4915, |
|
"step": 156672 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7426464084886754e-05, |
|
"loss": 4.5066, |
|
"step": 157184 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7418078137376234e-05, |
|
"loss": 4.4929, |
|
"step": 157696 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.740969218986572e-05, |
|
"loss": 4.4928, |
|
"step": 158208 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.74013062423552e-05, |
|
"loss": 4.4918, |
|
"step": 158720 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.739293667364841e-05, |
|
"loss": 4.4886, |
|
"step": 159232 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.738455072613789e-05, |
|
"loss": 4.4846, |
|
"step": 159744 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.737616477862737e-05, |
|
"loss": 4.4863, |
|
"step": 160256 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.736777883111685e-05, |
|
"loss": 4.4744, |
|
"step": 160768 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.735939288360632e-05, |
|
"loss": 4.4831, |
|
"step": 161280 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.73510069360958e-05, |
|
"loss": 4.4739, |
|
"step": 161792 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.734262098858528e-05, |
|
"loss": 4.4726, |
|
"step": 162304 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.733423504107476e-05, |
|
"loss": 4.4999, |
|
"step": 162816 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.732584909356424e-05, |
|
"loss": 4.4791, |
|
"step": 163328 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.731746314605372e-05, |
|
"loss": 4.4753, |
|
"step": 163840 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.73090771985432e-05, |
|
"loss": 4.4806, |
|
"step": 164352 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.730070762983642e-05, |
|
"loss": 4.4943, |
|
"step": 164864 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.72923216823259e-05, |
|
"loss": 4.4687, |
|
"step": 165376 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.728393573481538e-05, |
|
"loss": 4.4713, |
|
"step": 165888 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.727554978730486e-05, |
|
"loss": 4.4727, |
|
"step": 166400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.726716383979434e-05, |
|
"loss": 4.4781, |
|
"step": 166912 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.725877789228382e-05, |
|
"loss": 4.4625, |
|
"step": 167424 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.72503919447733e-05, |
|
"loss": 4.467, |
|
"step": 167936 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.724200599726278e-05, |
|
"loss": 4.4717, |
|
"step": 168448 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.723363642855599e-05, |
|
"loss": 4.4739, |
|
"step": 168960 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7225266859849196e-05, |
|
"loss": 4.4803, |
|
"step": 169472 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7216880912338676e-05, |
|
"loss": 4.4712, |
|
"step": 169984 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7208494964828156e-05, |
|
"loss": 4.4633, |
|
"step": 170496 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.720010901731764e-05, |
|
"loss": 4.471, |
|
"step": 171008 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.719172306980712e-05, |
|
"loss": 4.4517, |
|
"step": 171520 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.71833371222966e-05, |
|
"loss": 4.4649, |
|
"step": 172032 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.717495117478608e-05, |
|
"loss": 4.4457, |
|
"step": 172544 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.716656522727556e-05, |
|
"loss": 4.4546, |
|
"step": 173056 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.715819565856877e-05, |
|
"loss": 4.4561, |
|
"step": 173568 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.714980971105825e-05, |
|
"loss": 4.4646, |
|
"step": 174080 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.714142376354773e-05, |
|
"loss": 4.4558, |
|
"step": 174592 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.713303781603721e-05, |
|
"loss": 4.4519, |
|
"step": 175104 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.712466824733042e-05, |
|
"loss": 4.4541, |
|
"step": 175616 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.71162822998199e-05, |
|
"loss": 4.4497, |
|
"step": 176128 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.710789635230938e-05, |
|
"loss": 4.4615, |
|
"step": 176640 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.709951040479886e-05, |
|
"loss": 4.4431, |
|
"step": 177152 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.709114083609208e-05, |
|
"loss": 4.432, |
|
"step": 177664 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.708275488858156e-05, |
|
"loss": 4.4573, |
|
"step": 178176 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7074368941071037e-05, |
|
"loss": 4.4522, |
|
"step": 178688 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7065982993560517e-05, |
|
"loss": 4.4412, |
|
"step": 179200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7057613424853726e-05, |
|
"loss": 4.4388, |
|
"step": 179712 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7049227477343206e-05, |
|
"loss": 4.4437, |
|
"step": 180224 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7040841529832686e-05, |
|
"loss": 4.4328, |
|
"step": 180736 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7032471961125895e-05, |
|
"loss": 4.4518, |
|
"step": 181248 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7024086013615375e-05, |
|
"loss": 4.4335, |
|
"step": 181760 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7015700066104855e-05, |
|
"loss": 4.4405, |
|
"step": 182272 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.7007314118594335e-05, |
|
"loss": 4.4331, |
|
"step": 182784 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6998928171083814e-05, |
|
"loss": 4.4343, |
|
"step": 183296 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6990542223573294e-05, |
|
"loss": 4.4365, |
|
"step": 183808 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6982156276062774e-05, |
|
"loss": 4.4315, |
|
"step": 184320 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6973786707355983e-05, |
|
"loss": 4.4264, |
|
"step": 184832 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6965400759845463e-05, |
|
"loss": 4.4266, |
|
"step": 185344 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.695703119113868e-05, |
|
"loss": 4.4399, |
|
"step": 185856 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.694864524362816e-05, |
|
"loss": 4.4287, |
|
"step": 186368 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.694025929611763e-05, |
|
"loss": 4.4238, |
|
"step": 186880 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.693187334860711e-05, |
|
"loss": 4.4194, |
|
"step": 187392 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.692348740109659e-05, |
|
"loss": 4.4221, |
|
"step": 187904 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.691510145358607e-05, |
|
"loss": 4.4433, |
|
"step": 188416 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.690671550607555e-05, |
|
"loss": 4.4348, |
|
"step": 188928 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.689832955856503e-05, |
|
"loss": 4.4235, |
|
"step": 189440 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.688995998985825e-05, |
|
"loss": 4.4357, |
|
"step": 189952 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.688157404234773e-05, |
|
"loss": 4.4385, |
|
"step": 190464 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.687318809483721e-05, |
|
"loss": 4.4213, |
|
"step": 190976 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.686480214732669e-05, |
|
"loss": 4.4258, |
|
"step": 191488 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.68564325786199e-05, |
|
"loss": 4.4208, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.684804663110938e-05, |
|
"loss": 4.4086, |
|
"step": 192512 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.683966068359886e-05, |
|
"loss": 4.4177, |
|
"step": 193024 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.683127473608834e-05, |
|
"loss": 4.4128, |
|
"step": 193536 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.682288878857782e-05, |
|
"loss": 4.4205, |
|
"step": 194048 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6814519219871026e-05, |
|
"loss": 4.4343, |
|
"step": 194560 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6806133272360506e-05, |
|
"loss": 4.4284, |
|
"step": 195072 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6797747324849986e-05, |
|
"loss": 4.4035, |
|
"step": 195584 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.6789361377339466e-05, |
|
"loss": 4.42, |
|
"step": 196096 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.678097542982895e-05, |
|
"loss": 4.4154, |
|
"step": 196608 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.677258948231843e-05, |
|
"loss": 4.4206, |
|
"step": 197120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.676420353480791e-05, |
|
"loss": 4.4114, |
|
"step": 197632 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.675583396610112e-05, |
|
"loss": 4.404, |
|
"step": 198144 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.67474480185906e-05, |
|
"loss": 4.4118, |
|
"step": 198656 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.673906207108008e-05, |
|
"loss": 4.4095, |
|
"step": 199168 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.673067612356956e-05, |
|
"loss": 4.4063, |
|
"step": 199680 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.672229017605904e-05, |
|
"loss": 4.4138, |
|
"step": 200192 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.671392060735225e-05, |
|
"loss": 4.4065, |
|
"step": 200704 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.670553465984173e-05, |
|
"loss": 4.4101, |
|
"step": 201216 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.669714871233121e-05, |
|
"loss": 4.3996, |
|
"step": 201728 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.668876276482069e-05, |
|
"loss": 4.4007, |
|
"step": 202240 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.668037681731017e-05, |
|
"loss": 4.4011, |
|
"step": 202752 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6672007248603386e-05, |
|
"loss": 4.4081, |
|
"step": 203264 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6663621301092866e-05, |
|
"loss": 4.408, |
|
"step": 203776 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6655235353582346e-05, |
|
"loss": 4.3932, |
|
"step": 204288 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.664684940607182e-05, |
|
"loss": 4.3984, |
|
"step": 204800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.66384634585613e-05, |
|
"loss": 4.3962, |
|
"step": 205312 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.663007751105078e-05, |
|
"loss": 4.3986, |
|
"step": 205824 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6621707942343995e-05, |
|
"loss": 4.3861, |
|
"step": 206336 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.661332199483347e-05, |
|
"loss": 4.3883, |
|
"step": 206848 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.660493604732295e-05, |
|
"loss": 4.4003, |
|
"step": 207360 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.659655009981243e-05, |
|
"loss": 4.3908, |
|
"step": 207872 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.658816415230191e-05, |
|
"loss": 4.3896, |
|
"step": 208384 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6579794583595124e-05, |
|
"loss": 4.3874, |
|
"step": 208896 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6571408636084604e-05, |
|
"loss": 4.3808, |
|
"step": 209408 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6563022688574084e-05, |
|
"loss": 4.3965, |
|
"step": 209920 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6554636741063564e-05, |
|
"loss": 4.3927, |
|
"step": 210432 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6546250793553044e-05, |
|
"loss": 4.3884, |
|
"step": 210944 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6537864846042524e-05, |
|
"loss": 4.3887, |
|
"step": 211456 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6529478898532004e-05, |
|
"loss": 4.4013, |
|
"step": 211968 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6521092951021484e-05, |
|
"loss": 4.3911, |
|
"step": 212480 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.651272338231469e-05, |
|
"loss": 4.3778, |
|
"step": 212992 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.650433743480417e-05, |
|
"loss": 4.3874, |
|
"step": 213504 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.649595148729365e-05, |
|
"loss": 4.3921, |
|
"step": 214016 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.648756553978313e-05, |
|
"loss": 4.382, |
|
"step": 214528 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.647919597107634e-05, |
|
"loss": 4.3833, |
|
"step": 215040 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.647081002356582e-05, |
|
"loss": 4.3802, |
|
"step": 215552 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.646242407605531e-05, |
|
"loss": 4.3885, |
|
"step": 216064 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.645403812854479e-05, |
|
"loss": 4.3743, |
|
"step": 216576 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.644565218103427e-05, |
|
"loss": 4.3876, |
|
"step": 217088 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.643728261232748e-05, |
|
"loss": 4.368, |
|
"step": 217600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.642889666481696e-05, |
|
"loss": 4.3854, |
|
"step": 218112 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.642051071730644e-05, |
|
"loss": 4.369, |
|
"step": 218624 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6412141148599646e-05, |
|
"loss": 4.3929, |
|
"step": 219136 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6403755201089126e-05, |
|
"loss": 4.3848, |
|
"step": 219648 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6395369253578606e-05, |
|
"loss": 4.3755, |
|
"step": 220160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6386983306068086e-05, |
|
"loss": 4.3691, |
|
"step": 220672 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6378597358557566e-05, |
|
"loss": 4.3782, |
|
"step": 221184 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6370211411047046e-05, |
|
"loss": 4.3766, |
|
"step": 221696 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6361825463536526e-05, |
|
"loss": 4.3883, |
|
"step": 222208 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6353439516026006e-05, |
|
"loss": 4.3685, |
|
"step": 222720 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6345053568515486e-05, |
|
"loss": 4.3799, |
|
"step": 223232 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6336667621004966e-05, |
|
"loss": 4.3754, |
|
"step": 223744 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.632829805229818e-05, |
|
"loss": 4.3824, |
|
"step": 224256 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6319912104787655e-05, |
|
"loss": 4.3649, |
|
"step": 224768 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6311526157277135e-05, |
|
"loss": 4.3765, |
|
"step": 225280 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6303140209766615e-05, |
|
"loss": 4.3704, |
|
"step": 225792 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.629477064105983e-05, |
|
"loss": 4.3645, |
|
"step": 226304 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6286384693549304e-05, |
|
"loss": 4.3775, |
|
"step": 226816 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6277998746038784e-05, |
|
"loss": 4.3721, |
|
"step": 227328 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6269612798528264e-05, |
|
"loss": 4.3629, |
|
"step": 227840 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.6261226851017744e-05, |
|
"loss": 4.3735, |
|
"step": 228352 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.625284090350723e-05, |
|
"loss": 4.3639, |
|
"step": 228864 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 4.34278678894043, |
|
"eval_runtime": 293.9914, |
|
"eval_samples_per_second": 1297.967, |
|
"eval_steps_per_second": 40.562, |
|
"step": 228960 |
|
} |
|
], |
|
"logging_steps": 512, |
|
"max_steps": 3052726, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 10, |
|
"total_flos": 9.39388847862785e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|