{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 39800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.1048, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.1654638051986694, |
|
"eval_runtime": 35.8, |
|
"eval_samples_per_second": 22.207, |
|
"eval_steps_per_second": 1.397, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.937185929648241e-05, |
|
"loss": 1.5993, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.0762, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0294471979141235, |
|
"eval_runtime": 35.6457, |
|
"eval_samples_per_second": 22.303, |
|
"eval_steps_per_second": 1.403, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.874371859296483e-05, |
|
"loss": 1.1714, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.0863, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9574553370475769, |
|
"eval_runtime": 35.6866, |
|
"eval_samples_per_second": 22.277, |
|
"eval_steps_per_second": 1.401, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 4.8115577889447235e-05, |
|
"loss": 1.0539, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.0769, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9043193459510803, |
|
"eval_runtime": 35.4817, |
|
"eval_samples_per_second": 22.406, |
|
"eval_steps_per_second": 1.409, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.0792, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8518753051757812, |
|
"eval_runtime": 35.6246, |
|
"eval_samples_per_second": 22.316, |
|
"eval_steps_per_second": 1.404, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.748743718592965e-05, |
|
"loss": 0.9762, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.0563, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8147059679031372, |
|
"eval_runtime": 35.5873, |
|
"eval_samples_per_second": 22.339, |
|
"eval_steps_per_second": 1.405, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 4.685929648241206e-05, |
|
"loss": 0.9072, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.0856, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7833035588264465, |
|
"eval_runtime": 35.6208, |
|
"eval_samples_per_second": 22.318, |
|
"eval_steps_per_second": 1.404, |
|
"step": 2786 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 4.6231155778894475e-05, |
|
"loss": 0.8502, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.091, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7526289820671082, |
|
"eval_runtime": 35.6502, |
|
"eval_samples_per_second": 22.3, |
|
"eval_steps_per_second": 1.403, |
|
"step": 3184 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 4.5603015075376884e-05, |
|
"loss": 0.8081, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.1344, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7389398813247681, |
|
"eval_runtime": 35.7611, |
|
"eval_samples_per_second": 22.231, |
|
"eval_steps_per_second": 1.398, |
|
"step": 3582 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.1271, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.718666672706604, |
|
"eval_runtime": 35.7669, |
|
"eval_samples_per_second": 22.227, |
|
"eval_steps_per_second": 1.398, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 4.49748743718593e-05, |
|
"loss": 0.7683, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.1299, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7038307189941406, |
|
"eval_runtime": 35.6501, |
|
"eval_samples_per_second": 22.3, |
|
"eval_steps_per_second": 1.403, |
|
"step": 4378 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 4.434673366834171e-05, |
|
"loss": 0.7318, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.1213, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6900615692138672, |
|
"eval_runtime": 35.6533, |
|
"eval_samples_per_second": 22.298, |
|
"eval_steps_per_second": 1.402, |
|
"step": 4776 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 4.3718592964824124e-05, |
|
"loss": 0.6998, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 0.1583, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6753336787223816, |
|
"eval_runtime": 35.6402, |
|
"eval_samples_per_second": 22.306, |
|
"eval_steps_per_second": 1.403, |
|
"step": 5174 |
|
}, |
|
{ |
|
"epoch": 13.82, |
|
"learning_rate": 4.309045226130653e-05, |
|
"loss": 0.6683, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.145, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6631056070327759, |
|
"eval_runtime": 35.7643, |
|
"eval_samples_per_second": 22.229, |
|
"eval_steps_per_second": 1.398, |
|
"step": 5572 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 0.1516, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6530159711837769, |
|
"eval_runtime": 35.7246, |
|
"eval_samples_per_second": 22.254, |
|
"eval_steps_per_second": 1.4, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 15.08, |
|
"learning_rate": 4.246231155778895e-05, |
|
"loss": 0.6406, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 0.1599, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6454012393951416, |
|
"eval_runtime": 35.7142, |
|
"eval_samples_per_second": 22.26, |
|
"eval_steps_per_second": 1.4, |
|
"step": 6368 |
|
}, |
|
{ |
|
"epoch": 16.33, |
|
"learning_rate": 4.183417085427136e-05, |
|
"loss": 0.6128, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 0.1478, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6383265256881714, |
|
"eval_runtime": 35.664, |
|
"eval_samples_per_second": 22.291, |
|
"eval_steps_per_second": 1.402, |
|
"step": 6766 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 4.120603015075377e-05, |
|
"loss": 0.5911, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 0.1571, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6369075179100037, |
|
"eval_runtime": 35.6591, |
|
"eval_samples_per_second": 22.294, |
|
"eval_steps_per_second": 1.402, |
|
"step": 7164 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"learning_rate": 4.057788944723618e-05, |
|
"loss": 0.5721, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 0.1668, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6339399814605713, |
|
"eval_runtime": 35.6693, |
|
"eval_samples_per_second": 22.288, |
|
"eval_steps_per_second": 1.402, |
|
"step": 7562 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 0.1611, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6295469403266907, |
|
"eval_runtime": 35.5912, |
|
"eval_samples_per_second": 22.337, |
|
"eval_steps_per_second": 1.405, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 20.1, |
|
"learning_rate": 3.9949748743718597e-05, |
|
"loss": 0.547, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_bleu": 0.1722, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6267198324203491, |
|
"eval_runtime": 35.8366, |
|
"eval_samples_per_second": 22.184, |
|
"eval_steps_per_second": 1.395, |
|
"step": 8358 |
|
}, |
|
{ |
|
"epoch": 21.36, |
|
"learning_rate": 3.9321608040201005e-05, |
|
"loss": 0.529, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_bleu": 0.1656, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6275119781494141, |
|
"eval_runtime": 35.667, |
|
"eval_samples_per_second": 22.289, |
|
"eval_steps_per_second": 1.402, |
|
"step": 8756 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 3.869346733668342e-05, |
|
"loss": 0.5115, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_bleu": 0.1684, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6284548044204712, |
|
"eval_runtime": 35.6512, |
|
"eval_samples_per_second": 22.299, |
|
"eval_steps_per_second": 1.402, |
|
"step": 9154 |
|
}, |
|
{ |
|
"epoch": 23.87, |
|
"learning_rate": 3.806532663316583e-05, |
|
"loss": 0.4934, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_bleu": 0.1696, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6268807053565979, |
|
"eval_runtime": 35.7633, |
|
"eval_samples_per_second": 22.229, |
|
"eval_steps_per_second": 1.398, |
|
"step": 9552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_bleu": 0.182, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6358157992362976, |
|
"eval_runtime": 36.0997, |
|
"eval_samples_per_second": 22.022, |
|
"eval_steps_per_second": 1.385, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 25.13, |
|
"learning_rate": 3.7437185929648245e-05, |
|
"loss": 0.4773, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_bleu": 0.1699, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6337732672691345, |
|
"eval_runtime": 35.9321, |
|
"eval_samples_per_second": 22.125, |
|
"eval_steps_per_second": 1.392, |
|
"step": 10348 |
|
}, |
|
{ |
|
"epoch": 26.38, |
|
"learning_rate": 3.6809045226130654e-05, |
|
"loss": 0.4591, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_bleu": 0.1855, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6358336806297302, |
|
"eval_runtime": 35.7576, |
|
"eval_samples_per_second": 22.233, |
|
"eval_steps_per_second": 1.398, |
|
"step": 10746 |
|
}, |
|
{ |
|
"epoch": 27.64, |
|
"learning_rate": 3.618090452261307e-05, |
|
"loss": 0.4449, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_bleu": 0.1759, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6440271735191345, |
|
"eval_runtime": 35.6217, |
|
"eval_samples_per_second": 22.318, |
|
"eval_steps_per_second": 1.404, |
|
"step": 11144 |
|
}, |
|
{ |
|
"epoch": 28.89, |
|
"learning_rate": 3.555276381909548e-05, |
|
"loss": 0.4285, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_bleu": 0.1786, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6438360810279846, |
|
"eval_runtime": 35.5837, |
|
"eval_samples_per_second": 22.342, |
|
"eval_steps_per_second": 1.405, |
|
"step": 11542 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_bleu": 0.1874, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.647448718547821, |
|
"eval_runtime": 35.5495, |
|
"eval_samples_per_second": 22.363, |
|
"eval_steps_per_second": 1.406, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 30.15, |
|
"learning_rate": 3.4924623115577894e-05, |
|
"loss": 0.4137, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_bleu": 0.1968, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.651654839515686, |
|
"eval_runtime": 35.6719, |
|
"eval_samples_per_second": 22.286, |
|
"eval_steps_per_second": 1.402, |
|
"step": 12338 |
|
}, |
|
{ |
|
"epoch": 31.41, |
|
"learning_rate": 3.42964824120603e-05, |
|
"loss": 0.4012, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_bleu": 0.1735, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6562197804450989, |
|
"eval_runtime": 35.627, |
|
"eval_samples_per_second": 22.315, |
|
"eval_steps_per_second": 1.403, |
|
"step": 12736 |
|
}, |
|
{ |
|
"epoch": 32.66, |
|
"learning_rate": 3.366834170854272e-05, |
|
"loss": 0.3858, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_bleu": 0.18, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6581218838691711, |
|
"eval_runtime": 35.6835, |
|
"eval_samples_per_second": 22.279, |
|
"eval_steps_per_second": 1.401, |
|
"step": 13134 |
|
}, |
|
{ |
|
"epoch": 33.92, |
|
"learning_rate": 3.3040201005025127e-05, |
|
"loss": 0.3753, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_bleu": 0.1837, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6713840961456299, |
|
"eval_runtime": 36.1204, |
|
"eval_samples_per_second": 22.01, |
|
"eval_steps_per_second": 1.384, |
|
"step": 13532 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_bleu": 0.177, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6749709844589233, |
|
"eval_runtime": 35.6201, |
|
"eval_samples_per_second": 22.319, |
|
"eval_steps_per_second": 1.404, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 35.18, |
|
"learning_rate": 3.241206030150754e-05, |
|
"loss": 0.3613, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_bleu": 0.177, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6772740483283997, |
|
"eval_runtime": 35.673, |
|
"eval_samples_per_second": 22.286, |
|
"eval_steps_per_second": 1.402, |
|
"step": 14328 |
|
}, |
|
{ |
|
"epoch": 36.43, |
|
"learning_rate": 3.178391959798995e-05, |
|
"loss": 0.3493, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_bleu": 0.1859, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.6915194392204285, |
|
"eval_runtime": 36.1411, |
|
"eval_samples_per_second": 21.997, |
|
"eval_steps_per_second": 1.383, |
|
"step": 14726 |
|
}, |
|
{ |
|
"epoch": 37.69, |
|
"learning_rate": 3.1155778894472366e-05, |
|
"loss": 0.339, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_bleu": 0.1756, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7031569480895996, |
|
"eval_runtime": 36.1897, |
|
"eval_samples_per_second": 21.968, |
|
"eval_steps_per_second": 1.382, |
|
"step": 15124 |
|
}, |
|
{ |
|
"epoch": 38.94, |
|
"learning_rate": 3.0527638190954775e-05, |
|
"loss": 0.3263, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_bleu": 0.1844, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7003222703933716, |
|
"eval_runtime": 36.2516, |
|
"eval_samples_per_second": 21.93, |
|
"eval_steps_per_second": 1.379, |
|
"step": 15522 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_bleu": 0.1795, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7169303297996521, |
|
"eval_runtime": 36.3049, |
|
"eval_samples_per_second": 21.898, |
|
"eval_steps_per_second": 1.377, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 40.2, |
|
"learning_rate": 2.989949748743719e-05, |
|
"loss": 0.3153, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_bleu": 0.1903, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7180814743041992, |
|
"eval_runtime": 36.2151, |
|
"eval_samples_per_second": 21.952, |
|
"eval_steps_per_second": 1.381, |
|
"step": 16318 |
|
}, |
|
{ |
|
"epoch": 41.46, |
|
"learning_rate": 2.9271356783919603e-05, |
|
"loss": 0.3047, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_bleu": 0.1864, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7283141613006592, |
|
"eval_runtime": 36.2969, |
|
"eval_samples_per_second": 21.903, |
|
"eval_steps_per_second": 1.378, |
|
"step": 16716 |
|
}, |
|
{ |
|
"epoch": 42.71, |
|
"learning_rate": 2.8643216080402015e-05, |
|
"loss": 0.2933, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_bleu": 0.188, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7462304830551147, |
|
"eval_runtime": 35.9176, |
|
"eval_samples_per_second": 22.134, |
|
"eval_steps_per_second": 1.392, |
|
"step": 17114 |
|
}, |
|
{ |
|
"epoch": 43.97, |
|
"learning_rate": 2.8015075376884427e-05, |
|
"loss": 0.2888, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_bleu": 0.1841, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7420201301574707, |
|
"eval_runtime": 36.2045, |
|
"eval_samples_per_second": 21.959, |
|
"eval_steps_per_second": 1.381, |
|
"step": 17512 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_bleu": 0.1748, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7574421167373657, |
|
"eval_runtime": 36.2479, |
|
"eval_samples_per_second": 21.932, |
|
"eval_steps_per_second": 1.379, |
|
"step": 17910 |
|
}, |
|
{ |
|
"epoch": 45.23, |
|
"learning_rate": 2.738693467336684e-05, |
|
"loss": 0.2762, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_bleu": 0.1747, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7617235779762268, |
|
"eval_runtime": 36.159, |
|
"eval_samples_per_second": 21.986, |
|
"eval_steps_per_second": 1.383, |
|
"step": 18308 |
|
}, |
|
{ |
|
"epoch": 46.48, |
|
"learning_rate": 2.6758793969849248e-05, |
|
"loss": 0.2671, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_bleu": 0.1743, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7677585482597351, |
|
"eval_runtime": 36.1812, |
|
"eval_samples_per_second": 21.973, |
|
"eval_steps_per_second": 1.382, |
|
"step": 18706 |
|
}, |
|
{ |
|
"epoch": 47.74, |
|
"learning_rate": 2.613065326633166e-05, |
|
"loss": 0.2585, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_bleu": 0.1902, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7697047591209412, |
|
"eval_runtime": 36.1036, |
|
"eval_samples_per_second": 22.02, |
|
"eval_steps_per_second": 1.385, |
|
"step": 19104 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"learning_rate": 2.5502512562814072e-05, |
|
"loss": 0.252, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_bleu": 0.208, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7865097522735596, |
|
"eval_runtime": 36.1641, |
|
"eval_samples_per_second": 21.983, |
|
"eval_steps_per_second": 1.383, |
|
"step": 19502 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_bleu": 0.1777, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8058604001998901, |
|
"eval_runtime": 36.0936, |
|
"eval_samples_per_second": 22.026, |
|
"eval_steps_per_second": 1.385, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 50.25, |
|
"learning_rate": 2.4874371859296484e-05, |
|
"loss": 0.2411, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_bleu": 0.212, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.7906444072723389, |
|
"eval_runtime": 36.7349, |
|
"eval_samples_per_second": 21.642, |
|
"eval_steps_per_second": 1.361, |
|
"step": 20298 |
|
}, |
|
{ |
|
"epoch": 51.51, |
|
"learning_rate": 2.4246231155778896e-05, |
|
"loss": 0.2358, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_bleu": 0.1778, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8143441081047058, |
|
"eval_runtime": 36.1702, |
|
"eval_samples_per_second": 21.979, |
|
"eval_steps_per_second": 1.382, |
|
"step": 20696 |
|
}, |
|
{ |
|
"epoch": 52.76, |
|
"learning_rate": 2.361809045226131e-05, |
|
"loss": 0.2273, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_bleu": 0.218, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8184289932250977, |
|
"eval_runtime": 36.341, |
|
"eval_samples_per_second": 21.876, |
|
"eval_steps_per_second": 1.376, |
|
"step": 21094 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_bleu": 0.2243, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8261227607727051, |
|
"eval_runtime": 36.1608, |
|
"eval_samples_per_second": 21.985, |
|
"eval_steps_per_second": 1.383, |
|
"step": 21492 |
|
}, |
|
{ |
|
"epoch": 54.02, |
|
"learning_rate": 2.298994974874372e-05, |
|
"loss": 0.223, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_bleu": 0.2196, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8429352641105652, |
|
"eval_runtime": 35.7014, |
|
"eval_samples_per_second": 22.268, |
|
"eval_steps_per_second": 1.401, |
|
"step": 21890 |
|
}, |
|
{ |
|
"epoch": 55.28, |
|
"learning_rate": 2.2361809045226133e-05, |
|
"loss": 0.2131, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_bleu": 0.2402, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.847459077835083, |
|
"eval_runtime": 35.7383, |
|
"eval_samples_per_second": 22.245, |
|
"eval_steps_per_second": 1.399, |
|
"step": 22288 |
|
}, |
|
{ |
|
"epoch": 56.53, |
|
"learning_rate": 2.1733668341708545e-05, |
|
"loss": 0.2083, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_bleu": 0.2163, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8617640733718872, |
|
"eval_runtime": 35.56, |
|
"eval_samples_per_second": 22.357, |
|
"eval_steps_per_second": 1.406, |
|
"step": 22686 |
|
}, |
|
{ |
|
"epoch": 57.79, |
|
"learning_rate": 2.1105527638190957e-05, |
|
"loss": 0.202, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_bleu": 0.2164, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8572230339050293, |
|
"eval_runtime": 35.5968, |
|
"eval_samples_per_second": 22.333, |
|
"eval_steps_per_second": 1.405, |
|
"step": 23084 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_bleu": 0.217, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.873598575592041, |
|
"eval_runtime": 35.5807, |
|
"eval_samples_per_second": 22.344, |
|
"eval_steps_per_second": 1.405, |
|
"step": 23482 |
|
}, |
|
{ |
|
"epoch": 59.05, |
|
"learning_rate": 2.047738693467337e-05, |
|
"loss": 0.1968, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_bleu": 0.2166, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8894439339637756, |
|
"eval_runtime": 35.6815, |
|
"eval_samples_per_second": 22.28, |
|
"eval_steps_per_second": 1.401, |
|
"step": 23880 |
|
}, |
|
{ |
|
"epoch": 60.3, |
|
"learning_rate": 1.984924623115578e-05, |
|
"loss": 0.1904, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_bleu": 0.2241, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.8927697539329529, |
|
"eval_runtime": 35.5709, |
|
"eval_samples_per_second": 22.35, |
|
"eval_steps_per_second": 1.406, |
|
"step": 24278 |
|
}, |
|
{ |
|
"epoch": 61.56, |
|
"learning_rate": 1.9221105527638193e-05, |
|
"loss": 0.1847, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_bleu": 0.2219, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9057827591896057, |
|
"eval_runtime": 35.9775, |
|
"eval_samples_per_second": 22.097, |
|
"eval_steps_per_second": 1.39, |
|
"step": 24676 |
|
}, |
|
{ |
|
"epoch": 62.81, |
|
"learning_rate": 1.8592964824120602e-05, |
|
"loss": 0.1803, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_bleu": 0.2336, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9056702852249146, |
|
"eval_runtime": 35.7167, |
|
"eval_samples_per_second": 22.259, |
|
"eval_steps_per_second": 1.4, |
|
"step": 25074 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_bleu": 0.2156, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9173711538314819, |
|
"eval_runtime": 35.7315, |
|
"eval_samples_per_second": 22.249, |
|
"eval_steps_per_second": 1.399, |
|
"step": 25472 |
|
}, |
|
{ |
|
"epoch": 64.07, |
|
"learning_rate": 1.7964824120603014e-05, |
|
"loss": 0.1758, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_bleu": 0.1951, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.922991156578064, |
|
"eval_runtime": 35.7331, |
|
"eval_samples_per_second": 22.248, |
|
"eval_steps_per_second": 1.399, |
|
"step": 25870 |
|
}, |
|
{ |
|
"epoch": 65.33, |
|
"learning_rate": 1.7336683417085427e-05, |
|
"loss": 0.1701, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_bleu": 0.2249, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9349916577339172, |
|
"eval_runtime": 35.6317, |
|
"eval_samples_per_second": 22.312, |
|
"eval_steps_per_second": 1.403, |
|
"step": 26268 |
|
}, |
|
{ |
|
"epoch": 66.58, |
|
"learning_rate": 1.670854271356784e-05, |
|
"loss": 0.1673, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_bleu": 0.2224, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9416642189025879, |
|
"eval_runtime": 35.7511, |
|
"eval_samples_per_second": 22.237, |
|
"eval_steps_per_second": 1.399, |
|
"step": 26666 |
|
}, |
|
{ |
|
"epoch": 67.84, |
|
"learning_rate": 1.608040201005025e-05, |
|
"loss": 0.1614, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_bleu": 0.2161, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9508859515190125, |
|
"eval_runtime": 35.6367, |
|
"eval_samples_per_second": 22.308, |
|
"eval_steps_per_second": 1.403, |
|
"step": 27064 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_bleu": 0.2183, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9652993083000183, |
|
"eval_runtime": 35.6263, |
|
"eval_samples_per_second": 22.315, |
|
"eval_steps_per_second": 1.403, |
|
"step": 27462 |
|
}, |
|
{ |
|
"epoch": 69.1, |
|
"learning_rate": 1.5452261306532663e-05, |
|
"loss": 0.1578, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_bleu": 0.2113, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9633088111877441, |
|
"eval_runtime": 35.7594, |
|
"eval_samples_per_second": 22.232, |
|
"eval_steps_per_second": 1.398, |
|
"step": 27860 |
|
}, |
|
{ |
|
"epoch": 70.35, |
|
"learning_rate": 1.4824120603015077e-05, |
|
"loss": 0.1536, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_bleu": 0.2177, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9783052802085876, |
|
"eval_runtime": 35.5411, |
|
"eval_samples_per_second": 22.368, |
|
"eval_steps_per_second": 1.407, |
|
"step": 28258 |
|
}, |
|
{ |
|
"epoch": 71.61, |
|
"learning_rate": 1.4195979899497489e-05, |
|
"loss": 0.1513, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_bleu": 0.2179, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9754663109779358, |
|
"eval_runtime": 35.6862, |
|
"eval_samples_per_second": 22.278, |
|
"eval_steps_per_second": 1.401, |
|
"step": 28656 |
|
}, |
|
{ |
|
"epoch": 72.86, |
|
"learning_rate": 1.3567839195979901e-05, |
|
"loss": 0.147, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_bleu": 0.2273, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9910703897476196, |
|
"eval_runtime": 35.9105, |
|
"eval_samples_per_second": 22.138, |
|
"eval_steps_per_second": 1.392, |
|
"step": 29054 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_bleu": 0.2157, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9854773283004761, |
|
"eval_runtime": 35.6352, |
|
"eval_samples_per_second": 22.309, |
|
"eval_steps_per_second": 1.403, |
|
"step": 29452 |
|
}, |
|
{ |
|
"epoch": 74.12, |
|
"learning_rate": 1.2939698492462313e-05, |
|
"loss": 0.1443, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_bleu": 0.2169, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 0.9998270273208618, |
|
"eval_runtime": 35.64, |
|
"eval_samples_per_second": 22.306, |
|
"eval_steps_per_second": 1.403, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 75.38, |
|
"learning_rate": 1.2311557788944725e-05, |
|
"loss": 0.1401, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_bleu": 0.2124, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0127789974212646, |
|
"eval_runtime": 35.7323, |
|
"eval_samples_per_second": 22.249, |
|
"eval_steps_per_second": 1.399, |
|
"step": 30248 |
|
}, |
|
{ |
|
"epoch": 76.63, |
|
"learning_rate": 1.1683417085427137e-05, |
|
"loss": 0.1377, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_bleu": 0.2159, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0114222764968872, |
|
"eval_runtime": 35.7601, |
|
"eval_samples_per_second": 22.231, |
|
"eval_steps_per_second": 1.398, |
|
"step": 30646 |
|
}, |
|
{ |
|
"epoch": 77.89, |
|
"learning_rate": 1.105527638190955e-05, |
|
"loss": 0.1342, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_bleu": 0.2152, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0248533487319946, |
|
"eval_runtime": 35.6808, |
|
"eval_samples_per_second": 22.281, |
|
"eval_steps_per_second": 1.401, |
|
"step": 31044 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_bleu": 0.2233, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0258084535598755, |
|
"eval_runtime": 35.6984, |
|
"eval_samples_per_second": 22.27, |
|
"eval_steps_per_second": 1.401, |
|
"step": 31442 |
|
}, |
|
{ |
|
"epoch": 79.15, |
|
"learning_rate": 1.042713567839196e-05, |
|
"loss": 0.1336, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_bleu": 0.2194, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0308655500411987, |
|
"eval_runtime": 35.6369, |
|
"eval_samples_per_second": 22.308, |
|
"eval_steps_per_second": 1.403, |
|
"step": 31840 |
|
}, |
|
{ |
|
"epoch": 80.4, |
|
"learning_rate": 9.798994974874372e-06, |
|
"loss": 0.1307, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_bleu": 0.2122, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.032060146331787, |
|
"eval_runtime": 35.5991, |
|
"eval_samples_per_second": 22.332, |
|
"eval_steps_per_second": 1.405, |
|
"step": 32238 |
|
}, |
|
{ |
|
"epoch": 81.66, |
|
"learning_rate": 9.170854271356784e-06, |
|
"loss": 0.1277, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_bleu": 0.2191, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.034020185470581, |
|
"eval_runtime": 35.662, |
|
"eval_samples_per_second": 22.293, |
|
"eval_steps_per_second": 1.402, |
|
"step": 32636 |
|
}, |
|
{ |
|
"epoch": 82.91, |
|
"learning_rate": 8.542713567839196e-06, |
|
"loss": 0.1262, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_bleu": 0.2123, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0493375062942505, |
|
"eval_runtime": 35.6012, |
|
"eval_samples_per_second": 22.331, |
|
"eval_steps_per_second": 1.404, |
|
"step": 33034 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_bleu": 0.2273, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0544501543045044, |
|
"eval_runtime": 35.7115, |
|
"eval_samples_per_second": 22.262, |
|
"eval_steps_per_second": 1.4, |
|
"step": 33432 |
|
}, |
|
{ |
|
"epoch": 84.17, |
|
"learning_rate": 7.914572864321608e-06, |
|
"loss": 0.1233, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_bleu": 0.2184, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0550196170806885, |
|
"eval_runtime": 35.6365, |
|
"eval_samples_per_second": 22.309, |
|
"eval_steps_per_second": 1.403, |
|
"step": 33830 |
|
}, |
|
{ |
|
"epoch": 85.43, |
|
"learning_rate": 7.28643216080402e-06, |
|
"loss": 0.1233, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_bleu": 0.2241, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0545953512191772, |
|
"eval_runtime": 35.7457, |
|
"eval_samples_per_second": 22.24, |
|
"eval_steps_per_second": 1.399, |
|
"step": 34228 |
|
}, |
|
{ |
|
"epoch": 86.68, |
|
"learning_rate": 6.658291457286432e-06, |
|
"loss": 0.1205, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_bleu": 0.2246, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0695993900299072, |
|
"eval_runtime": 35.6623, |
|
"eval_samples_per_second": 22.292, |
|
"eval_steps_per_second": 1.402, |
|
"step": 34626 |
|
}, |
|
{ |
|
"epoch": 87.94, |
|
"learning_rate": 6.030150753768844e-06, |
|
"loss": 0.1189, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_bleu": 0.2237, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0730416774749756, |
|
"eval_runtime": 35.8819, |
|
"eval_samples_per_second": 22.156, |
|
"eval_steps_per_second": 1.393, |
|
"step": 35024 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_bleu": 0.2308, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.068780541419983, |
|
"eval_runtime": 35.7114, |
|
"eval_samples_per_second": 22.262, |
|
"eval_steps_per_second": 1.4, |
|
"step": 35422 |
|
}, |
|
{ |
|
"epoch": 89.2, |
|
"learning_rate": 5.402010050251256e-06, |
|
"loss": 0.1173, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_bleu": 0.2267, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0783226490020752, |
|
"eval_runtime": 35.7523, |
|
"eval_samples_per_second": 22.236, |
|
"eval_steps_per_second": 1.399, |
|
"step": 35820 |
|
}, |
|
{ |
|
"epoch": 90.45, |
|
"learning_rate": 4.773869346733668e-06, |
|
"loss": 0.1154, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_bleu": 0.2262, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0766741037368774, |
|
"eval_runtime": 35.7535, |
|
"eval_samples_per_second": 22.236, |
|
"eval_steps_per_second": 1.398, |
|
"step": 36218 |
|
}, |
|
{ |
|
"epoch": 91.71, |
|
"learning_rate": 4.1457286432160804e-06, |
|
"loss": 0.115, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_bleu": 0.2214, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0834720134735107, |
|
"eval_runtime": 35.7567, |
|
"eval_samples_per_second": 22.234, |
|
"eval_steps_per_second": 1.398, |
|
"step": 36616 |
|
}, |
|
{ |
|
"epoch": 92.96, |
|
"learning_rate": 3.5175879396984926e-06, |
|
"loss": 0.1136, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_bleu": 0.2284, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0788373947143555, |
|
"eval_runtime": 35.8397, |
|
"eval_samples_per_second": 22.182, |
|
"eval_steps_per_second": 1.395, |
|
"step": 37014 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_bleu": 0.2269, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0876238346099854, |
|
"eval_runtime": 35.7552, |
|
"eval_samples_per_second": 22.235, |
|
"eval_steps_per_second": 1.398, |
|
"step": 37412 |
|
}, |
|
{ |
|
"epoch": 94.22, |
|
"learning_rate": 2.8894472361809047e-06, |
|
"loss": 0.1126, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_bleu": 0.2212, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0935641527175903, |
|
"eval_runtime": 35.7207, |
|
"eval_samples_per_second": 22.256, |
|
"eval_steps_per_second": 1.4, |
|
"step": 37810 |
|
}, |
|
{ |
|
"epoch": 95.48, |
|
"learning_rate": 2.261306532663317e-06, |
|
"loss": 0.1118, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_bleu": 0.2207, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0917831659317017, |
|
"eval_runtime": 35.5884, |
|
"eval_samples_per_second": 22.339, |
|
"eval_steps_per_second": 1.405, |
|
"step": 38208 |
|
}, |
|
{ |
|
"epoch": 96.73, |
|
"learning_rate": 1.6331658291457288e-06, |
|
"loss": 0.111, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_bleu": 0.2217, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0943822860717773, |
|
"eval_runtime": 35.7423, |
|
"eval_samples_per_second": 22.243, |
|
"eval_steps_per_second": 1.399, |
|
"step": 38606 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"learning_rate": 1.0050251256281407e-06, |
|
"loss": 0.1106, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_bleu": 0.2203, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0962368249893188, |
|
"eval_runtime": 35.6215, |
|
"eval_samples_per_second": 22.318, |
|
"eval_steps_per_second": 1.404, |
|
"step": 39004 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_bleu": 0.2182, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.0994266271591187, |
|
"eval_runtime": 35.6909, |
|
"eval_samples_per_second": 22.275, |
|
"eval_steps_per_second": 1.401, |
|
"step": 39402 |
|
}, |
|
{ |
|
"epoch": 99.25, |
|
"learning_rate": 3.7688442211055275e-07, |
|
"loss": 0.1088, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_bleu": 0.2193, |
|
"eval_gen_len": 19.0, |
|
"eval_loss": 1.099919319152832, |
|
"eval_runtime": 35.8327, |
|
"eval_samples_per_second": 22.186, |
|
"eval_steps_per_second": 1.395, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 39800, |
|
"total_flos": 2.262748628238336e+17, |
|
"train_loss": 0.35148891755683936, |
|
"train_runtime": 21066.5572, |
|
"train_samples_per_second": 30.204, |
|
"train_steps_per_second": 1.889 |
|
} |
|
], |
|
"max_steps": 39800, |
|
"num_train_epochs": 100, |
|
"total_flos": 2.262748628238336e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |