{ "best_metric": 47.3367, "best_model_checkpoint": "/scratch/st-amuham01-1/fenimi/transformers/examples/pytorch/translation/similar-languages-task/model_out_es-pt-tok/checkpoint-1704014", "epoch": 35.0, "global_step": 3371314, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9500000000000004e-05, "loss": 0.8042, "step": 131078 }, { "epoch": 1.0, "eval_bleu": 46.137, "eval_gen_len": 73.7186, "eval_loss": 0.7053773999214172, "eval_runtime": 93.6018, "eval_samples_per_second": 10.363, "eval_steps_per_second": 0.331, "step": 131078 }, { "epoch": 2.0, "learning_rate": 4.9e-05, "loss": 0.6488, "step": 262156 }, { "epoch": 2.0, "eval_bleu": 46.1742, "eval_gen_len": 73.8876, "eval_loss": 0.6584964990615845, "eval_runtime": 91.805, "eval_samples_per_second": 10.566, "eval_steps_per_second": 0.338, "step": 262156 }, { "epoch": 3.0, "learning_rate": 4.85e-05, "loss": 0.6069, "step": 393234 }, { "epoch": 3.0, "eval_bleu": 46.0956, "eval_gen_len": 73.666, "eval_loss": 0.6464588046073914, "eval_runtime": 87.4692, "eval_samples_per_second": 11.09, "eval_steps_per_second": 0.354, "step": 393234 }, { "epoch": 4.0, "learning_rate": 4.8e-05, "loss": 0.5821, "step": 524312 }, { "epoch": 4.0, "eval_bleu": 46.326, "eval_gen_len": 73.932, "eval_loss": 0.6305918097496033, "eval_runtime": 90.9772, "eval_samples_per_second": 10.662, "eval_steps_per_second": 0.341, "step": 524312 }, { "epoch": 5.0, "learning_rate": 4.75e-05, "loss": 0.5648, "step": 655390 }, { "epoch": 5.0, "eval_bleu": 46.3267, "eval_gen_len": 73.6505, "eval_loss": 0.617319643497467, "eval_runtime": 86.5507, "eval_samples_per_second": 11.207, "eval_steps_per_second": 0.358, "step": 655390 }, { "epoch": 6.0, "learning_rate": 4.7e-05, "loss": 0.5515, "step": 786468 }, { "epoch": 6.0, "eval_bleu": 46.6537, "eval_gen_len": 73.8722, "eval_loss": 0.6139377355575562, "eval_runtime": 88.9058, "eval_samples_per_second": 10.91, "eval_steps_per_second": 0.349, "step": 786468 }, { "epoch": 7.0, "learning_rate": 4.6500000000000005e-05, "loss": 0.541, "step": 917546 }, { "epoch": 7.0, "eval_bleu": 46.5628, "eval_gen_len": 73.8412, "eval_loss": 0.6046749353408813, "eval_runtime": 87.2322, "eval_samples_per_second": 11.12, "eval_steps_per_second": 0.355, "step": 917546 }, { "epoch": 8.0, "learning_rate": 4.600000000000001e-05, "loss": 0.5323, "step": 1048624 }, { "epoch": 8.0, "eval_bleu": 46.5538, "eval_gen_len": 73.8216, "eval_loss": 0.5961582064628601, "eval_runtime": 88.6262, "eval_samples_per_second": 10.945, "eval_steps_per_second": 0.35, "step": 1048624 }, { "epoch": 9.0, "learning_rate": 4.55e-05, "loss": 0.525, "step": 1179702 }, { "epoch": 9.0, "eval_bleu": 46.543, "eval_gen_len": 74.0711, "eval_loss": 0.5961570739746094, "eval_runtime": 90.5754, "eval_samples_per_second": 10.709, "eval_steps_per_second": 0.342, "step": 1179702 }, { "epoch": 10.0, "learning_rate": 4.5e-05, "loss": 0.5184, "step": 1310780 }, { "epoch": 10.0, "eval_bleu": 46.772, "eval_gen_len": 73.8258, "eval_loss": 0.5992215275764465, "eval_runtime": 86.6687, "eval_samples_per_second": 11.192, "eval_steps_per_second": 0.358, "step": 1310780 }, { "epoch": 11.0, "learning_rate": 4.4500000000000004e-05, "loss": 0.5126, "step": 1441858 }, { "epoch": 11.0, "eval_bleu": 46.875, "eval_gen_len": 74.0124, "eval_loss": 0.5917361378669739, "eval_runtime": 91.2818, "eval_samples_per_second": 10.626, "eval_steps_per_second": 0.34, "step": 1441858 }, { "epoch": 12.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.5075, "step": 1572936 }, { "epoch": 12.0, "eval_bleu": 46.5982, "eval_gen_len": 74.0639, "eval_loss": 0.59002286195755, "eval_runtime": 95.1271, "eval_samples_per_second": 10.197, "eval_steps_per_second": 0.326, "step": 1572936 }, { "epoch": 13.0, "learning_rate": 4.35e-05, "loss": 0.503, "step": 1704014 }, { "epoch": 13.0, "eval_bleu": 47.3367, "eval_gen_len": 73.9134, "eval_loss": 0.591410756111145, "eval_runtime": 87.7758, "eval_samples_per_second": 11.051, "eval_steps_per_second": 0.353, "step": 1704014 }, { "epoch": 14.0, "learning_rate": 4.3e-05, "loss": 0.4987, "step": 1835092 }, { "epoch": 14.0, "eval_bleu": 46.5688, "eval_gen_len": 73.9072, "eval_loss": 0.5959733128547668, "eval_runtime": 95.2654, "eval_samples_per_second": 10.182, "eval_steps_per_second": 0.325, "step": 1835092 }, { "epoch": 15.0, "learning_rate": 4.25e-05, "loss": 0.4947, "step": 1966170 }, { "epoch": 15.0, "eval_bleu": 46.8246, "eval_gen_len": 74.066, "eval_loss": 0.5951239466667175, "eval_runtime": 89.8542, "eval_samples_per_second": 10.795, "eval_steps_per_second": 0.345, "step": 1966170 }, { "epoch": 16.0, "learning_rate": 4.2e-05, "loss": 0.4912, "step": 2097248 }, { "epoch": 16.0, "eval_bleu": 46.6078, "eval_gen_len": 74.1052, "eval_loss": 0.5862768888473511, "eval_runtime": 90.8564, "eval_samples_per_second": 10.676, "eval_steps_per_second": 0.341, "step": 2097248 }, { "epoch": 17.0, "learning_rate": 4.15e-05, "loss": 0.4877, "step": 2228326 }, { "epoch": 17.0, "eval_bleu": 46.4987, "eval_gen_len": 73.8402, "eval_loss": 0.5898135304450989, "eval_runtime": 93.1614, "eval_samples_per_second": 10.412, "eval_steps_per_second": 0.333, "step": 2228326 }, { "epoch": 24.0, "learning_rate": 3.780262784911128e-05, "loss": 0.4915, "step": 2323575 }, { "epoch": 24.0, "eval_bleu": 46.9695, "eval_gen_len": 73.9588, "eval_loss": 0.5856001973152161, "eval_runtime": 77.7022, "eval_samples_per_second": 12.484, "eval_steps_per_second": 0.399, "step": 2323575 }, { "epoch": 25.0, "learning_rate": 3.730262784911128e-05, "loss": 0.4858, "step": 2418824 }, { "epoch": 25.0, "eval_bleu": 46.9834, "eval_gen_len": 73.7845, "eval_loss": 0.5880559682846069, "eval_runtime": 76.2261, "eval_samples_per_second": 12.725, "eval_steps_per_second": 0.407, "step": 2418824 }, { "epoch": 26.0, "learning_rate": 3.6802627849111274e-05, "loss": 0.4819, "step": 2514073 }, { "epoch": 26.0, "eval_bleu": 47.2378, "eval_gen_len": 73.7866, "eval_loss": 0.5938757061958313, "eval_runtime": 77.0503, "eval_samples_per_second": 12.589, "eval_steps_per_second": 0.402, "step": 2514073 }, { "epoch": 27.0, "learning_rate": 3.6302627849111276e-05, "loss": 0.4785, "step": 2609322 }, { "epoch": 27.0, "eval_bleu": 47.1579, "eval_gen_len": 74.1351, "eval_loss": 0.593238115310669, "eval_runtime": 79.7831, "eval_samples_per_second": 12.158, "eval_steps_per_second": 0.389, "step": 2609322 }, { "epoch": 28.0, "learning_rate": 3.5802627849111284e-05, "loss": 0.4757, "step": 2704571 }, { "epoch": 28.0, "eval_bleu": 47.0974, "eval_gen_len": 73.8289, "eval_loss": 0.5946824550628662, "eval_runtime": 77.1019, "eval_samples_per_second": 12.581, "eval_steps_per_second": 0.402, "step": 2704571 }, { "epoch": 29.0, "learning_rate": 3.530262784911128e-05, "loss": 0.4729, "step": 2799820 }, { "epoch": 29.0, "eval_bleu": 47.2814, "eval_gen_len": 74.1485, "eval_loss": 0.5900229811668396, "eval_runtime": 76.5406, "eval_samples_per_second": 12.673, "eval_steps_per_second": 0.405, "step": 2799820 }, { "epoch": 30.0, "learning_rate": 3.480262784911128e-05, "loss": 0.4702, "step": 2895069 }, { "epoch": 30.0, "eval_bleu": 47.1382, "eval_gen_len": 73.8495, "eval_loss": 0.5961056351661682, "eval_runtime": 77.883, "eval_samples_per_second": 12.455, "eval_steps_per_second": 0.398, "step": 2895069 }, { "epoch": 31.0, "learning_rate": 3.430262784911128e-05, "loss": 0.4678, "step": 2990318 }, { "epoch": 31.0, "eval_bleu": 46.9917, "eval_gen_len": 74.0526, "eval_loss": 0.5983180999755859, "eval_runtime": 77.7736, "eval_samples_per_second": 12.472, "eval_steps_per_second": 0.399, "step": 2990318 }, { "epoch": 32.0, "learning_rate": 3.3802627849111276e-05, "loss": 0.4654, "step": 3085567 }, { "epoch": 32.0, "eval_bleu": 47.1391, "eval_gen_len": 73.8381, "eval_loss": 0.5979074835777283, "eval_runtime": 79.7831, "eval_samples_per_second": 12.158, "eval_steps_per_second": 0.389, "step": 3085567 }, { "epoch": 33.0, "learning_rate": 3.330262784911128e-05, "loss": 0.4632, "step": 3180816 }, { "epoch": 33.0, "eval_bleu": 46.5598, "eval_gen_len": 73.8753, "eval_loss": 0.5982452630996704, "eval_runtime": 78.0544, "eval_samples_per_second": 12.427, "eval_steps_per_second": 0.397, "step": 3180816 }, { "epoch": 34.0, "learning_rate": 3.280262784911128e-05, "loss": 0.461, "step": 3276065 }, { "epoch": 34.0, "eval_bleu": 46.503, "eval_gen_len": 73.9577, "eval_loss": 0.5941172242164612, "eval_runtime": 77.6074, "eval_samples_per_second": 12.499, "eval_steps_per_second": 0.399, "step": 3276065 }, { "epoch": 35.0, "learning_rate": 3.230262784911128e-05, "loss": 0.459, "step": 3371314 }, { "epoch": 35.0, "eval_bleu": 47.3154, "eval_gen_len": 74.0649, "eval_loss": 0.598816990852356, "eval_runtime": 79.9962, "eval_samples_per_second": 12.126, "eval_steps_per_second": 0.388, "step": 3371314 } ], "max_steps": 9524900, "num_train_epochs": 100, "total_flos": 1.0411885225994306e+19, "trial_name": null, "trial_params": null }