{ "best_metric": 26.9203, "best_model_checkpoint": "./ko-en_mbartLarge_exp20p_linear_decay/checkpoint-11000", "epoch": 3.4806822137138878, "eval_steps": 1000, "global_step": 15000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 5e-05, "loss": 1.557, "step": 500 }, { "epoch": 0.23, "learning_rate": 4.941300774829773e-05, "loss": 1.4396, "step": 1000 }, { "epoch": 0.23, "eval_bleu": 21.7052, "eval_gen_len": 18.6047, "eval_loss": 1.381515622138977, "eval_runtime": 1113.9878, "eval_samples_per_second": 15.473, "eval_steps_per_second": 0.968, "step": 1000 }, { "epoch": 0.35, "learning_rate": 4.8826015496595445e-05, "loss": 1.3838, "step": 1500 }, { "epoch": 0.46, "learning_rate": 4.823902324489317e-05, "loss": 1.338, "step": 2000 }, { "epoch": 0.46, "eval_bleu": 23.7087, "eval_gen_len": 18.9939, "eval_loss": 1.3043569326400757, "eval_runtime": 1109.0361, "eval_samples_per_second": 15.542, "eval_steps_per_second": 0.972, "step": 2000 }, { "epoch": 0.58, "learning_rate": 4.7652030993190894e-05, "loss": 1.3081, "step": 2500 }, { "epoch": 0.7, "learning_rate": 4.706503874148862e-05, "loss": 1.2938, "step": 3000 }, { "epoch": 0.7, "eval_bleu": 24.6339, "eval_gen_len": 18.8866, "eval_loss": 1.2555760145187378, "eval_runtime": 1131.9789, "eval_samples_per_second": 15.227, "eval_steps_per_second": 0.952, "step": 3000 }, { "epoch": 0.81, "learning_rate": 4.6478046489786336e-05, "loss": 1.2587, "step": 3500 }, { "epoch": 0.93, "learning_rate": 4.589105423808406e-05, "loss": 1.251, "step": 4000 }, { "epoch": 0.93, "eval_bleu": 25.2975, "eval_gen_len": 19.0918, "eval_loss": 1.2229138612747192, "eval_runtime": 1120.4864, "eval_samples_per_second": 15.383, "eval_steps_per_second": 0.962, "step": 4000 }, { "epoch": 1.04, "learning_rate": 4.530406198638178e-05, "loss": 1.135, "step": 4500 }, { "epoch": 1.16, "learning_rate": 4.47170697346795e-05, "loss": 0.9843, "step": 5000 }, { "epoch": 1.16, "eval_bleu": 25.609, "eval_gen_len": 18.7589, "eval_loss": 1.2308591604232788, "eval_runtime": 1118.1785, "eval_samples_per_second": 15.415, "eval_steps_per_second": 0.964, "step": 5000 }, { "epoch": 1.28, "learning_rate": 4.413007748297723e-05, "loss": 0.9916, "step": 5500 }, { "epoch": 1.39, "learning_rate": 4.354308523127495e-05, "loss": 0.9874, "step": 6000 }, { "epoch": 1.39, "eval_bleu": 26.1792, "eval_gen_len": 18.8287, "eval_loss": 1.2100664377212524, "eval_runtime": 1093.6874, "eval_samples_per_second": 15.76, "eval_steps_per_second": 0.986, "step": 6000 }, { "epoch": 1.51, "learning_rate": 4.295609297957267e-05, "loss": 0.9931, "step": 6500 }, { "epoch": 1.62, "learning_rate": 4.2369100727870395e-05, "loss": 0.9838, "step": 7000 }, { "epoch": 1.62, "eval_bleu": 26.024, "eval_gen_len": 18.4025, "eval_loss": 1.2053465843200684, "eval_runtime": 1074.6707, "eval_samples_per_second": 16.039, "eval_steps_per_second": 1.003, "step": 7000 }, { "epoch": 1.74, "learning_rate": 4.178210847616812e-05, "loss": 0.9931, "step": 7500 }, { "epoch": 1.86, "learning_rate": 4.1195116224465844e-05, "loss": 0.9927, "step": 8000 }, { "epoch": 1.86, "eval_bleu": 26.3148, "eval_gen_len": 19.09, "eval_loss": 1.1906589269638062, "eval_runtime": 1112.0878, "eval_samples_per_second": 15.5, "eval_steps_per_second": 0.969, "step": 8000 }, { "epoch": 1.97, "learning_rate": 4.060812397276356e-05, "loss": 0.9921, "step": 8500 }, { "epoch": 2.09, "learning_rate": 4.0021131721061286e-05, "loss": 0.7835, "step": 9000 }, { "epoch": 2.09, "eval_bleu": 26.5613, "eval_gen_len": 18.7196, "eval_loss": 1.2300479412078857, "eval_runtime": 1083.4234, "eval_samples_per_second": 15.91, "eval_steps_per_second": 0.995, "step": 9000 }, { "epoch": 2.2, "learning_rate": 3.9434139469359004e-05, "loss": 0.7268, "step": 9500 }, { "epoch": 2.32, "learning_rate": 3.884714721765673e-05, "loss": 0.7437, "step": 10000 }, { "epoch": 2.32, "eval_bleu": 26.8232, "eval_gen_len": 18.6513, "eval_loss": 1.2358065843582153, "eval_runtime": 1078.4504, "eval_samples_per_second": 15.983, "eval_steps_per_second": 1.0, "step": 10000 }, { "epoch": 2.44, "learning_rate": 3.826015496595445e-05, "loss": 0.7532, "step": 10500 }, { "epoch": 2.55, "learning_rate": 3.767316271425218e-05, "loss": 0.7585, "step": 11000 }, { "epoch": 2.55, "eval_bleu": 26.9203, "eval_gen_len": 18.7513, "eval_loss": 1.2291104793548584, "eval_runtime": 1083.6296, "eval_samples_per_second": 15.907, "eval_steps_per_second": 0.995, "step": 11000 }, { "epoch": 2.67, "learning_rate": 3.7086170462549895e-05, "loss": 0.7627, "step": 11500 }, { "epoch": 2.78, "learning_rate": 3.649917821084762e-05, "loss": 0.7631, "step": 12000 }, { "epoch": 2.78, "eval_bleu": 26.8668, "eval_gen_len": 18.5441, "eval_loss": 1.217042088508606, "eval_runtime": 1079.6719, "eval_samples_per_second": 15.965, "eval_steps_per_second": 0.998, "step": 12000 }, { "epoch": 2.9, "learning_rate": 3.591218595914534e-05, "loss": 0.772, "step": 12500 }, { "epoch": 3.02, "learning_rate": 3.532519370744307e-05, "loss": 0.7428, "step": 13000 }, { "epoch": 3.02, "eval_bleu": 26.2506, "eval_gen_len": 18.6959, "eval_loss": 1.3271690607070923, "eval_runtime": 1081.5187, "eval_samples_per_second": 15.938, "eval_steps_per_second": 0.997, "step": 13000 }, { "epoch": 3.13, "learning_rate": 3.473820145574079e-05, "loss": 0.5342, "step": 13500 }, { "epoch": 3.25, "learning_rate": 3.415120920403851e-05, "loss": 0.5502, "step": 14000 }, { "epoch": 3.25, "eval_bleu": 26.419, "eval_gen_len": 18.6722, "eval_loss": 1.3392120599746704, "eval_runtime": 1080.7472, "eval_samples_per_second": 15.949, "eval_steps_per_second": 0.997, "step": 14000 }, { "epoch": 3.36, "learning_rate": 3.356421695233623e-05, "loss": 0.5541, "step": 14500 }, { "epoch": 3.48, "learning_rate": 3.2977224700633954e-05, "loss": 0.5577, "step": 15000 }, { "epoch": 3.48, "eval_bleu": 26.1621, "eval_gen_len": 18.7036, "eval_loss": 1.320376992225647, "eval_runtime": 1082.8765, "eval_samples_per_second": 15.918, "eval_steps_per_second": 0.995, "step": 15000 }, { "epoch": 3.48, "step": 15000, "total_flos": 1.04022228860928e+18, "train_loss": 0.9628536214192709, "train_runtime": 41969.1276, "train_samples_per_second": 32.857, "train_steps_per_second": 1.027 } ], "logging_steps": 500, "max_steps": 43090, "num_train_epochs": 10, "save_steps": 1000, "total_flos": 1.04022228860928e+18, "trial_name": null, "trial_params": null }