{ "best_metric": 0.8995155096054077, "best_model_checkpoint": "./mbartLarge_koja_37p/checkpoint-33000", "epoch": 4.844107803417298, "eval_steps": 5500, "global_step": 55000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.6666666666666667e-05, "loss": 3.5939, "step": 500 }, { "epoch": 0.09, "learning_rate": 3.3333333333333335e-05, "loss": 3.0254, "step": 1000 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 1.9133, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.9936851145520225e-05, "loss": 1.672, "step": 2000 }, { "epoch": 0.22, "learning_rate": 4.987370229104044e-05, "loss": 1.56, "step": 2500 }, { "epoch": 0.26, "learning_rate": 4.9810553436560664e-05, "loss": 1.4874, "step": 3000 }, { "epoch": 0.31, "learning_rate": 4.974740458208088e-05, "loss": 1.4345, "step": 3500 }, { "epoch": 0.35, "learning_rate": 4.96842557276011e-05, "loss": 1.3676, "step": 4000 }, { "epoch": 0.4, "learning_rate": 4.9621106873121326e-05, "loss": 1.3183, "step": 4500 }, { "epoch": 0.44, "learning_rate": 4.955795801864154e-05, "loss": 1.2987, "step": 5000 }, { "epoch": 0.48, "learning_rate": 4.9494809164161765e-05, "loss": 1.2712, "step": 5500 }, { "epoch": 0.48, "eval_bleu": 3.0858, "eval_gen_len": 18.1085, "eval_loss": 1.2203737497329712, "eval_runtime": 1528.8304, "eval_samples_per_second": 14.853, "eval_steps_per_second": 0.929, "step": 5500 }, { "epoch": 0.53, "learning_rate": 4.943166030968199e-05, "loss": 1.2405, "step": 6000 }, { "epoch": 0.57, "learning_rate": 4.9368511455202204e-05, "loss": 1.2262, "step": 6500 }, { "epoch": 0.62, "learning_rate": 4.930536260072243e-05, "loss": 1.1959, "step": 7000 }, { "epoch": 0.66, "learning_rate": 4.924221374624264e-05, "loss": 1.182, "step": 7500 }, { "epoch": 0.7, "learning_rate": 4.9179064891762866e-05, "loss": 1.1683, "step": 8000 }, { "epoch": 0.75, "learning_rate": 4.911591603728309e-05, "loss": 1.1489, "step": 8500 }, { "epoch": 0.79, "learning_rate": 4.9052767182803305e-05, "loss": 1.1365, "step": 9000 }, { "epoch": 0.84, "learning_rate": 4.898961832832353e-05, "loss": 1.1313, "step": 9500 }, { "epoch": 0.88, "learning_rate": 4.8926469473843744e-05, "loss": 1.1172, "step": 10000 }, { "epoch": 0.92, "learning_rate": 4.886332061936397e-05, "loss": 1.1123, "step": 10500 }, { "epoch": 0.97, "learning_rate": 4.880017176488419e-05, "loss": 1.0946, "step": 11000 }, { "epoch": 0.97, "eval_bleu": 3.3162, "eval_gen_len": 17.8651, "eval_loss": 1.0577712059020996, "eval_runtime": 1538.4057, "eval_samples_per_second": 14.76, "eval_steps_per_second": 0.923, "step": 11000 }, { "epoch": 1.01, "learning_rate": 4.8737022910404406e-05, "loss": 1.0856, "step": 11500 }, { "epoch": 1.06, "learning_rate": 4.867387405592463e-05, "loss": 1.0716, "step": 12000 }, { "epoch": 1.1, "learning_rate": 4.8610725201444845e-05, "loss": 1.0736, "step": 12500 }, { "epoch": 1.14, "learning_rate": 4.854757634696507e-05, "loss": 1.0367, "step": 13000 }, { "epoch": 1.19, "learning_rate": 4.848442749248529e-05, "loss": 1.0204, "step": 13500 }, { "epoch": 1.23, "learning_rate": 4.842127863800551e-05, "loss": 1.0533, "step": 14000 }, { "epoch": 1.28, "learning_rate": 4.835812978352573e-05, "loss": 1.0426, "step": 14500 }, { "epoch": 1.32, "learning_rate": 4.829498092904595e-05, "loss": 1.0261, "step": 15000 }, { "epoch": 1.37, "learning_rate": 4.823183207456617e-05, "loss": 1.0111, "step": 15500 }, { "epoch": 1.41, "learning_rate": 4.816868322008639e-05, "loss": 0.9862, "step": 16000 }, { "epoch": 1.45, "learning_rate": 4.810553436560661e-05, "loss": 0.9546, "step": 16500 }, { "epoch": 1.45, "eval_bleu": 5.6024, "eval_gen_len": 17.902, "eval_loss": 0.9687988758087158, "eval_runtime": 1530.7999, "eval_samples_per_second": 14.833, "eval_steps_per_second": 0.928, "step": 16500 }, { "epoch": 1.5, "learning_rate": 4.804238551112683e-05, "loss": 0.9642, "step": 17000 }, { "epoch": 1.54, "learning_rate": 4.7979236656647054e-05, "loss": 0.9364, "step": 17500 }, { "epoch": 1.59, "learning_rate": 4.791608780216727e-05, "loss": 0.931, "step": 18000 }, { "epoch": 1.63, "learning_rate": 4.785293894768749e-05, "loss": 0.915, "step": 18500 }, { "epoch": 1.67, "learning_rate": 4.778979009320771e-05, "loss": 0.9043, "step": 19000 }, { "epoch": 1.72, "learning_rate": 4.772664123872793e-05, "loss": 0.9158, "step": 19500 }, { "epoch": 1.76, "learning_rate": 4.7663492384248155e-05, "loss": 0.9068, "step": 20000 }, { "epoch": 1.81, "learning_rate": 4.760034352976837e-05, "loss": 0.9314, "step": 20500 }, { "epoch": 1.85, "learning_rate": 4.7537194675288594e-05, "loss": 0.9112, "step": 21000 }, { "epoch": 1.89, "learning_rate": 4.747404582080881e-05, "loss": 0.8862, "step": 21500 }, { "epoch": 1.94, "learning_rate": 4.741089696632903e-05, "loss": 0.89, "step": 22000 }, { "epoch": 1.94, "eval_bleu": 5.1453, "eval_gen_len": 17.6144, "eval_loss": 0.941440761089325, "eval_runtime": 1490.4507, "eval_samples_per_second": 15.235, "eval_steps_per_second": 0.953, "step": 22000 }, { "epoch": 1.98, "learning_rate": 4.7347748111849256e-05, "loss": 0.8726, "step": 22500 }, { "epoch": 2.03, "learning_rate": 4.728459925736947e-05, "loss": 0.8749, "step": 23000 }, { "epoch": 2.07, "learning_rate": 4.7221450402889695e-05, "loss": 0.858, "step": 23500 }, { "epoch": 2.11, "learning_rate": 4.715830154840992e-05, "loss": 0.8479, "step": 24000 }, { "epoch": 2.16, "learning_rate": 4.7095152693930134e-05, "loss": 0.8394, "step": 24500 }, { "epoch": 2.2, "learning_rate": 4.703200383945036e-05, "loss": 0.8472, "step": 25000 }, { "epoch": 2.25, "learning_rate": 4.696885498497057e-05, "loss": 0.7793, "step": 25500 }, { "epoch": 2.29, "learning_rate": 4.6905706130490796e-05, "loss": 0.7753, "step": 26000 }, { "epoch": 2.33, "learning_rate": 4.684255727601102e-05, "loss": 0.7655, "step": 26500 }, { "epoch": 2.38, "learning_rate": 4.6779408421531235e-05, "loss": 0.7666, "step": 27000 }, { "epoch": 2.42, "learning_rate": 4.671625956705146e-05, "loss": 0.834, "step": 27500 }, { "epoch": 2.42, "eval_bleu": 5.3985, "eval_gen_len": 17.6899, "eval_loss": 0.9212985038757324, "eval_runtime": 1489.7985, "eval_samples_per_second": 15.242, "eval_steps_per_second": 0.953, "step": 27500 }, { "epoch": 2.47, "learning_rate": 4.6653110712571674e-05, "loss": 0.8098, "step": 28000 }, { "epoch": 2.51, "learning_rate": 4.65899618580919e-05, "loss": 0.8153, "step": 28500 }, { "epoch": 2.55, "learning_rate": 4.652681300361212e-05, "loss": 0.7837, "step": 29000 }, { "epoch": 2.6, "learning_rate": 4.6463664149132336e-05, "loss": 0.8255, "step": 29500 }, { "epoch": 2.64, "learning_rate": 4.640051529465256e-05, "loss": 0.8345, "step": 30000 }, { "epoch": 2.69, "learning_rate": 4.6337366440172775e-05, "loss": 0.8061, "step": 30500 }, { "epoch": 2.73, "learning_rate": 4.6274217585693e-05, "loss": 0.7743, "step": 31000 }, { "epoch": 2.77, "learning_rate": 4.621106873121322e-05, "loss": 0.7759, "step": 31500 }, { "epoch": 2.82, "learning_rate": 4.614791987673344e-05, "loss": 0.7777, "step": 32000 }, { "epoch": 2.86, "learning_rate": 4.608477102225366e-05, "loss": 0.7537, "step": 32500 }, { "epoch": 2.91, "learning_rate": 4.602162216777388e-05, "loss": 0.7439, "step": 33000 }, { "epoch": 2.91, "eval_bleu": 6.2934, "eval_gen_len": 17.4862, "eval_loss": 0.8995155096054077, "eval_runtime": 1459.4956, "eval_samples_per_second": 15.558, "eval_steps_per_second": 0.973, "step": 33000 }, { "epoch": 2.95, "learning_rate": 4.59584733132941e-05, "loss": 0.7424, "step": 33500 }, { "epoch": 2.99, "learning_rate": 4.589532445881432e-05, "loss": 0.7416, "step": 34000 }, { "epoch": 3.04, "learning_rate": 4.583217560433454e-05, "loss": 0.7722, "step": 34500 }, { "epoch": 3.08, "learning_rate": 4.576902674985476e-05, "loss": 0.7967, "step": 35000 }, { "epoch": 3.13, "learning_rate": 4.5705877895374984e-05, "loss": 0.7323, "step": 35500 }, { "epoch": 3.17, "learning_rate": 4.56427290408952e-05, "loss": 0.7171, "step": 36000 }, { "epoch": 3.21, "learning_rate": 4.557958018641542e-05, "loss": 0.7044, "step": 36500 }, { "epoch": 3.26, "learning_rate": 4.551643133193564e-05, "loss": 0.6828, "step": 37000 }, { "epoch": 3.3, "learning_rate": 4.545328247745586e-05, "loss": 0.7149, "step": 37500 }, { "epoch": 3.35, "learning_rate": 4.5390133622976085e-05, "loss": 0.6622, "step": 38000 }, { "epoch": 3.39, "learning_rate": 4.53269847684963e-05, "loss": 0.6803, "step": 38500 }, { "epoch": 3.39, "eval_bleu": 6.3565, "eval_gen_len": 17.8899, "eval_loss": 0.9016386270523071, "eval_runtime": 1493.3684, "eval_samples_per_second": 15.205, "eval_steps_per_second": 0.951, "step": 38500 }, { "epoch": 3.43, "learning_rate": 4.5263835914016524e-05, "loss": 0.7079, "step": 39000 }, { "epoch": 3.48, "learning_rate": 4.520068705953675e-05, "loss": 0.6978, "step": 39500 }, { "epoch": 3.52, "learning_rate": 4.513753820505696e-05, "loss": 0.7528, "step": 40000 }, { "epoch": 3.57, "learning_rate": 4.5074389350577186e-05, "loss": 0.7558, "step": 40500 }, { "epoch": 3.61, "learning_rate": 4.50112404960974e-05, "loss": 0.7742, "step": 41000 }, { "epoch": 3.66, "learning_rate": 4.4948091641617625e-05, "loss": 0.7505, "step": 41500 }, { "epoch": 3.7, "learning_rate": 4.488494278713785e-05, "loss": 0.7185, "step": 42000 }, { "epoch": 3.74, "learning_rate": 4.4821793932658064e-05, "loss": 0.7024, "step": 42500 }, { "epoch": 3.79, "learning_rate": 4.475864507817829e-05, "loss": 0.7262, "step": 43000 }, { "epoch": 3.83, "learning_rate": 4.46954962236985e-05, "loss": 0.7394, "step": 43500 }, { "epoch": 3.88, "learning_rate": 4.4632347369218726e-05, "loss": 0.733, "step": 44000 }, { "epoch": 3.88, "eval_bleu": 7.0351, "eval_gen_len": 17.6112, "eval_loss": 0.9225591421127319, "eval_runtime": 1483.9686, "eval_samples_per_second": 15.302, "eval_steps_per_second": 0.957, "step": 44000 }, { "epoch": 3.92, "learning_rate": 4.456919851473895e-05, "loss": 0.742, "step": 44500 }, { "epoch": 3.96, "learning_rate": 4.4506049660259165e-05, "loss": 0.735, "step": 45000 }, { "epoch": 4.01, "learning_rate": 4.444290080577939e-05, "loss": 0.6992, "step": 45500 }, { "epoch": 4.05, "learning_rate": 4.4379751951299604e-05, "loss": 0.7007, "step": 46000 }, { "epoch": 4.1, "learning_rate": 4.431660309681983e-05, "loss": 0.6979, "step": 46500 }, { "epoch": 4.14, "learning_rate": 4.425345424234005e-05, "loss": 0.6515, "step": 47000 }, { "epoch": 4.18, "learning_rate": 4.4190305387860266e-05, "loss": 0.6919, "step": 47500 }, { "epoch": 4.23, "learning_rate": 4.412715653338049e-05, "loss": 0.6241, "step": 48000 }, { "epoch": 4.27, "learning_rate": 4.406400767890071e-05, "loss": 0.6196, "step": 48500 }, { "epoch": 4.32, "learning_rate": 4.400085882442093e-05, "loss": 0.6557, "step": 49000 }, { "epoch": 4.36, "learning_rate": 4.393770996994115e-05, "loss": 0.6601, "step": 49500 }, { "epoch": 4.36, "eval_bleu": 5.3084, "eval_gen_len": 17.4292, "eval_loss": 0.9806875586509705, "eval_runtime": 1461.811, "eval_samples_per_second": 15.533, "eval_steps_per_second": 0.971, "step": 49500 }, { "epoch": 4.4, "learning_rate": 4.387456111546137e-05, "loss": 0.7354, "step": 50000 }, { "epoch": 4.45, "learning_rate": 4.381141226098159e-05, "loss": 0.7452, "step": 50500 }, { "epoch": 4.49, "learning_rate": 4.374826340650181e-05, "loss": 0.7175, "step": 51000 }, { "epoch": 4.54, "learning_rate": 4.368511455202203e-05, "loss": 0.7101, "step": 51500 }, { "epoch": 4.58, "learning_rate": 4.362196569754225e-05, "loss": 0.7068, "step": 52000 }, { "epoch": 4.62, "learning_rate": 4.355881684306247e-05, "loss": 0.6404, "step": 52500 }, { "epoch": 4.67, "learning_rate": 4.349566798858269e-05, "loss": 0.6316, "step": 53000 }, { "epoch": 4.71, "learning_rate": 4.3432519134102913e-05, "loss": 0.598, "step": 53500 }, { "epoch": 4.76, "learning_rate": 4.336937027962313e-05, "loss": 0.6091, "step": 54000 }, { "epoch": 4.8, "learning_rate": 4.330622142514335e-05, "loss": 0.695, "step": 54500 }, { "epoch": 4.84, "learning_rate": 4.324307257066357e-05, "loss": 0.6933, "step": 55000 }, { "epoch": 4.84, "eval_bleu": 6.8389, "eval_gen_len": 17.5131, "eval_loss": 0.923828661441803, "eval_runtime": 1455.6892, "eval_samples_per_second": 15.599, "eval_steps_per_second": 0.975, "step": 55000 } ], "logging_steps": 500, "max_steps": 397390, "num_train_epochs": 35, "save_steps": 5500, "total_flos": 1.907212892088828e+18, "trial_name": null, "trial_params": null }