{ "best_metric": 0.24614077806472778, "best_model_checkpoint": "../outputs/deductor-flant5-large/checkpoint-500", "epoch": 3.8314176245210727, "eval_steps": 50, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 4.952107279693487e-05, "loss": 0.5028, "step": 25 }, { "epoch": 0.19, "learning_rate": 4.904214559386973e-05, "loss": 0.306, "step": 50 }, { "epoch": 0.19, "eval_gen_len": 11.22110552763819, "eval_loss": 0.2959373891353607, "eval_rouge1": 89.3028, "eval_rouge2": 82.5127, "eval_rougeL": 87.4173, "eval_rougeLsum": 87.3544, "eval_runtime": 58.3773, "eval_samples_per_second": 40.906, "eval_steps_per_second": 1.285, "step": 50 }, { "epoch": 0.29, "learning_rate": 4.85632183908046e-05, "loss": 0.2749, "step": 75 }, { "epoch": 0.38, "learning_rate": 4.8084291187739464e-05, "loss": 0.2774, "step": 100 }, { "epoch": 0.38, "eval_gen_len": 11.257118927973199, "eval_loss": 0.2716549038887024, "eval_rouge1": 90.8414, "eval_rouge2": 84.2378, "eval_rougeL": 88.9385, "eval_rougeLsum": 88.9058, "eval_runtime": 58.4865, "eval_samples_per_second": 40.83, "eval_steps_per_second": 1.282, "step": 100 }, { "epoch": 0.48, "learning_rate": 4.760536398467433e-05, "loss": 0.2676, "step": 125 }, { "epoch": 0.57, "learning_rate": 4.7126436781609195e-05, "loss": 0.2366, "step": 150 }, { "epoch": 0.57, "eval_gen_len": 11.208123953098827, "eval_loss": 0.2612508535385132, "eval_rouge1": 91.0152, "eval_rouge2": 84.6687, "eval_rougeL": 89.2107, "eval_rougeLsum": 89.1735, "eval_runtime": 58.169, "eval_samples_per_second": 41.053, "eval_steps_per_second": 1.289, "step": 150 }, { "epoch": 0.67, "learning_rate": 4.6647509578544064e-05, "loss": 0.274, "step": 175 }, { "epoch": 0.77, "learning_rate": 4.616858237547893e-05, "loss": 0.2166, "step": 200 }, { "epoch": 0.77, "eval_gen_len": 11.280150753768844, "eval_loss": 0.25851312279701233, "eval_rouge1": 91.5215, "eval_rouge2": 85.4308, "eval_rougeL": 89.7742, "eval_rougeLsum": 89.7422, "eval_runtime": 58.2807, "eval_samples_per_second": 40.974, "eval_steps_per_second": 1.287, "step": 200 }, { "epoch": 0.86, "learning_rate": 4.5689655172413794e-05, "loss": 0.2063, "step": 225 }, { "epoch": 0.96, "learning_rate": 4.5210727969348656e-05, "loss": 0.22, "step": 250 }, { "epoch": 0.96, "eval_gen_len": 11.265494137353434, "eval_loss": 0.25169575214385986, "eval_rouge1": 91.5587, "eval_rouge2": 85.6107, "eval_rougeL": 89.8835, "eval_rougeLsum": 89.8621, "eval_runtime": 58.1997, "eval_samples_per_second": 41.031, "eval_steps_per_second": 1.289, "step": 250 }, { "epoch": 1.05, "learning_rate": 4.4731800766283525e-05, "loss": 0.196, "step": 275 }, { "epoch": 1.15, "learning_rate": 4.4252873563218394e-05, "loss": 0.1564, "step": 300 }, { "epoch": 1.15, "eval_gen_len": 11.20393634840871, "eval_loss": 0.26295191049575806, "eval_rouge1": 91.999, "eval_rouge2": 86.0835, "eval_rougeL": 90.3611, "eval_rougeLsum": 90.3168, "eval_runtime": 58.4367, "eval_samples_per_second": 40.865, "eval_steps_per_second": 1.283, "step": 300 }, { "epoch": 1.25, "learning_rate": 4.3773946360153256e-05, "loss": 0.1545, "step": 325 }, { "epoch": 1.34, "learning_rate": 4.3295019157088125e-05, "loss": 0.1803, "step": 350 }, { "epoch": 1.34, "eval_gen_len": 11.246231155778894, "eval_loss": 0.2546021044254303, "eval_rouge1": 91.5183, "eval_rouge2": 85.6214, "eval_rougeL": 89.9752, "eval_rougeLsum": 89.9323, "eval_runtime": 58.1825, "eval_samples_per_second": 41.043, "eval_steps_per_second": 1.289, "step": 350 }, { "epoch": 1.44, "learning_rate": 4.2816091954022994e-05, "loss": 0.1793, "step": 375 }, { "epoch": 1.53, "learning_rate": 4.2337164750957856e-05, "loss": 0.1737, "step": 400 }, { "epoch": 1.53, "eval_gen_len": 11.194304857621441, "eval_loss": 0.24834655225276947, "eval_rouge1": 91.8342, "eval_rouge2": 86.0171, "eval_rougeL": 90.3042, "eval_rougeLsum": 90.2641, "eval_runtime": 58.3123, "eval_samples_per_second": 40.952, "eval_steps_per_second": 1.286, "step": 400 }, { "epoch": 1.63, "learning_rate": 4.185823754789272e-05, "loss": 0.166, "step": 425 }, { "epoch": 1.72, "learning_rate": 4.1379310344827587e-05, "loss": 0.157, "step": 450 }, { "epoch": 1.72, "eval_gen_len": 10.937604690117253, "eval_loss": 0.24926304817199707, "eval_rouge1": 91.6585, "eval_rouge2": 85.4651, "eval_rougeL": 90.0181, "eval_rougeLsum": 89.9991, "eval_runtime": 57.6625, "eval_samples_per_second": 41.413, "eval_steps_per_second": 1.301, "step": 450 }, { "epoch": 1.82, "learning_rate": 4.0900383141762455e-05, "loss": 0.1612, "step": 475 }, { "epoch": 1.92, "learning_rate": 4.0421455938697324e-05, "loss": 0.1561, "step": 500 }, { "epoch": 1.92, "eval_gen_len": 11.20142378559464, "eval_loss": 0.24614077806472778, "eval_rouge1": 92.1213, "eval_rouge2": 86.4281, "eval_rougeL": 90.5846, "eval_rougeLsum": 90.5294, "eval_runtime": 58.4684, "eval_samples_per_second": 40.843, "eval_steps_per_second": 1.283, "step": 500 }, { "epoch": 2.01, "learning_rate": 3.9942528735632186e-05, "loss": 0.1472, "step": 525 }, { "epoch": 2.11, "learning_rate": 3.9463601532567055e-05, "loss": 0.1191, "step": 550 }, { "epoch": 2.11, "eval_gen_len": 11.241624790619765, "eval_loss": 0.2584824860095978, "eval_rouge1": 92.4493, "eval_rouge2": 86.6961, "eval_rougeL": 90.9293, "eval_rougeLsum": 90.8761, "eval_runtime": 58.3545, "eval_samples_per_second": 40.922, "eval_steps_per_second": 1.285, "step": 550 }, { "epoch": 2.2, "learning_rate": 3.898467432950192e-05, "loss": 0.1252, "step": 575 }, { "epoch": 2.3, "learning_rate": 3.850574712643678e-05, "loss": 0.1134, "step": 600 }, { "epoch": 2.3, "eval_gen_len": 11.16750418760469, "eval_loss": 0.2633197009563446, "eval_rouge1": 92.4707, "eval_rouge2": 86.833, "eval_rougeL": 90.9516, "eval_rougeLsum": 90.9195, "eval_runtime": 58.1972, "eval_samples_per_second": 41.033, "eval_steps_per_second": 1.289, "step": 600 }, { "epoch": 2.39, "learning_rate": 3.802681992337165e-05, "loss": 0.1128, "step": 625 }, { "epoch": 2.49, "learning_rate": 3.7547892720306517e-05, "loss": 0.1227, "step": 650 }, { "epoch": 2.49, "eval_gen_len": 11.264237855946398, "eval_loss": 0.25923022627830505, "eval_rouge1": 92.2738, "eval_rouge2": 86.5064, "eval_rougeL": 90.7556, "eval_rougeLsum": 90.6998, "eval_runtime": 58.2266, "eval_samples_per_second": 41.012, "eval_steps_per_second": 1.288, "step": 650 }, { "epoch": 2.59, "learning_rate": 3.7068965517241385e-05, "loss": 0.1232, "step": 675 }, { "epoch": 2.68, "learning_rate": 3.659003831417625e-05, "loss": 0.1175, "step": 700 }, { "epoch": 2.68, "eval_gen_len": 11.17001675041876, "eval_loss": 0.2656923830509186, "eval_rouge1": 92.0861, "eval_rouge2": 86.2203, "eval_rougeL": 90.6168, "eval_rougeLsum": 90.5657, "eval_runtime": 58.0131, "eval_samples_per_second": 41.163, "eval_steps_per_second": 1.293, "step": 700 }, { "epoch": 2.78, "learning_rate": 3.611111111111111e-05, "loss": 0.1095, "step": 725 }, { "epoch": 2.87, "learning_rate": 3.563218390804598e-05, "loss": 0.1132, "step": 750 }, { "epoch": 2.87, "eval_gen_len": 11.212311557788945, "eval_loss": 0.26437509059906006, "eval_rouge1": 92.3834, "eval_rouge2": 86.7237, "eval_rougeL": 90.8761, "eval_rougeLsum": 90.8389, "eval_runtime": 58.1768, "eval_samples_per_second": 41.047, "eval_steps_per_second": 1.289, "step": 750 }, { "epoch": 2.97, "learning_rate": 3.515325670498085e-05, "loss": 0.1124, "step": 775 }, { "epoch": 3.07, "learning_rate": 3.467432950191571e-05, "loss": 0.1097, "step": 800 }, { "epoch": 3.07, "eval_gen_len": 11.1821608040201, "eval_loss": 0.2691878378391266, "eval_rouge1": 92.3356, "eval_rouge2": 86.7021, "eval_rougeL": 90.8717, "eval_rougeLsum": 90.8185, "eval_runtime": 58.2708, "eval_samples_per_second": 40.981, "eval_steps_per_second": 1.287, "step": 800 }, { "epoch": 3.16, "learning_rate": 3.419540229885058e-05, "loss": 0.0874, "step": 825 }, { "epoch": 3.26, "learning_rate": 3.371647509578545e-05, "loss": 0.0949, "step": 850 }, { "epoch": 3.26, "eval_gen_len": 11.278475711892797, "eval_loss": 0.26897767186164856, "eval_rouge1": 92.5746, "eval_rouge2": 87.001, "eval_rougeL": 91.1734, "eval_rougeLsum": 91.1222, "eval_runtime": 58.645, "eval_samples_per_second": 40.72, "eval_steps_per_second": 1.279, "step": 850 }, { "epoch": 3.35, "learning_rate": 3.323754789272031e-05, "loss": 0.0901, "step": 875 }, { "epoch": 3.45, "learning_rate": 3.275862068965517e-05, "loss": 0.0813, "step": 900 }, { "epoch": 3.45, "eval_gen_len": 11.22571189279732, "eval_loss": 0.2874927222728729, "eval_rouge1": 92.5641, "eval_rouge2": 86.9813, "eval_rougeL": 91.0881, "eval_rougeLsum": 91.0411, "eval_runtime": 58.3729, "eval_samples_per_second": 40.909, "eval_steps_per_second": 1.285, "step": 900 }, { "epoch": 3.54, "learning_rate": 3.227969348659004e-05, "loss": 0.1005, "step": 925 }, { "epoch": 3.64, "learning_rate": 3.180076628352491e-05, "loss": 0.0861, "step": 950 }, { "epoch": 3.64, "eval_gen_len": 11.21356783919598, "eval_loss": 0.280032217502594, "eval_rouge1": 92.4738, "eval_rouge2": 86.9379, "eval_rougeL": 91.0384, "eval_rougeLsum": 90.9995, "eval_runtime": 58.3261, "eval_samples_per_second": 40.942, "eval_steps_per_second": 1.286, "step": 950 }, { "epoch": 3.74, "learning_rate": 3.132183908045977e-05, "loss": 0.0828, "step": 975 }, { "epoch": 3.83, "learning_rate": 3.084291187739464e-05, "loss": 0.0879, "step": 1000 }, { "epoch": 3.83, "eval_gen_len": 11.23031825795645, "eval_loss": 0.27702075242996216, "eval_rouge1": 92.6025, "eval_rouge2": 87.105, "eval_rougeL": 91.1632, "eval_rougeLsum": 91.1292, "eval_runtime": 58.0228, "eval_samples_per_second": 41.156, "eval_steps_per_second": 1.293, "step": 1000 }, { "epoch": 3.83, "step": 1000, "total_flos": 2.492186999051059e+16, "train_loss": 0.16499798774719238, "train_runtime": 3538.4305, "train_samples_per_second": 47.196, "train_steps_per_second": 0.738 } ], "logging_steps": 25, "max_steps": 2610, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 50, "total_flos": 2.492186999051059e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }