|
{ |
|
"best_metric": 0.24614077806472778, |
|
"best_model_checkpoint": "../outputs/deductor-flant5-large/checkpoint-500", |
|
"epoch": 3.8314176245210727, |
|
"eval_steps": 50, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.952107279693487e-05, |
|
"loss": 0.5028, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.904214559386973e-05, |
|
"loss": 0.306, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_gen_len": 11.22110552763819, |
|
"eval_loss": 0.2959373891353607, |
|
"eval_rouge1": 89.3028, |
|
"eval_rouge2": 82.5127, |
|
"eval_rougeL": 87.4173, |
|
"eval_rougeLsum": 87.3544, |
|
"eval_runtime": 58.3773, |
|
"eval_samples_per_second": 40.906, |
|
"eval_steps_per_second": 1.285, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.85632183908046e-05, |
|
"loss": 0.2749, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8084291187739464e-05, |
|
"loss": 0.2774, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_gen_len": 11.257118927973199, |
|
"eval_loss": 0.2716549038887024, |
|
"eval_rouge1": 90.8414, |
|
"eval_rouge2": 84.2378, |
|
"eval_rougeL": 88.9385, |
|
"eval_rougeLsum": 88.9058, |
|
"eval_runtime": 58.4865, |
|
"eval_samples_per_second": 40.83, |
|
"eval_steps_per_second": 1.282, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.760536398467433e-05, |
|
"loss": 0.2676, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.7126436781609195e-05, |
|
"loss": 0.2366, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_gen_len": 11.208123953098827, |
|
"eval_loss": 0.2612508535385132, |
|
"eval_rouge1": 91.0152, |
|
"eval_rouge2": 84.6687, |
|
"eval_rougeL": 89.2107, |
|
"eval_rougeLsum": 89.1735, |
|
"eval_runtime": 58.169, |
|
"eval_samples_per_second": 41.053, |
|
"eval_steps_per_second": 1.289, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.6647509578544064e-05, |
|
"loss": 0.274, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.616858237547893e-05, |
|
"loss": 0.2166, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_gen_len": 11.280150753768844, |
|
"eval_loss": 0.25851312279701233, |
|
"eval_rouge1": 91.5215, |
|
"eval_rouge2": 85.4308, |
|
"eval_rougeL": 89.7742, |
|
"eval_rougeLsum": 89.7422, |
|
"eval_runtime": 58.2807, |
|
"eval_samples_per_second": 40.974, |
|
"eval_steps_per_second": 1.287, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.5689655172413794e-05, |
|
"loss": 0.2063, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5210727969348656e-05, |
|
"loss": 0.22, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_gen_len": 11.265494137353434, |
|
"eval_loss": 0.25169575214385986, |
|
"eval_rouge1": 91.5587, |
|
"eval_rouge2": 85.6107, |
|
"eval_rougeL": 89.8835, |
|
"eval_rougeLsum": 89.8621, |
|
"eval_runtime": 58.1997, |
|
"eval_samples_per_second": 41.031, |
|
"eval_steps_per_second": 1.289, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.4731800766283525e-05, |
|
"loss": 0.196, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.4252873563218394e-05, |
|
"loss": 0.1564, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_gen_len": 11.20393634840871, |
|
"eval_loss": 0.26295191049575806, |
|
"eval_rouge1": 91.999, |
|
"eval_rouge2": 86.0835, |
|
"eval_rougeL": 90.3611, |
|
"eval_rougeLsum": 90.3168, |
|
"eval_runtime": 58.4367, |
|
"eval_samples_per_second": 40.865, |
|
"eval_steps_per_second": 1.283, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.3773946360153256e-05, |
|
"loss": 0.1545, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.3295019157088125e-05, |
|
"loss": 0.1803, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_gen_len": 11.246231155778894, |
|
"eval_loss": 0.2546021044254303, |
|
"eval_rouge1": 91.5183, |
|
"eval_rouge2": 85.6214, |
|
"eval_rougeL": 89.9752, |
|
"eval_rougeLsum": 89.9323, |
|
"eval_runtime": 58.1825, |
|
"eval_samples_per_second": 41.043, |
|
"eval_steps_per_second": 1.289, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.2816091954022994e-05, |
|
"loss": 0.1793, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.2337164750957856e-05, |
|
"loss": 0.1737, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_gen_len": 11.194304857621441, |
|
"eval_loss": 0.24834655225276947, |
|
"eval_rouge1": 91.8342, |
|
"eval_rouge2": 86.0171, |
|
"eval_rougeL": 90.3042, |
|
"eval_rougeLsum": 90.2641, |
|
"eval_runtime": 58.3123, |
|
"eval_samples_per_second": 40.952, |
|
"eval_steps_per_second": 1.286, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.185823754789272e-05, |
|
"loss": 0.166, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.1379310344827587e-05, |
|
"loss": 0.157, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_gen_len": 10.937604690117253, |
|
"eval_loss": 0.24926304817199707, |
|
"eval_rouge1": 91.6585, |
|
"eval_rouge2": 85.4651, |
|
"eval_rougeL": 90.0181, |
|
"eval_rougeLsum": 89.9991, |
|
"eval_runtime": 57.6625, |
|
"eval_samples_per_second": 41.413, |
|
"eval_steps_per_second": 1.301, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.0900383141762455e-05, |
|
"loss": 0.1612, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.0421455938697324e-05, |
|
"loss": 0.1561, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_gen_len": 11.20142378559464, |
|
"eval_loss": 0.24614077806472778, |
|
"eval_rouge1": 92.1213, |
|
"eval_rouge2": 86.4281, |
|
"eval_rougeL": 90.5846, |
|
"eval_rougeLsum": 90.5294, |
|
"eval_runtime": 58.4684, |
|
"eval_samples_per_second": 40.843, |
|
"eval_steps_per_second": 1.283, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.9942528735632186e-05, |
|
"loss": 0.1472, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.9463601532567055e-05, |
|
"loss": 0.1191, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_gen_len": 11.241624790619765, |
|
"eval_loss": 0.2584824860095978, |
|
"eval_rouge1": 92.4493, |
|
"eval_rouge2": 86.6961, |
|
"eval_rougeL": 90.9293, |
|
"eval_rougeLsum": 90.8761, |
|
"eval_runtime": 58.3545, |
|
"eval_samples_per_second": 40.922, |
|
"eval_steps_per_second": 1.285, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.898467432950192e-05, |
|
"loss": 0.1252, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.850574712643678e-05, |
|
"loss": 0.1134, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_gen_len": 11.16750418760469, |
|
"eval_loss": 0.2633197009563446, |
|
"eval_rouge1": 92.4707, |
|
"eval_rouge2": 86.833, |
|
"eval_rougeL": 90.9516, |
|
"eval_rougeLsum": 90.9195, |
|
"eval_runtime": 58.1972, |
|
"eval_samples_per_second": 41.033, |
|
"eval_steps_per_second": 1.289, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.802681992337165e-05, |
|
"loss": 0.1128, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.7547892720306517e-05, |
|
"loss": 0.1227, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_gen_len": 11.264237855946398, |
|
"eval_loss": 0.25923022627830505, |
|
"eval_rouge1": 92.2738, |
|
"eval_rouge2": 86.5064, |
|
"eval_rougeL": 90.7556, |
|
"eval_rougeLsum": 90.6998, |
|
"eval_runtime": 58.2266, |
|
"eval_samples_per_second": 41.012, |
|
"eval_steps_per_second": 1.288, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.7068965517241385e-05, |
|
"loss": 0.1232, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.659003831417625e-05, |
|
"loss": 0.1175, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_gen_len": 11.17001675041876, |
|
"eval_loss": 0.2656923830509186, |
|
"eval_rouge1": 92.0861, |
|
"eval_rouge2": 86.2203, |
|
"eval_rougeL": 90.6168, |
|
"eval_rougeLsum": 90.5657, |
|
"eval_runtime": 58.0131, |
|
"eval_samples_per_second": 41.163, |
|
"eval_steps_per_second": 1.293, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.1095, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.563218390804598e-05, |
|
"loss": 0.1132, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_gen_len": 11.212311557788945, |
|
"eval_loss": 0.26437509059906006, |
|
"eval_rouge1": 92.3834, |
|
"eval_rouge2": 86.7237, |
|
"eval_rougeL": 90.8761, |
|
"eval_rougeLsum": 90.8389, |
|
"eval_runtime": 58.1768, |
|
"eval_samples_per_second": 41.047, |
|
"eval_steps_per_second": 1.289, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.515325670498085e-05, |
|
"loss": 0.1124, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.467432950191571e-05, |
|
"loss": 0.1097, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_gen_len": 11.1821608040201, |
|
"eval_loss": 0.2691878378391266, |
|
"eval_rouge1": 92.3356, |
|
"eval_rouge2": 86.7021, |
|
"eval_rougeL": 90.8717, |
|
"eval_rougeLsum": 90.8185, |
|
"eval_runtime": 58.2708, |
|
"eval_samples_per_second": 40.981, |
|
"eval_steps_per_second": 1.287, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.419540229885058e-05, |
|
"loss": 0.0874, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.371647509578545e-05, |
|
"loss": 0.0949, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_gen_len": 11.278475711892797, |
|
"eval_loss": 0.26897767186164856, |
|
"eval_rouge1": 92.5746, |
|
"eval_rouge2": 87.001, |
|
"eval_rougeL": 91.1734, |
|
"eval_rougeLsum": 91.1222, |
|
"eval_runtime": 58.645, |
|
"eval_samples_per_second": 40.72, |
|
"eval_steps_per_second": 1.279, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.323754789272031e-05, |
|
"loss": 0.0901, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.275862068965517e-05, |
|
"loss": 0.0813, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_gen_len": 11.22571189279732, |
|
"eval_loss": 0.2874927222728729, |
|
"eval_rouge1": 92.5641, |
|
"eval_rouge2": 86.9813, |
|
"eval_rougeL": 91.0881, |
|
"eval_rougeLsum": 91.0411, |
|
"eval_runtime": 58.3729, |
|
"eval_samples_per_second": 40.909, |
|
"eval_steps_per_second": 1.285, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.227969348659004e-05, |
|
"loss": 0.1005, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 3.180076628352491e-05, |
|
"loss": 0.0861, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_gen_len": 11.21356783919598, |
|
"eval_loss": 0.280032217502594, |
|
"eval_rouge1": 92.4738, |
|
"eval_rouge2": 86.9379, |
|
"eval_rougeL": 91.0384, |
|
"eval_rougeLsum": 90.9995, |
|
"eval_runtime": 58.3261, |
|
"eval_samples_per_second": 40.942, |
|
"eval_steps_per_second": 1.286, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3.132183908045977e-05, |
|
"loss": 0.0828, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.084291187739464e-05, |
|
"loss": 0.0879, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_gen_len": 11.23031825795645, |
|
"eval_loss": 0.27702075242996216, |
|
"eval_rouge1": 92.6025, |
|
"eval_rouge2": 87.105, |
|
"eval_rougeL": 91.1632, |
|
"eval_rougeLsum": 91.1292, |
|
"eval_runtime": 58.0228, |
|
"eval_samples_per_second": 41.156, |
|
"eval_steps_per_second": 1.293, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"step": 1000, |
|
"total_flos": 2.492186999051059e+16, |
|
"train_loss": 0.16499798774719238, |
|
"train_runtime": 3538.4305, |
|
"train_samples_per_second": 47.196, |
|
"train_steps_per_second": 0.738 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 2610, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 50, |
|
"total_flos": 2.492186999051059e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|