|
{ |
|
"best_metric": 1.4883581399917603, |
|
"best_model_checkpoint": "ccdv_pegasus_xsum_summarization/checkpoint-13500", |
|
"epoch": 2.0012007204322595, |
|
"global_step": 30000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.944411091099104e-05, |
|
"loss": 1.3482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_gen_len": 190.7041, |
|
"eval_loss": 1.5671061277389526, |
|
"eval_rouge1": 43.9725, |
|
"eval_rouge2": 20.8852, |
|
"eval_rougeL": 29.6036, |
|
"eval_rougeLsum": 39.2595, |
|
"eval_runtime": 10745.9844, |
|
"eval_samples_per_second": 0.617, |
|
"eval_steps_per_second": 0.077, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8888221821982085e-05, |
|
"loss": 1.0335, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_gen_len": 159.8545, |
|
"eval_loss": 1.5465657711029053, |
|
"eval_rouge1": 44.9236, |
|
"eval_rouge2": 21.1853, |
|
"eval_rougeL": 30.4447, |
|
"eval_rougeLsum": 39.9918, |
|
"eval_runtime": 10231.1208, |
|
"eval_samples_per_second": 0.648, |
|
"eval_steps_per_second": 0.081, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8332332732973116e-05, |
|
"loss": 1.0184, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_gen_len": 143.5453, |
|
"eval_loss": 1.5334348678588867, |
|
"eval_rouge1": 44.9483, |
|
"eval_rouge2": 20.9962, |
|
"eval_rougeL": 30.5328, |
|
"eval_rougeLsum": 40.0531, |
|
"eval_runtime": 8769.3615, |
|
"eval_samples_per_second": 0.756, |
|
"eval_steps_per_second": 0.095, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.777644364396416e-05, |
|
"loss": 1.0015, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_gen_len": 137.856, |
|
"eval_loss": 1.529853343963623, |
|
"eval_rouge1": 45.9034, |
|
"eval_rouge2": 21.784, |
|
"eval_rougeL": 31.4025, |
|
"eval_rougeLsum": 40.8983, |
|
"eval_runtime": 7582.4229, |
|
"eval_samples_per_second": 0.875, |
|
"eval_steps_per_second": 0.109, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.72205545549552e-05, |
|
"loss": 1.0101, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_gen_len": 134.5485, |
|
"eval_loss": 1.5291049480438232, |
|
"eval_rouge1": 45.6738, |
|
"eval_rouge2": 21.5853, |
|
"eval_rougeL": 31.1439, |
|
"eval_rougeLsum": 40.7442, |
|
"eval_runtime": 6945.0608, |
|
"eval_samples_per_second": 0.955, |
|
"eval_steps_per_second": 0.12, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6664665465946236e-05, |
|
"loss": 0.9973, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_gen_len": 131.2587, |
|
"eval_loss": 1.523977518081665, |
|
"eval_rouge1": 45.5052, |
|
"eval_rouge2": 21.4202, |
|
"eval_rougeL": 31.1499, |
|
"eval_rougeLsum": 40.5736, |
|
"eval_runtime": 6458.4712, |
|
"eval_samples_per_second": 1.027, |
|
"eval_steps_per_second": 0.129, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6108776376937274e-05, |
|
"loss": 0.9855, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_gen_len": 131.0582, |
|
"eval_loss": 1.5234577655792236, |
|
"eval_rouge1": 45.8336, |
|
"eval_rouge2": 21.7072, |
|
"eval_rougeL": 31.439, |
|
"eval_rougeLsum": 40.9387, |
|
"eval_runtime": 6345.3859, |
|
"eval_samples_per_second": 1.045, |
|
"eval_steps_per_second": 0.131, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.555288728792831e-05, |
|
"loss": 0.9868, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_gen_len": 127.9753, |
|
"eval_loss": 1.5183237791061401, |
|
"eval_rouge1": 45.6348, |
|
"eval_rouge2": 21.5462, |
|
"eval_rougeL": 31.3009, |
|
"eval_rougeLsum": 40.6469, |
|
"eval_runtime": 6091.2782, |
|
"eval_samples_per_second": 1.089, |
|
"eval_steps_per_second": 0.136, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4996998198919356e-05, |
|
"loss": 0.9802, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_gen_len": 127.78, |
|
"eval_loss": 1.5132805109024048, |
|
"eval_rouge1": 45.4357, |
|
"eval_rouge2": 21.3339, |
|
"eval_rougeL": 31.1304, |
|
"eval_rougeLsum": 40.531, |
|
"eval_runtime": 5970.8563, |
|
"eval_samples_per_second": 1.111, |
|
"eval_steps_per_second": 0.139, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4441109109910394e-05, |
|
"loss": 0.9743, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_gen_len": 126.9619, |
|
"eval_loss": 1.5101301670074463, |
|
"eval_rouge1": 45.4845, |
|
"eval_rouge2": 21.4302, |
|
"eval_rougeL": 31.2033, |
|
"eval_rougeLsum": 40.5934, |
|
"eval_runtime": 5820.9525, |
|
"eval_samples_per_second": 1.14, |
|
"eval_steps_per_second": 0.143, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.388522002090143e-05, |
|
"loss": 0.972, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_gen_len": 127.1796, |
|
"eval_loss": 1.5053614377975464, |
|
"eval_rouge1": 45.196, |
|
"eval_rouge2": 21.1882, |
|
"eval_rougeL": 30.9407, |
|
"eval_rougeLsum": 40.2648, |
|
"eval_runtime": 5768.324, |
|
"eval_samples_per_second": 1.15, |
|
"eval_steps_per_second": 0.144, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.332933093189247e-05, |
|
"loss": 0.9651, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_gen_len": 126.9254, |
|
"eval_loss": 1.5030862092971802, |
|
"eval_rouge1": 45.4822, |
|
"eval_rouge2": 21.4363, |
|
"eval_rougeL": 31.1422, |
|
"eval_rougeLsum": 40.5397, |
|
"eval_runtime": 5665.8916, |
|
"eval_samples_per_second": 1.171, |
|
"eval_steps_per_second": 0.146, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.277344184288351e-05, |
|
"loss": 0.9758, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_gen_len": 126.4933, |
|
"eval_loss": 1.495548963546753, |
|
"eval_rouge1": 45.299, |
|
"eval_rouge2": 21.346, |
|
"eval_rougeL": 31.0361, |
|
"eval_rougeLsum": 40.3325, |
|
"eval_runtime": 5589.2093, |
|
"eval_samples_per_second": 1.187, |
|
"eval_steps_per_second": 0.149, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.221755275387455e-05, |
|
"loss": 0.9652, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_gen_len": 126.0859, |
|
"eval_loss": 1.4975615739822388, |
|
"eval_rouge1": 45.4694, |
|
"eval_rouge2": 21.5044, |
|
"eval_rougeL": 31.1786, |
|
"eval_rougeLsum": 40.5032, |
|
"eval_runtime": 5569.8623, |
|
"eval_samples_per_second": 1.191, |
|
"eval_steps_per_second": 0.149, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.166166366486558e-05, |
|
"loss": 0.9601, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_gen_len": 126.8815, |
|
"eval_loss": 1.4945002794265747, |
|
"eval_rouge1": 45.1971, |
|
"eval_rouge2": 21.2682, |
|
"eval_rougeL": 30.9321, |
|
"eval_rougeLsum": 40.2959, |
|
"eval_runtime": 5557.7856, |
|
"eval_samples_per_second": 1.193, |
|
"eval_steps_per_second": 0.149, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.110577457585663e-05, |
|
"loss": 0.9502, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_gen_len": 126.4628, |
|
"eval_loss": 1.49406898021698, |
|
"eval_rouge1": 45.5653, |
|
"eval_rouge2": 21.5655, |
|
"eval_rougeL": 31.2703, |
|
"eval_rougeLsum": 40.5622, |
|
"eval_runtime": 5535.3927, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.15, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0549885486847665e-05, |
|
"loss": 0.9537, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_gen_len": 126.5709, |
|
"eval_loss": 1.4941043853759766, |
|
"eval_rouge1": 45.2806, |
|
"eval_rouge2": 21.2587, |
|
"eval_rougeL": 30.93, |
|
"eval_rougeLsum": 40.354, |
|
"eval_runtime": 5533.5879, |
|
"eval_samples_per_second": 1.199, |
|
"eval_steps_per_second": 0.15, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.999399639783871e-05, |
|
"loss": 0.9629, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_gen_len": 126.547, |
|
"eval_loss": 1.4939745664596558, |
|
"eval_rouge1": 45.2474, |
|
"eval_rouge2": 21.275, |
|
"eval_rougeL": 30.9302, |
|
"eval_rougeLsum": 40.3377, |
|
"eval_runtime": 5530.7272, |
|
"eval_samples_per_second": 1.199, |
|
"eval_steps_per_second": 0.15, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.943810730882974e-05, |
|
"loss": 0.9528, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_gen_len": 126.768, |
|
"eval_loss": 1.4947481155395508, |
|
"eval_rouge1": 45.3619, |
|
"eval_rouge2": 21.3754, |
|
"eval_rougeL": 31.0723, |
|
"eval_rougeLsum": 40.4162, |
|
"eval_runtime": 5524.5717, |
|
"eval_samples_per_second": 1.201, |
|
"eval_steps_per_second": 0.15, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.888221821982078e-05, |
|
"loss": 0.9532, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_gen_len": 126.5323, |
|
"eval_loss": 1.4923893213272095, |
|
"eval_rouge1": 45.5763, |
|
"eval_rouge2": 21.6469, |
|
"eval_rougeL": 31.2585, |
|
"eval_rougeLsum": 40.5722, |
|
"eval_runtime": 5518.2912, |
|
"eval_samples_per_second": 1.202, |
|
"eval_steps_per_second": 0.15, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.832632913081182e-05, |
|
"loss": 0.945, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_gen_len": 126.69, |
|
"eval_loss": 1.4898710250854492, |
|
"eval_rouge1": 45.2629, |
|
"eval_rouge2": 21.3471, |
|
"eval_rougeL": 31.0405, |
|
"eval_rougeLsum": 40.3211, |
|
"eval_runtime": 6184.8714, |
|
"eval_samples_per_second": 1.072, |
|
"eval_steps_per_second": 0.134, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.777044004180286e-05, |
|
"loss": 0.9464, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_gen_len": 126.9052, |
|
"eval_loss": 1.489205002784729, |
|
"eval_rouge1": 45.3769, |
|
"eval_rouge2": 21.3457, |
|
"eval_rougeL": 30.9968, |
|
"eval_rougeLsum": 40.388, |
|
"eval_runtime": 5520.5499, |
|
"eval_samples_per_second": 1.202, |
|
"eval_steps_per_second": 0.15, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7214550952793906e-05, |
|
"loss": 0.9544, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_gen_len": 126.5739, |
|
"eval_loss": 1.4892535209655762, |
|
"eval_rouge1": 45.411, |
|
"eval_rouge2": 21.3852, |
|
"eval_rougeL": 31.0295, |
|
"eval_rougeLsum": 40.4881, |
|
"eval_runtime": 5521.4271, |
|
"eval_samples_per_second": 1.201, |
|
"eval_steps_per_second": 0.15, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6658661863784937e-05, |
|
"loss": 0.9467, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_gen_len": 126.7315, |
|
"eval_loss": 1.4929231405258179, |
|
"eval_rouge1": 45.4345, |
|
"eval_rouge2": 21.4378, |
|
"eval_rougeL": 31.1163, |
|
"eval_rougeLsum": 40.4393, |
|
"eval_runtime": 5524.2145, |
|
"eval_samples_per_second": 1.201, |
|
"eval_steps_per_second": 0.15, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.610277277477598e-05, |
|
"loss": 0.9517, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_gen_len": 126.58, |
|
"eval_loss": 1.4917516708374023, |
|
"eval_rouge1": 45.3614, |
|
"eval_rouge2": 21.3396, |
|
"eval_rougeL": 30.9925, |
|
"eval_rougeLsum": 40.3636, |
|
"eval_runtime": 5514.201, |
|
"eval_samples_per_second": 1.203, |
|
"eval_steps_per_second": 0.151, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.554688368576702e-05, |
|
"loss": 0.9497, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_gen_len": 126.7977, |
|
"eval_loss": 1.4918133020401, |
|
"eval_rouge1": 45.2485, |
|
"eval_rouge2": 21.2367, |
|
"eval_rougeL": 30.9282, |
|
"eval_rougeLsum": 40.3438, |
|
"eval_runtime": 6509.3818, |
|
"eval_samples_per_second": 1.019, |
|
"eval_steps_per_second": 0.128, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.499099459675806e-05, |
|
"loss": 0.9386, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_gen_len": 126.7524, |
|
"eval_loss": 1.4883581399917603, |
|
"eval_rouge1": 45.5038, |
|
"eval_rouge2": 21.5064, |
|
"eval_rougeL": 31.2132, |
|
"eval_rougeLsum": 40.5696, |
|
"eval_runtime": 5529.57, |
|
"eval_samples_per_second": 1.2, |
|
"eval_steps_per_second": 0.15, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4435105507749095e-05, |
|
"loss": 0.9473, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_gen_len": 126.6534, |
|
"eval_loss": 1.4918317794799805, |
|
"eval_rouge1": 45.2367, |
|
"eval_rouge2": 21.2615, |
|
"eval_rougeL": 30.9179, |
|
"eval_rougeLsum": 40.2548, |
|
"eval_runtime": 5515.735, |
|
"eval_samples_per_second": 1.203, |
|
"eval_steps_per_second": 0.15, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.387921641874013e-05, |
|
"loss": 0.9235, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 126.5972, |
|
"eval_loss": 1.4897193908691406, |
|
"eval_rouge1": 45.8027, |
|
"eval_rouge2": 21.7228, |
|
"eval_rougeL": 31.3946, |
|
"eval_rougeLsum": 40.764, |
|
"eval_runtime": 5518.2889, |
|
"eval_samples_per_second": 1.202, |
|
"eval_steps_per_second": 0.15, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.332332732973118e-05, |
|
"loss": 0.9344, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 126.9212, |
|
"eval_loss": 1.4973394870758057, |
|
"eval_rouge1": 44.8773, |
|
"eval_rouge2": 20.9475, |
|
"eval_rougeL": 30.5827, |
|
"eval_rougeLsum": 39.9489, |
|
"eval_runtime": 5510.2549, |
|
"eval_samples_per_second": 1.204, |
|
"eval_steps_per_second": 0.151, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.2767438240722215e-05, |
|
"loss": 0.9139, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_gen_len": 126.692, |
|
"eval_loss": 1.5064738988876343, |
|
"eval_rouge1": 45.4207, |
|
"eval_rouge2": 21.3856, |
|
"eval_rougeL": 31.0837, |
|
"eval_rougeLsum": 40.4414, |
|
"eval_runtime": 5526.017, |
|
"eval_samples_per_second": 1.2, |
|
"eval_steps_per_second": 0.15, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.221154915171325e-05, |
|
"loss": 0.8939, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_gen_len": 126.5179, |
|
"eval_loss": 1.508902668952942, |
|
"eval_rouge1": 45.5575, |
|
"eval_rouge2": 21.5153, |
|
"eval_rougeL": 31.2115, |
|
"eval_rougeLsum": 40.5517, |
|
"eval_runtime": 5698.8075, |
|
"eval_samples_per_second": 1.164, |
|
"eval_steps_per_second": 0.146, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.165566006270429e-05, |
|
"loss": 0.8968, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_gen_len": 126.5447, |
|
"eval_loss": 1.5106098651885986, |
|
"eval_rouge1": 45.4574, |
|
"eval_rouge2": 21.4786, |
|
"eval_rougeL": 31.1065, |
|
"eval_rougeLsum": 40.495, |
|
"eval_runtime": 5606.9538, |
|
"eval_samples_per_second": 1.183, |
|
"eval_steps_per_second": 0.148, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.109977097369533e-05, |
|
"loss": 0.8999, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_gen_len": 126.6894, |
|
"eval_loss": 1.5100876092910767, |
|
"eval_rouge1": 45.4805, |
|
"eval_rouge2": 21.4579, |
|
"eval_rougeL": 31.1062, |
|
"eval_rougeLsum": 40.5138, |
|
"eval_runtime": 5594.355, |
|
"eval_samples_per_second": 1.186, |
|
"eval_steps_per_second": 0.148, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.054388188468637e-05, |
|
"loss": 0.903, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_gen_len": 126.5988, |
|
"eval_loss": 1.5103389024734497, |
|
"eval_rouge1": 45.495, |
|
"eval_rouge2": 21.4395, |
|
"eval_rougeL": 31.1445, |
|
"eval_rougeLsum": 40.4949, |
|
"eval_runtime": 5586.6059, |
|
"eval_samples_per_second": 1.187, |
|
"eval_steps_per_second": 0.149, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9987992795677407e-05, |
|
"loss": 0.8988, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_gen_len": 126.5643, |
|
"eval_loss": 1.5120760202407837, |
|
"eval_rouge1": 45.2764, |
|
"eval_rouge2": 21.2652, |
|
"eval_rougeL": 30.944, |
|
"eval_rougeLsum": 40.3249, |
|
"eval_runtime": 5558.8098, |
|
"eval_samples_per_second": 1.193, |
|
"eval_steps_per_second": 0.149, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9432103706668445e-05, |
|
"loss": 0.9027, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_gen_len": 126.8441, |
|
"eval_loss": 1.5092076063156128, |
|
"eval_rouge1": 45.4884, |
|
"eval_rouge2": 21.4334, |
|
"eval_rougeL": 31.0499, |
|
"eval_rougeLsum": 40.4796, |
|
"eval_runtime": 5536.9856, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.15, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8876214617659486e-05, |
|
"loss": 0.9044, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_gen_len": 126.8737, |
|
"eval_loss": 1.5079020261764526, |
|
"eval_rouge1": 45.5708, |
|
"eval_rouge2": 21.5358, |
|
"eval_rougeL": 31.1862, |
|
"eval_rougeLsum": 40.594, |
|
"eval_runtime": 5524.867, |
|
"eval_samples_per_second": 1.201, |
|
"eval_steps_per_second": 0.15, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8320325528650527e-05, |
|
"loss": 0.906, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_gen_len": 126.8627, |
|
"eval_loss": 1.5116254091262817, |
|
"eval_rouge1": 45.4542, |
|
"eval_rouge2": 21.4172, |
|
"eval_rougeL": 31.0754, |
|
"eval_rougeLsum": 40.439, |
|
"eval_runtime": 5524.341, |
|
"eval_samples_per_second": 1.201, |
|
"eval_steps_per_second": 0.15, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.776443643964157e-05, |
|
"loss": 0.8994, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_gen_len": 126.8206, |
|
"eval_loss": 1.5085355043411255, |
|
"eval_rouge1": 45.5424, |
|
"eval_rouge2": 21.5009, |
|
"eval_rougeL": 31.1428, |
|
"eval_rougeLsum": 40.5667, |
|
"eval_runtime": 5528.1375, |
|
"eval_samples_per_second": 1.2, |
|
"eval_steps_per_second": 0.15, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7208547350632603e-05, |
|
"loss": 0.9088, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_gen_len": 126.7414, |
|
"eval_loss": 1.5124515295028687, |
|
"eval_rouge1": 45.3129, |
|
"eval_rouge2": 21.2629, |
|
"eval_rougeL": 30.9461, |
|
"eval_rougeLsum": 40.3271, |
|
"eval_runtime": 5534.3419, |
|
"eval_samples_per_second": 1.199, |
|
"eval_steps_per_second": 0.15, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.665265826162364e-05, |
|
"loss": 0.8983, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_gen_len": 126.357, |
|
"eval_loss": 1.5135449171066284, |
|
"eval_rouge1": 45.6846, |
|
"eval_rouge2": 21.6282, |
|
"eval_rougeL": 31.2929, |
|
"eval_rougeLsum": 40.6821, |
|
"eval_runtime": 5538.2932, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.15, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6096769172614682e-05, |
|
"loss": 0.907, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_gen_len": 127.0029, |
|
"eval_loss": 1.5076923370361328, |
|
"eval_rouge1": 45.4873, |
|
"eval_rouge2": 21.455, |
|
"eval_rougeL": 31.1193, |
|
"eval_rougeLsum": 40.5128, |
|
"eval_runtime": 5539.9922, |
|
"eval_samples_per_second": 1.197, |
|
"eval_steps_per_second": 0.15, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5540880083605723e-05, |
|
"loss": 0.9097, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_gen_len": 126.8553, |
|
"eval_loss": 1.5052434206008911, |
|
"eval_rouge1": 45.5988, |
|
"eval_rouge2": 21.6134, |
|
"eval_rougeL": 31.247, |
|
"eval_rougeLsum": 40.58, |
|
"eval_runtime": 5539.7468, |
|
"eval_samples_per_second": 1.197, |
|
"eval_steps_per_second": 0.15, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.498499099459676e-05, |
|
"loss": 0.9033, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_gen_len": 127.0048, |
|
"eval_loss": 1.5133850574493408, |
|
"eval_rouge1": 45.3223, |
|
"eval_rouge2": 21.2968, |
|
"eval_rougeL": 30.9357, |
|
"eval_rougeLsum": 40.3813, |
|
"eval_runtime": 9404.5, |
|
"eval_samples_per_second": 0.705, |
|
"eval_steps_per_second": 0.088, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.44291019055878e-05, |
|
"loss": 0.8925, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_gen_len": 126.7316, |
|
"eval_loss": 1.510839819908142, |
|
"eval_rouge1": 45.6747, |
|
"eval_rouge2": 21.6374, |
|
"eval_rougeL": 31.31, |
|
"eval_rougeLsum": 40.7015, |
|
"eval_runtime": 22732.2519, |
|
"eval_samples_per_second": 0.292, |
|
"eval_steps_per_second": 0.037, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.387321281657884e-05, |
|
"loss": 0.8913, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_gen_len": 126.6869, |
|
"eval_loss": 1.5129714012145996, |
|
"eval_rouge1": 45.6531, |
|
"eval_rouge2": 21.6354, |
|
"eval_rougeL": 31.2956, |
|
"eval_rougeLsum": 40.6555, |
|
"eval_runtime": 6945.8776, |
|
"eval_samples_per_second": 0.955, |
|
"eval_steps_per_second": 0.119, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3317323727569874e-05, |
|
"loss": 0.8931, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_gen_len": 126.4862, |
|
"eval_loss": 1.5111068487167358, |
|
"eval_rouge1": 45.7876, |
|
"eval_rouge2": 21.7115, |
|
"eval_rougeL": 31.3274, |
|
"eval_rougeLsum": 40.7579, |
|
"eval_runtime": 5539.5619, |
|
"eval_samples_per_second": 1.197, |
|
"eval_steps_per_second": 0.15, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2761434638560915e-05, |
|
"loss": 0.9009, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_gen_len": 126.6229, |
|
"eval_loss": 1.5083845853805542, |
|
"eval_rouge1": 45.6359, |
|
"eval_rouge2": 21.583, |
|
"eval_rougeL": 31.2775, |
|
"eval_rougeLsum": 40.6351, |
|
"eval_runtime": 5545.0209, |
|
"eval_samples_per_second": 1.196, |
|
"eval_steps_per_second": 0.15, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2205545549551953e-05, |
|
"loss": 0.8925, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_gen_len": 126.8396, |
|
"eval_loss": 1.5094473361968994, |
|
"eval_rouge1": 45.397, |
|
"eval_rouge2": 21.4266, |
|
"eval_rougeL": 31.082, |
|
"eval_rougeLsum": 40.4261, |
|
"eval_runtime": 5534.6802, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.15, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1649656460542994e-05, |
|
"loss": 0.8991, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_gen_len": 126.722, |
|
"eval_loss": 1.512014627456665, |
|
"eval_rouge1": 45.2851, |
|
"eval_rouge2": 21.2798, |
|
"eval_rougeL": 30.8973, |
|
"eval_rougeLsum": 40.2787, |
|
"eval_runtime": 5538.1327, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.15, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1093767371534032e-05, |
|
"loss": 0.9019, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_gen_len": 126.8048, |
|
"eval_loss": 1.510252833366394, |
|
"eval_rouge1": 45.2905, |
|
"eval_rouge2": 21.2992, |
|
"eval_rougeL": 30.9204, |
|
"eval_rougeLsum": 40.3262, |
|
"eval_runtime": 5535.5354, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.15, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0537878282525073e-05, |
|
"loss": 0.891, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_gen_len": 126.2902, |
|
"eval_loss": 1.5112383365631104, |
|
"eval_rouge1": 45.7091, |
|
"eval_rouge2": 21.6159, |
|
"eval_rougeL": 31.2889, |
|
"eval_rougeLsum": 40.6986, |
|
"eval_runtime": 5537.5343, |
|
"eval_samples_per_second": 1.198, |
|
"eval_steps_per_second": 0.15, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.998198919351611e-05, |
|
"loss": 0.898, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_gen_len": 126.5218, |
|
"eval_loss": 1.5084278583526611, |
|
"eval_rouge1": 45.4964, |
|
"eval_rouge2": 21.4702, |
|
"eval_rougeL": 31.177, |
|
"eval_rougeLsum": 40.5432, |
|
"eval_runtime": 5530.5865, |
|
"eval_samples_per_second": 1.199, |
|
"eval_steps_per_second": 0.15, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.942610010450715e-05, |
|
"loss": 0.8839, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_gen_len": 126.8648, |
|
"eval_loss": 1.5090144872665405, |
|
"eval_rouge1": 45.6279, |
|
"eval_rouge2": 21.5346, |
|
"eval_rougeL": 31.252, |
|
"eval_rougeLsum": 40.6096, |
|
"eval_runtime": 5522.5033, |
|
"eval_samples_per_second": 1.201, |
|
"eval_steps_per_second": 0.15, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8870211015498187e-05, |
|
"loss": 0.8899, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_gen_len": 126.8498, |
|
"eval_loss": 1.5073039531707764, |
|
"eval_rouge1": 45.6406, |
|
"eval_rouge2": 21.5551, |
|
"eval_rougeL": 31.2519, |
|
"eval_rougeLsum": 40.6425, |
|
"eval_runtime": 5520.5026, |
|
"eval_samples_per_second": 1.202, |
|
"eval_steps_per_second": 0.15, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8314321926489228e-05, |
|
"loss": 0.8904, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_gen_len": 126.689, |
|
"eval_loss": 1.5086652040481567, |
|
"eval_rouge1": 45.7334, |
|
"eval_rouge2": 21.7071, |
|
"eval_rougeL": 31.3069, |
|
"eval_rougeLsum": 40.6992, |
|
"eval_runtime": 5519.0861, |
|
"eval_samples_per_second": 1.202, |
|
"eval_steps_per_second": 0.15, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7758432837480266e-05, |
|
"loss": 0.8958, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_gen_len": 126.9157, |
|
"eval_loss": 1.5112992525100708, |
|
"eval_rouge1": 45.4618, |
|
"eval_rouge2": 21.4623, |
|
"eval_rougeL": 31.0914, |
|
"eval_rougeLsum": 40.4648, |
|
"eval_runtime": 5520.0088, |
|
"eval_samples_per_second": 1.202, |
|
"eval_steps_per_second": 0.15, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7202543748471307e-05, |
|
"loss": 0.8991, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_gen_len": 126.7855, |
|
"eval_loss": 1.5126971006393433, |
|
"eval_rouge1": 45.6364, |
|
"eval_rouge2": 21.5467, |
|
"eval_rougeL": 31.2001, |
|
"eval_rougeLsum": 40.5946, |
|
"eval_runtime": 5532.4138, |
|
"eval_samples_per_second": 1.199, |
|
"eval_steps_per_second": 0.15, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6646654659462345e-05, |
|
"loss": 0.889, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 126.6989, |
|
"eval_loss": 1.5128982067108154, |
|
"eval_rouge1": 45.3668, |
|
"eval_rouge2": 21.3563, |
|
"eval_rougeL": 30.998, |
|
"eval_rougeLsum": 40.3714, |
|
"eval_runtime": 5736.9272, |
|
"eval_samples_per_second": 1.156, |
|
"eval_steps_per_second": 0.145, |
|
"step": 30000 |
|
} |
|
], |
|
"max_steps": 44973, |
|
"num_train_epochs": 3, |
|
"total_flos": 7.801293866564321e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|