|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 55860, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26852846401718583, |
|
"grad_norm": 5.849491596221924, |
|
"learning_rate": 4.955245255997136e-05, |
|
"loss": 3.8311, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5370569280343717, |
|
"grad_norm": 5.955765247344971, |
|
"learning_rate": 4.9104905119942717e-05, |
|
"loss": 3.0732, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8055853920515574, |
|
"grad_norm": 6.657278537750244, |
|
"learning_rate": 4.8657357679914074e-05, |
|
"loss": 2.7161, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.284536838531494, |
|
"eval_rouge2_fmeasure": 0.3144, |
|
"eval_rouge2_precision": 0.3144, |
|
"eval_rouge2_recall": 0.3144, |
|
"eval_runtime": 108.4134, |
|
"eval_samples_per_second": 44.155, |
|
"eval_steps_per_second": 2.767, |
|
"step": 1862 |
|
}, |
|
{ |
|
"epoch": 1.0741138560687433, |
|
"grad_norm": 6.813873767852783, |
|
"learning_rate": 4.820981023988543e-05, |
|
"loss": 2.3742, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.342642320085929, |
|
"grad_norm": 6.493252754211426, |
|
"learning_rate": 4.776226279985679e-05, |
|
"loss": 1.9193, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.6111707841031149, |
|
"grad_norm": 7.747920989990234, |
|
"learning_rate": 4.7314715359828145e-05, |
|
"loss": 1.9001, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.8796992481203008, |
|
"grad_norm": 6.069449424743652, |
|
"learning_rate": 4.6867167919799495e-05, |
|
"loss": 1.8676, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.0997378826141357, |
|
"eval_rouge2_fmeasure": 0.352, |
|
"eval_rouge2_precision": 0.352, |
|
"eval_rouge2_recall": 0.352, |
|
"eval_runtime": 110.0345, |
|
"eval_samples_per_second": 43.505, |
|
"eval_steps_per_second": 2.726, |
|
"step": 3724 |
|
}, |
|
{ |
|
"epoch": 2.1482277121374866, |
|
"grad_norm": 6.031770706176758, |
|
"learning_rate": 4.641962047977086e-05, |
|
"loss": 1.511, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.4167561761546725, |
|
"grad_norm": 6.697205543518066, |
|
"learning_rate": 4.5972073039742216e-05, |
|
"loss": 1.2827, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.685284640171858, |
|
"grad_norm": 7.0569071769714355, |
|
"learning_rate": 4.552452559971357e-05, |
|
"loss": 1.3136, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.9538131041890443, |
|
"grad_norm": 6.0844526290893555, |
|
"learning_rate": 4.507697815968493e-05, |
|
"loss": 1.3387, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.111859083175659, |
|
"eval_rouge2_fmeasure": 0.3545, |
|
"eval_rouge2_precision": 0.3545, |
|
"eval_rouge2_recall": 0.3545, |
|
"eval_runtime": 108.6339, |
|
"eval_samples_per_second": 44.065, |
|
"eval_steps_per_second": 2.762, |
|
"step": 5586 |
|
}, |
|
{ |
|
"epoch": 3.2223415682062297, |
|
"grad_norm": 5.514146327972412, |
|
"learning_rate": 4.462943071965629e-05, |
|
"loss": 0.9052, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.4908700322234156, |
|
"grad_norm": 6.395975112915039, |
|
"learning_rate": 4.4181883279627644e-05, |
|
"loss": 0.866, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.7593984962406015, |
|
"grad_norm": 6.1064348220825195, |
|
"learning_rate": 4.3734335839599e-05, |
|
"loss": 0.8991, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.1967225074768066, |
|
"eval_rouge2_fmeasure": 0.3531, |
|
"eval_rouge2_precision": 0.3531, |
|
"eval_rouge2_recall": 0.3531, |
|
"eval_runtime": 108.9474, |
|
"eval_samples_per_second": 43.939, |
|
"eval_steps_per_second": 2.754, |
|
"step": 7448 |
|
}, |
|
{ |
|
"epoch": 4.027926960257787, |
|
"grad_norm": 5.5337700843811035, |
|
"learning_rate": 4.328678839957035e-05, |
|
"loss": 0.89, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.296455424274973, |
|
"grad_norm": 5.39340353012085, |
|
"learning_rate": 4.2839240959541715e-05, |
|
"loss": 0.539, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.564983888292159, |
|
"grad_norm": 6.1959967613220215, |
|
"learning_rate": 4.239169351951307e-05, |
|
"loss": 0.5854, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.833512352309345, |
|
"grad_norm": 6.185136795043945, |
|
"learning_rate": 4.194414607948442e-05, |
|
"loss": 0.6131, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.305931806564331, |
|
"eval_rouge2_fmeasure": 0.3538, |
|
"eval_rouge2_precision": 0.3538, |
|
"eval_rouge2_recall": 0.3538, |
|
"eval_runtime": 108.7292, |
|
"eval_samples_per_second": 44.027, |
|
"eval_steps_per_second": 2.759, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 5.1020408163265305, |
|
"grad_norm": 3.842778205871582, |
|
"learning_rate": 4.149659863945579e-05, |
|
"loss": 0.5237, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.370569280343717, |
|
"grad_norm": 4.404533863067627, |
|
"learning_rate": 4.1049051199427144e-05, |
|
"loss": 0.3687, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.639097744360902, |
|
"grad_norm": 5.195127487182617, |
|
"learning_rate": 4.0601503759398494e-05, |
|
"loss": 0.3907, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.907626208378088, |
|
"grad_norm": 5.097487926483154, |
|
"learning_rate": 4.015395631936986e-05, |
|
"loss": 0.4204, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.4349613189697266, |
|
"eval_rouge2_fmeasure": 0.3489, |
|
"eval_rouge2_precision": 0.3489, |
|
"eval_rouge2_recall": 0.3489, |
|
"eval_runtime": 108.5656, |
|
"eval_samples_per_second": 44.093, |
|
"eval_steps_per_second": 2.763, |
|
"step": 11172 |
|
}, |
|
{ |
|
"epoch": 6.176154672395274, |
|
"grad_norm": 4.235614776611328, |
|
"learning_rate": 3.970640887934121e-05, |
|
"loss": 0.3066, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.4446831364124595, |
|
"grad_norm": 4.217000961303711, |
|
"learning_rate": 3.9258861439312565e-05, |
|
"loss": 0.2646, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.713211600429646, |
|
"grad_norm": 3.1819193363189697, |
|
"learning_rate": 3.881131399928393e-05, |
|
"loss": 0.2797, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.981740064446831, |
|
"grad_norm": 4.56110954284668, |
|
"learning_rate": 3.836376655925528e-05, |
|
"loss": 0.2933, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.5125792026519775, |
|
"eval_rouge2_fmeasure": 0.3476, |
|
"eval_rouge2_precision": 0.3476, |
|
"eval_rouge2_recall": 0.3476, |
|
"eval_runtime": 108.9982, |
|
"eval_samples_per_second": 43.918, |
|
"eval_steps_per_second": 2.752, |
|
"step": 13034 |
|
}, |
|
{ |
|
"epoch": 7.250268528464018, |
|
"grad_norm": 3.549121856689453, |
|
"learning_rate": 3.791621911922664e-05, |
|
"loss": 0.1865, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 7.518796992481203, |
|
"grad_norm": 3.6021695137023926, |
|
"learning_rate": 3.7468671679198e-05, |
|
"loss": 0.194, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.7873254564983885, |
|
"grad_norm": 4.42069149017334, |
|
"learning_rate": 3.702112423916935e-05, |
|
"loss": 0.2065, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.6083967685699463, |
|
"eval_rouge2_fmeasure": 0.3468, |
|
"eval_rouge2_precision": 0.3468, |
|
"eval_rouge2_recall": 0.3468, |
|
"eval_runtime": 108.8525, |
|
"eval_samples_per_second": 43.977, |
|
"eval_steps_per_second": 2.756, |
|
"step": 14896 |
|
}, |
|
{ |
|
"epoch": 8.055853920515574, |
|
"grad_norm": 3.3351027965545654, |
|
"learning_rate": 3.6573576799140714e-05, |
|
"loss": 0.1976, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 8.324382384532761, |
|
"grad_norm": 4.0378007888793945, |
|
"learning_rate": 3.6126029359112065e-05, |
|
"loss": 0.1395, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 8.592910848549947, |
|
"grad_norm": 4.384516716003418, |
|
"learning_rate": 3.567848191908342e-05, |
|
"loss": 0.1485, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 8.861439312567132, |
|
"grad_norm": 3.7494637966156006, |
|
"learning_rate": 3.5230934479054786e-05, |
|
"loss": 0.1555, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 2.697331428527832, |
|
"eval_rouge2_fmeasure": 0.345, |
|
"eval_rouge2_precision": 0.345, |
|
"eval_rouge2_recall": 0.345, |
|
"eval_runtime": 108.5316, |
|
"eval_samples_per_second": 44.107, |
|
"eval_steps_per_second": 2.764, |
|
"step": 16758 |
|
}, |
|
{ |
|
"epoch": 9.129967776584317, |
|
"grad_norm": 2.9512407779693604, |
|
"learning_rate": 3.4783387039026136e-05, |
|
"loss": 0.1341, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 9.398496240601503, |
|
"grad_norm": 2.6564815044403076, |
|
"learning_rate": 3.433583959899749e-05, |
|
"loss": 0.1092, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 9.66702470461869, |
|
"grad_norm": 2.2893385887145996, |
|
"learning_rate": 3.388829215896886e-05, |
|
"loss": 0.1169, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 9.935553168635876, |
|
"grad_norm": 4.070418834686279, |
|
"learning_rate": 3.344074471894021e-05, |
|
"loss": 0.1207, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.7456772327423096, |
|
"eval_rouge2_fmeasure": 0.3462, |
|
"eval_rouge2_precision": 0.3462, |
|
"eval_rouge2_recall": 0.3462, |
|
"eval_runtime": 108.495, |
|
"eval_samples_per_second": 44.122, |
|
"eval_steps_per_second": 2.765, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 10.204081632653061, |
|
"grad_norm": 2.9593915939331055, |
|
"learning_rate": 3.2993197278911564e-05, |
|
"loss": 0.0937, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 10.472610096670246, |
|
"grad_norm": 3.2143216133117676, |
|
"learning_rate": 3.254564983888292e-05, |
|
"loss": 0.0905, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 10.741138560687434, |
|
"grad_norm": 3.0030126571655273, |
|
"learning_rate": 3.209810239885428e-05, |
|
"loss": 0.0942, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 2.8160953521728516, |
|
"eval_rouge2_fmeasure": 0.3516, |
|
"eval_rouge2_precision": 0.3516, |
|
"eval_rouge2_recall": 0.3516, |
|
"eval_runtime": 108.5954, |
|
"eval_samples_per_second": 44.081, |
|
"eval_steps_per_second": 2.763, |
|
"step": 20482 |
|
}, |
|
{ |
|
"epoch": 11.009667024704619, |
|
"grad_norm": 2.6557607650756836, |
|
"learning_rate": 3.1650554958825635e-05, |
|
"loss": 0.0966, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 11.278195488721805, |
|
"grad_norm": 2.9481875896453857, |
|
"learning_rate": 3.120300751879699e-05, |
|
"loss": 0.0701, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 11.54672395273899, |
|
"grad_norm": 2.3334264755249023, |
|
"learning_rate": 3.075546007876835e-05, |
|
"loss": 0.0733, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 11.815252416756175, |
|
"grad_norm": 2.348362684249878, |
|
"learning_rate": 3.030791263873971e-05, |
|
"loss": 0.0793, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 2.861933708190918, |
|
"eval_rouge2_fmeasure": 0.351, |
|
"eval_rouge2_precision": 0.351, |
|
"eval_rouge2_recall": 0.351, |
|
"eval_runtime": 109.2797, |
|
"eval_samples_per_second": 43.805, |
|
"eval_steps_per_second": 2.745, |
|
"step": 22344 |
|
}, |
|
{ |
|
"epoch": 12.083780880773363, |
|
"grad_norm": 3.213848352432251, |
|
"learning_rate": 2.9860365198711067e-05, |
|
"loss": 0.0714, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 12.352309344790548, |
|
"grad_norm": 2.3654093742370605, |
|
"learning_rate": 2.941281775868242e-05, |
|
"loss": 0.0585, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 12.620837808807734, |
|
"grad_norm": 3.389200448989868, |
|
"learning_rate": 2.896527031865378e-05, |
|
"loss": 0.0616, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 12.889366272824919, |
|
"grad_norm": 2.646812915802002, |
|
"learning_rate": 2.8517722878625135e-05, |
|
"loss": 0.0651, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 2.9142892360687256, |
|
"eval_rouge2_fmeasure": 0.3507, |
|
"eval_rouge2_precision": 0.3507, |
|
"eval_rouge2_recall": 0.3507, |
|
"eval_runtime": 109.4829, |
|
"eval_samples_per_second": 43.724, |
|
"eval_steps_per_second": 2.74, |
|
"step": 24206 |
|
}, |
|
{ |
|
"epoch": 13.157894736842104, |
|
"grad_norm": 1.8852452039718628, |
|
"learning_rate": 2.8070175438596492e-05, |
|
"loss": 0.0552, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 13.426423200859292, |
|
"grad_norm": 3.123187780380249, |
|
"learning_rate": 2.7622627998567852e-05, |
|
"loss": 0.0503, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 13.694951664876477, |
|
"grad_norm": 3.341310739517212, |
|
"learning_rate": 2.7175080558539206e-05, |
|
"loss": 0.0527, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 13.963480128893663, |
|
"grad_norm": 1.7806404829025269, |
|
"learning_rate": 2.6727533118510563e-05, |
|
"loss": 0.0532, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 2.962496042251587, |
|
"eval_rouge2_fmeasure": 0.3549, |
|
"eval_rouge2_precision": 0.3549, |
|
"eval_rouge2_recall": 0.3549, |
|
"eval_runtime": 108.8902, |
|
"eval_samples_per_second": 43.962, |
|
"eval_steps_per_second": 2.755, |
|
"step": 26068 |
|
}, |
|
{ |
|
"epoch": 14.232008592910848, |
|
"grad_norm": 1.865723967552185, |
|
"learning_rate": 2.6279985678481924e-05, |
|
"loss": 0.044, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 14.500537056928035, |
|
"grad_norm": 2.5559322834014893, |
|
"learning_rate": 2.5832438238453277e-05, |
|
"loss": 0.0422, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 14.76906552094522, |
|
"grad_norm": 2.9102725982666016, |
|
"learning_rate": 2.538489079842463e-05, |
|
"loss": 0.0428, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 2.9846532344818115, |
|
"eval_rouge2_fmeasure": 0.351, |
|
"eval_rouge2_precision": 0.351, |
|
"eval_rouge2_recall": 0.351, |
|
"eval_runtime": 108.8716, |
|
"eval_samples_per_second": 43.969, |
|
"eval_steps_per_second": 2.756, |
|
"step": 27930 |
|
}, |
|
{ |
|
"epoch": 15.037593984962406, |
|
"grad_norm": 1.985103726387024, |
|
"learning_rate": 2.493734335839599e-05, |
|
"loss": 0.0461, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 15.306122448979592, |
|
"grad_norm": 2.463585138320923, |
|
"learning_rate": 2.448979591836735e-05, |
|
"loss": 0.0366, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 15.574650912996777, |
|
"grad_norm": 1.7706794738769531, |
|
"learning_rate": 2.4042248478338705e-05, |
|
"loss": 0.0357, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 15.843179377013964, |
|
"grad_norm": 1.8243809938430786, |
|
"learning_rate": 2.3594701038310063e-05, |
|
"loss": 0.0361, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 3.0359654426574707, |
|
"eval_rouge2_fmeasure": 0.3542, |
|
"eval_rouge2_precision": 0.3542, |
|
"eval_rouge2_recall": 0.3542, |
|
"eval_runtime": 108.0792, |
|
"eval_samples_per_second": 44.292, |
|
"eval_steps_per_second": 2.776, |
|
"step": 29792 |
|
}, |
|
{ |
|
"epoch": 16.111707841031148, |
|
"grad_norm": 1.5588297843933105, |
|
"learning_rate": 2.314715359828142e-05, |
|
"loss": 0.0335, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 16.380236305048335, |
|
"grad_norm": 2.016108274459839, |
|
"learning_rate": 2.2699606158252777e-05, |
|
"loss": 0.0294, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 16.648764769065522, |
|
"grad_norm": 2.35304594039917, |
|
"learning_rate": 2.2252058718224134e-05, |
|
"loss": 0.0317, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 16.917293233082706, |
|
"grad_norm": 1.6884231567382812, |
|
"learning_rate": 2.1804511278195487e-05, |
|
"loss": 0.0311, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 3.0791501998901367, |
|
"eval_rouge2_fmeasure": 0.3557, |
|
"eval_rouge2_precision": 0.3557, |
|
"eval_rouge2_recall": 0.3557, |
|
"eval_runtime": 108.4852, |
|
"eval_samples_per_second": 44.126, |
|
"eval_steps_per_second": 2.765, |
|
"step": 31654 |
|
}, |
|
{ |
|
"epoch": 17.185821697099893, |
|
"grad_norm": 2.347106695175171, |
|
"learning_rate": 2.1356963838166848e-05, |
|
"loss": 0.026, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 17.454350161117077, |
|
"grad_norm": 3.1584882736206055, |
|
"learning_rate": 2.0909416398138205e-05, |
|
"loss": 0.0265, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 17.722878625134264, |
|
"grad_norm": 1.6716469526290894, |
|
"learning_rate": 2.0461868958109562e-05, |
|
"loss": 0.0251, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 17.99140708915145, |
|
"grad_norm": 1.3004461526870728, |
|
"learning_rate": 2.0014321518080916e-05, |
|
"loss": 0.0255, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 3.0882503986358643, |
|
"eval_rouge2_fmeasure": 0.353, |
|
"eval_rouge2_precision": 0.353, |
|
"eval_rouge2_recall": 0.353, |
|
"eval_runtime": 108.6282, |
|
"eval_samples_per_second": 44.068, |
|
"eval_steps_per_second": 2.762, |
|
"step": 33516 |
|
}, |
|
{ |
|
"epoch": 18.259935553168635, |
|
"grad_norm": 0.8307426571846008, |
|
"learning_rate": 1.9566774078052276e-05, |
|
"loss": 0.0218, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 18.528464017185822, |
|
"grad_norm": 1.7560513019561768, |
|
"learning_rate": 1.9119226638023633e-05, |
|
"loss": 0.0209, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 18.796992481203006, |
|
"grad_norm": 1.8823086023330688, |
|
"learning_rate": 1.8671679197994987e-05, |
|
"loss": 0.0217, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 3.1189284324645996, |
|
"eval_rouge2_fmeasure": 0.3525, |
|
"eval_rouge2_precision": 0.3525, |
|
"eval_rouge2_recall": 0.3525, |
|
"eval_runtime": 108.8718, |
|
"eval_samples_per_second": 43.969, |
|
"eval_steps_per_second": 2.756, |
|
"step": 35378 |
|
}, |
|
{ |
|
"epoch": 19.065520945220193, |
|
"grad_norm": 2.705395460128784, |
|
"learning_rate": 1.8224131757966344e-05, |
|
"loss": 0.0211, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 19.33404940923738, |
|
"grad_norm": 0.777070164680481, |
|
"learning_rate": 1.7776584317937704e-05, |
|
"loss": 0.0171, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 19.602577873254564, |
|
"grad_norm": 0.8922987580299377, |
|
"learning_rate": 1.7329036877909058e-05, |
|
"loss": 0.0194, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 19.87110633727175, |
|
"grad_norm": 1.2800770998001099, |
|
"learning_rate": 1.6881489437880415e-05, |
|
"loss": 0.0182, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 3.1534199714660645, |
|
"eval_rouge2_fmeasure": 0.3553, |
|
"eval_rouge2_precision": 0.3553, |
|
"eval_rouge2_recall": 0.3553, |
|
"eval_runtime": 108.9674, |
|
"eval_samples_per_second": 43.931, |
|
"eval_steps_per_second": 2.753, |
|
"step": 37240 |
|
}, |
|
{ |
|
"epoch": 20.13963480128894, |
|
"grad_norm": 0.7617535591125488, |
|
"learning_rate": 1.6433941997851772e-05, |
|
"loss": 0.0159, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 20.408163265306122, |
|
"grad_norm": 1.3193833827972412, |
|
"learning_rate": 1.5986394557823133e-05, |
|
"loss": 0.0141, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 20.67669172932331, |
|
"grad_norm": 0.7571066617965698, |
|
"learning_rate": 1.5538847117794486e-05, |
|
"loss": 0.0151, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 20.945220193340493, |
|
"grad_norm": 0.3401932418346405, |
|
"learning_rate": 1.5091299677765843e-05, |
|
"loss": 0.0155, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 3.1899983882904053, |
|
"eval_rouge2_fmeasure": 0.3573, |
|
"eval_rouge2_precision": 0.3573, |
|
"eval_rouge2_recall": 0.3573, |
|
"eval_runtime": 108.6672, |
|
"eval_samples_per_second": 44.052, |
|
"eval_steps_per_second": 2.761, |
|
"step": 39102 |
|
}, |
|
{ |
|
"epoch": 21.21374865735768, |
|
"grad_norm": 0.29092851281166077, |
|
"learning_rate": 1.4643752237737202e-05, |
|
"loss": 0.0122, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 21.482277121374867, |
|
"grad_norm": 1.4362056255340576, |
|
"learning_rate": 1.4196204797708556e-05, |
|
"loss": 0.0122, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 21.75080558539205, |
|
"grad_norm": 6.062518119812012, |
|
"learning_rate": 1.3748657357679915e-05, |
|
"loss": 0.0119, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 3.1969234943389893, |
|
"eval_rouge2_fmeasure": 0.3561, |
|
"eval_rouge2_precision": 0.3561, |
|
"eval_rouge2_recall": 0.3561, |
|
"eval_runtime": 108.579, |
|
"eval_samples_per_second": 44.088, |
|
"eval_steps_per_second": 2.763, |
|
"step": 40964 |
|
}, |
|
{ |
|
"epoch": 22.019334049409238, |
|
"grad_norm": 2.686310052871704, |
|
"learning_rate": 1.3301109917651272e-05, |
|
"loss": 0.0126, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 22.287862513426422, |
|
"grad_norm": 1.607986569404602, |
|
"learning_rate": 1.285356247762263e-05, |
|
"loss": 0.0104, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 22.55639097744361, |
|
"grad_norm": 0.5264362692832947, |
|
"learning_rate": 1.2406015037593984e-05, |
|
"loss": 0.0099, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 22.824919441460796, |
|
"grad_norm": 0.3918701112270355, |
|
"learning_rate": 1.1958467597565343e-05, |
|
"loss": 0.0103, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 3.2080607414245605, |
|
"eval_rouge2_fmeasure": 0.3579, |
|
"eval_rouge2_precision": 0.3579, |
|
"eval_rouge2_recall": 0.3579, |
|
"eval_runtime": 108.8035, |
|
"eval_samples_per_second": 43.997, |
|
"eval_steps_per_second": 2.757, |
|
"step": 42826 |
|
}, |
|
{ |
|
"epoch": 23.09344790547798, |
|
"grad_norm": 0.4341621696949005, |
|
"learning_rate": 1.1510920157536698e-05, |
|
"loss": 0.0094, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 23.361976369495167, |
|
"grad_norm": 1.674943208694458, |
|
"learning_rate": 1.1063372717508057e-05, |
|
"loss": 0.008, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 23.63050483351235, |
|
"grad_norm": 0.439281165599823, |
|
"learning_rate": 1.0615825277479412e-05, |
|
"loss": 0.0087, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 23.899033297529538, |
|
"grad_norm": 0.629629909992218, |
|
"learning_rate": 1.0168277837450771e-05, |
|
"loss": 0.0085, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 3.2334721088409424, |
|
"eval_rouge2_fmeasure": 0.358, |
|
"eval_rouge2_precision": 0.358, |
|
"eval_rouge2_recall": 0.358, |
|
"eval_runtime": 108.5696, |
|
"eval_samples_per_second": 44.092, |
|
"eval_steps_per_second": 2.763, |
|
"step": 44688 |
|
}, |
|
{ |
|
"epoch": 24.167561761546725, |
|
"grad_norm": 0.44458189606666565, |
|
"learning_rate": 9.720730397422126e-06, |
|
"loss": 0.0073, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 24.43609022556391, |
|
"grad_norm": 0.6769450902938843, |
|
"learning_rate": 9.273182957393484e-06, |
|
"loss": 0.0066, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 24.704618689581096, |
|
"grad_norm": 0.3089818060398102, |
|
"learning_rate": 8.82563551736484e-06, |
|
"loss": 0.0065, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 24.97314715359828, |
|
"grad_norm": 0.40338027477264404, |
|
"learning_rate": 8.378088077336198e-06, |
|
"loss": 0.0067, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 3.242875576019287, |
|
"eval_rouge2_fmeasure": 0.3599, |
|
"eval_rouge2_precision": 0.3599, |
|
"eval_rouge2_recall": 0.3599, |
|
"eval_runtime": 109.3379, |
|
"eval_samples_per_second": 43.782, |
|
"eval_steps_per_second": 2.744, |
|
"step": 46550 |
|
}, |
|
{ |
|
"epoch": 25.241675617615467, |
|
"grad_norm": 0.41913506388664246, |
|
"learning_rate": 7.930540637307555e-06, |
|
"loss": 0.0057, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 25.510204081632654, |
|
"grad_norm": 0.20852769911289215, |
|
"learning_rate": 7.482993197278912e-06, |
|
"loss": 0.0054, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 25.778732545649838, |
|
"grad_norm": 0.31291133165359497, |
|
"learning_rate": 7.035445757250268e-06, |
|
"loss": 0.0053, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 3.256943464279175, |
|
"eval_rouge2_fmeasure": 0.3586, |
|
"eval_rouge2_precision": 0.3586, |
|
"eval_rouge2_recall": 0.3586, |
|
"eval_runtime": 109.1749, |
|
"eval_samples_per_second": 43.847, |
|
"eval_steps_per_second": 2.748, |
|
"step": 48412 |
|
}, |
|
{ |
|
"epoch": 26.047261009667025, |
|
"grad_norm": 0.19362983107566833, |
|
"learning_rate": 6.587898317221626e-06, |
|
"loss": 0.0052, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 26.31578947368421, |
|
"grad_norm": 0.7348465323448181, |
|
"learning_rate": 6.140350877192982e-06, |
|
"loss": 0.0044, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 26.584317937701396, |
|
"grad_norm": 0.08919904381036758, |
|
"learning_rate": 5.692803437164339e-06, |
|
"loss": 0.0048, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 26.852846401718583, |
|
"grad_norm": 0.22906944155693054, |
|
"learning_rate": 5.245255997135696e-06, |
|
"loss": 0.0043, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 3.264575719833374, |
|
"eval_rouge2_fmeasure": 0.3604, |
|
"eval_rouge2_precision": 0.3604, |
|
"eval_rouge2_recall": 0.3604, |
|
"eval_runtime": 108.6063, |
|
"eval_samples_per_second": 44.077, |
|
"eval_steps_per_second": 2.762, |
|
"step": 50274 |
|
}, |
|
{ |
|
"epoch": 27.121374865735767, |
|
"grad_norm": 0.2078796625137329, |
|
"learning_rate": 4.797708557107053e-06, |
|
"loss": 0.0037, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 27.389903329752954, |
|
"grad_norm": 0.6463788151741028, |
|
"learning_rate": 4.3501611170784104e-06, |
|
"loss": 0.0034, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 27.65843179377014, |
|
"grad_norm": 0.19699470698833466, |
|
"learning_rate": 3.9026136770497675e-06, |
|
"loss": 0.0032, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 27.926960257787325, |
|
"grad_norm": 0.08220793306827545, |
|
"learning_rate": 3.455066237021124e-06, |
|
"loss": 0.0032, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 3.2737011909484863, |
|
"eval_rouge2_fmeasure": 0.3623, |
|
"eval_rouge2_precision": 0.3623, |
|
"eval_rouge2_recall": 0.3623, |
|
"eval_runtime": 108.5841, |
|
"eval_samples_per_second": 44.086, |
|
"eval_steps_per_second": 2.763, |
|
"step": 52136 |
|
}, |
|
{ |
|
"epoch": 28.195488721804512, |
|
"grad_norm": 0.09868729114532471, |
|
"learning_rate": 3.007518796992481e-06, |
|
"loss": 0.0026, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 28.464017185821696, |
|
"grad_norm": 0.12712427973747253, |
|
"learning_rate": 2.5599713569638383e-06, |
|
"loss": 0.0025, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 28.732545649838883, |
|
"grad_norm": 0.12487781047821045, |
|
"learning_rate": 2.1124239169351953e-06, |
|
"loss": 0.0027, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 3.2774112224578857, |
|
"eval_rouge2_fmeasure": 0.3629, |
|
"eval_rouge2_precision": 0.3629, |
|
"eval_rouge2_recall": 0.3629, |
|
"eval_runtime": 108.3776, |
|
"eval_samples_per_second": 44.17, |
|
"eval_steps_per_second": 2.768, |
|
"step": 53998 |
|
}, |
|
{ |
|
"epoch": 29.00107411385607, |
|
"grad_norm": 0.15216058492660522, |
|
"learning_rate": 1.664876476906552e-06, |
|
"loss": 0.0028, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 29.269602577873254, |
|
"grad_norm": 0.1937492936849594, |
|
"learning_rate": 1.217329036877909e-06, |
|
"loss": 0.0025, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 29.53813104189044, |
|
"grad_norm": 0.08752380311489105, |
|
"learning_rate": 7.697815968492661e-07, |
|
"loss": 0.002, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 29.806659505907625, |
|
"grad_norm": 0.07473881542682648, |
|
"learning_rate": 3.22234156820623e-07, |
|
"loss": 0.002, |
|
"step": 55500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 55860, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.879896028708864e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|