|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.436636818940658, |
|
"eval_steps": 750, |
|
"global_step": 98000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15176809834572771, |
|
"grad_norm": 1.347341537475586, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 2.0649, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.30353619669145543, |
|
"grad_norm": 1.299472689628601, |
|
"learning_rate": 1.984469638142569e-05, |
|
"loss": 1.7903, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.45530429503718317, |
|
"grad_norm": 1.5234107971191406, |
|
"learning_rate": 1.9534089144277063e-05, |
|
"loss": 1.6734, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6070723933829109, |
|
"grad_norm": 6.629986763000488, |
|
"learning_rate": 1.922348190712844e-05, |
|
"loss": 1.6014, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.7588404917286387, |
|
"grad_norm": 1.8827601671218872, |
|
"learning_rate": 1.8912874669979814e-05, |
|
"loss": 1.5582, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.9106085900743663, |
|
"grad_norm": 1.4713941812515259, |
|
"learning_rate": 1.8602267432831186e-05, |
|
"loss": 1.5393, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.4226312637329102, |
|
"eval_rouge1": 15.9571, |
|
"eval_rouge2": 5.6803, |
|
"eval_rougeL": 13.7071, |
|
"eval_rougeLsum": 13.8127, |
|
"eval_runtime": 167.9928, |
|
"eval_samples_per_second": 29.763, |
|
"eval_steps_per_second": 3.72, |
|
"step": 13178 |
|
}, |
|
{ |
|
"epoch": 1.062376688420094, |
|
"grad_norm": 1.397419810295105, |
|
"learning_rate": 1.829166019568256e-05, |
|
"loss": 1.5108, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.214144786765822, |
|
"grad_norm": 1.282456398010254, |
|
"learning_rate": 1.7981052958533937e-05, |
|
"loss": 1.4931, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.3659128851115496, |
|
"grad_norm": 1.5143241882324219, |
|
"learning_rate": 1.767044572138531e-05, |
|
"loss": 1.4687, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.5176809834572773, |
|
"grad_norm": 1.0350826978683472, |
|
"learning_rate": 1.7359838484236684e-05, |
|
"loss": 1.4543, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.669449081803005, |
|
"grad_norm": 1.2214738130569458, |
|
"learning_rate": 1.704923124708806e-05, |
|
"loss": 1.4466, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.8212171801487327, |
|
"grad_norm": 1.226135492324829, |
|
"learning_rate": 1.6738624009939432e-05, |
|
"loss": 1.4314, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.9729852784944604, |
|
"grad_norm": 1.2347540855407715, |
|
"learning_rate": 1.6428016772790807e-05, |
|
"loss": 1.4255, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.3427051305770874, |
|
"eval_rouge1": 16.3039, |
|
"eval_rouge2": 6.0757, |
|
"eval_rougeL": 14.0031, |
|
"eval_rougeLsum": 14.1114, |
|
"eval_runtime": 168.2423, |
|
"eval_samples_per_second": 29.719, |
|
"eval_steps_per_second": 3.715, |
|
"step": 26356 |
|
}, |
|
{ |
|
"epoch": 2.124753376840188, |
|
"grad_norm": 1.024688482284546, |
|
"learning_rate": 1.6117409535642183e-05, |
|
"loss": 1.4107, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.2765214751859157, |
|
"grad_norm": 1.1046956777572632, |
|
"learning_rate": 1.5806802298493555e-05, |
|
"loss": 1.3999, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 2.428289573531644, |
|
"grad_norm": 1.4118067026138306, |
|
"learning_rate": 1.549619506134493e-05, |
|
"loss": 1.394, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 2.5800576718773716, |
|
"grad_norm": 1.0044879913330078, |
|
"learning_rate": 1.5185587824196304e-05, |
|
"loss": 1.3894, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 2.7318257702230992, |
|
"grad_norm": 3.0287246704101562, |
|
"learning_rate": 1.4874980587047681e-05, |
|
"loss": 1.3823, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.883593868568827, |
|
"grad_norm": 0.8824607133865356, |
|
"learning_rate": 1.4564373349899055e-05, |
|
"loss": 1.3747, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.2928217649459839, |
|
"eval_rouge1": 16.2852, |
|
"eval_rouge2": 6.139, |
|
"eval_rougeL": 14.0119, |
|
"eval_rougeLsum": 14.1209, |
|
"eval_runtime": 172.2837, |
|
"eval_samples_per_second": 29.022, |
|
"eval_steps_per_second": 3.628, |
|
"step": 39534 |
|
}, |
|
{ |
|
"epoch": 3.0353619669145546, |
|
"grad_norm": 1.7139147520065308, |
|
"learning_rate": 1.4253766112750429e-05, |
|
"loss": 1.365, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 3.1871300652602823, |
|
"grad_norm": 0.9839210510253906, |
|
"learning_rate": 1.3943158875601804e-05, |
|
"loss": 1.3631, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 3.33889816360601, |
|
"grad_norm": 2.022289514541626, |
|
"learning_rate": 1.3632551638453178e-05, |
|
"loss": 1.3497, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 3.4906662619517377, |
|
"grad_norm": 4.369687080383301, |
|
"learning_rate": 1.3321944401304551e-05, |
|
"loss": 1.3536, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 3.6424343602974654, |
|
"grad_norm": 1.046391487121582, |
|
"learning_rate": 1.3011337164155927e-05, |
|
"loss": 1.3455, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 3.794202458643193, |
|
"grad_norm": 3.8603522777557373, |
|
"learning_rate": 1.27007299270073e-05, |
|
"loss": 1.3396, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 3.9459705569889207, |
|
"grad_norm": 1.02574622631073, |
|
"learning_rate": 1.2390122689858674e-05, |
|
"loss": 1.3347, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.2622406482696533, |
|
"eval_rouge1": 16.4481, |
|
"eval_rouge2": 6.2714, |
|
"eval_rougeL": 14.1706, |
|
"eval_rougeLsum": 14.2806, |
|
"eval_runtime": 167.9003, |
|
"eval_samples_per_second": 29.78, |
|
"eval_steps_per_second": 3.722, |
|
"step": 52712 |
|
}, |
|
{ |
|
"epoch": 4.097738655334648, |
|
"grad_norm": 1.366310715675354, |
|
"learning_rate": 1.2079515452710048e-05, |
|
"loss": 1.3304, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 4.249506753680376, |
|
"grad_norm": 1.1469073295593262, |
|
"learning_rate": 1.1768908215561424e-05, |
|
"loss": 1.3271, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 4.401274852026104, |
|
"grad_norm": 1.0787475109100342, |
|
"learning_rate": 1.1458300978412797e-05, |
|
"loss": 1.3199, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 4.5530429503718315, |
|
"grad_norm": 1.045688271522522, |
|
"learning_rate": 1.1147693741264171e-05, |
|
"loss": 1.3229, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 4.704811048717559, |
|
"grad_norm": 1.0128060579299927, |
|
"learning_rate": 1.0837086504115546e-05, |
|
"loss": 1.3156, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 4.856579147063288, |
|
"grad_norm": 1.1346766948699951, |
|
"learning_rate": 1.052647926696692e-05, |
|
"loss": 1.3186, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.23964262008667, |
|
"eval_rouge1": 16.5213, |
|
"eval_rouge2": 6.4307, |
|
"eval_rougeL": 14.289, |
|
"eval_rougeLsum": 14.3853, |
|
"eval_runtime": 170.4893, |
|
"eval_samples_per_second": 29.327, |
|
"eval_steps_per_second": 3.666, |
|
"step": 65890 |
|
}, |
|
{ |
|
"epoch": 5.008347245409015, |
|
"grad_norm": 1.000510334968567, |
|
"learning_rate": 1.0215872029818294e-05, |
|
"loss": 1.3074, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 5.160115343754743, |
|
"grad_norm": 1.4083774089813232, |
|
"learning_rate": 9.90526479266967e-06, |
|
"loss": 1.3079, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 5.311883442100471, |
|
"grad_norm": 1.065021276473999, |
|
"learning_rate": 9.594657555521045e-06, |
|
"loss": 1.3055, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 5.4636515404461985, |
|
"grad_norm": 0.9340164065361023, |
|
"learning_rate": 9.284050318372419e-06, |
|
"loss": 1.305, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 5.615419638791926, |
|
"grad_norm": 0.9457820653915405, |
|
"learning_rate": 8.973443081223792e-06, |
|
"loss": 1.3015, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 5.767187737137654, |
|
"grad_norm": 0.8897130489349365, |
|
"learning_rate": 8.662835844075168e-06, |
|
"loss": 1.2985, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 5.9189558354833816, |
|
"grad_norm": 1.2775472402572632, |
|
"learning_rate": 8.352228606926543e-06, |
|
"loss": 1.2973, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.2244175672531128, |
|
"eval_rouge1": 16.4244, |
|
"eval_rouge2": 6.384, |
|
"eval_rougeL": 14.2167, |
|
"eval_rougeLsum": 14.3188, |
|
"eval_runtime": 170.4105, |
|
"eval_samples_per_second": 29.341, |
|
"eval_steps_per_second": 3.668, |
|
"step": 79068 |
|
}, |
|
{ |
|
"epoch": 6.070723933829109, |
|
"grad_norm": 1.0759906768798828, |
|
"learning_rate": 8.041621369777917e-06, |
|
"loss": 1.2908, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 6.222492032174837, |
|
"grad_norm": 1.315941333770752, |
|
"learning_rate": 7.73101413262929e-06, |
|
"loss": 1.2927, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 6.374260130520565, |
|
"grad_norm": 0.936198353767395, |
|
"learning_rate": 7.420406895480665e-06, |
|
"loss": 1.2945, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 6.526028228866292, |
|
"grad_norm": 1.233934998512268, |
|
"learning_rate": 7.10979965833204e-06, |
|
"loss": 1.285, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 6.67779632721202, |
|
"grad_norm": 1.1760342121124268, |
|
"learning_rate": 6.7991924211834135e-06, |
|
"loss": 1.2877, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 6.829564425557748, |
|
"grad_norm": 2.0586724281311035, |
|
"learning_rate": 6.488585184034788e-06, |
|
"loss": 1.283, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 6.981332523903475, |
|
"grad_norm": 0.8292114734649658, |
|
"learning_rate": 6.1779779468861636e-06, |
|
"loss": 1.2817, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.21384859085083, |
|
"eval_rouge1": 16.5727, |
|
"eval_rouge2": 6.4685, |
|
"eval_rougeL": 14.3558, |
|
"eval_rougeLsum": 14.4703, |
|
"eval_runtime": 168.1885, |
|
"eval_samples_per_second": 29.729, |
|
"eval_steps_per_second": 3.716, |
|
"step": 92246 |
|
}, |
|
{ |
|
"epoch": 7.133100622249203, |
|
"grad_norm": 0.9742059111595154, |
|
"learning_rate": 5.867370709737537e-06, |
|
"loss": 1.2858, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 7.284868720594931, |
|
"grad_norm": 1.0256426334381104, |
|
"learning_rate": 5.556763472588912e-06, |
|
"loss": 1.2804, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 7.436636818940658, |
|
"grad_norm": 1.0933358669281006, |
|
"learning_rate": 5.2461562354402865e-06, |
|
"loss": 1.2772, |
|
"step": 98000 |
|
} |
|
], |
|
"logging_steps": 2000, |
|
"max_steps": 131780, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0610228799838618e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|