{
  "best_metric": 0.6245766282081604,
  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_dj/checkpoint-8037",
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 21432,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 1.1291303634643555,
      "learning_rate": 4.7947437829691034e-05,
      "loss": 1.2336,
      "step": 2679
    },
    {
      "epoch": 1.0,
      "eval_bleu": 0.3526371485696729,
      "eval_loss": 0.7061845660209656,
      "eval_rouge1": 0.5198447201202445,
      "eval_rouge2": 0.25470674188424197,
      "eval_rougeL": 0.516986428368343,
      "eval_runtime": 17.6993,
      "eval_samples_per_second": 303.967,
      "eval_steps_per_second": 38.024,
      "step": 2679
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.4575163125991821,
      "learning_rate": 4.542388847023361e-05,
      "loss": 0.634,
      "step": 5358
    },
    {
      "epoch": 2.0,
      "eval_bleu": 0.37564617349393215,
      "eval_loss": 0.6422649025917053,
      "eval_rouge1": 0.5739420026060218,
      "eval_rouge2": 0.3114152854574803,
      "eval_rougeL": 0.571389088656927,
      "eval_runtime": 17.5523,
      "eval_samples_per_second": 306.513,
      "eval_steps_per_second": 38.343,
      "step": 5358
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.237191915512085,
      "learning_rate": 4.290033911077619e-05,
      "loss": 0.5299,
      "step": 8037
    },
    {
      "epoch": 3.0,
      "eval_bleu": 0.3877191285680082,
      "eval_loss": 0.6245766282081604,
      "eval_rouge1": 0.5957940125562868,
      "eval_rouge2": 0.3370143004573494,
      "eval_rougeL": 0.5934967085426222,
      "eval_runtime": 17.5587,
      "eval_samples_per_second": 306.401,
      "eval_steps_per_second": 38.329,
      "step": 8037
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.5628466606140137,
      "learning_rate": 4.0376789751318766e-05,
      "loss": 0.4492,
      "step": 10716
    },
    {
      "epoch": 4.0,
      "eval_bleu": 0.39048560865149107,
      "eval_loss": 0.6246171593666077,
      "eval_rouge1": 0.6081397458304423,
      "eval_rouge2": 0.3525706786064172,
      "eval_rougeL": 0.6056655214414464,
      "eval_runtime": 17.6128,
      "eval_samples_per_second": 305.459,
      "eval_steps_per_second": 38.211,
      "step": 10716
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.8470176458358765,
      "learning_rate": 3.785324039186134e-05,
      "loss": 0.3829,
      "step": 13395
    },
    {
      "epoch": 5.0,
      "eval_bleu": 0.3963490407851369,
      "eval_loss": 0.6300457715988159,
      "eval_rouge1": 0.6145424292978614,
      "eval_rouge2": 0.3620910031723723,
      "eval_rougeL": 0.6124537005851034,
      "eval_runtime": 17.4259,
      "eval_samples_per_second": 308.735,
      "eval_steps_per_second": 38.621,
      "step": 13395
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.5765687227249146,
      "learning_rate": 3.532969103240392e-05,
      "loss": 0.328,
      "step": 16074
    },
    {
      "epoch": 6.0,
      "eval_bleu": 0.3960700684284105,
      "eval_loss": 0.6383510231971741,
      "eval_rouge1": 0.6213431858539703,
      "eval_rouge2": 0.36997070576967905,
      "eval_rougeL": 0.6194073222098655,
      "eval_runtime": 17.7673,
      "eval_samples_per_second": 302.803,
      "eval_steps_per_second": 37.879,
      "step": 16074
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.960555076599121,
      "learning_rate": 3.28061416729465e-05,
      "loss": 0.2832,
      "step": 18753
    },
    {
      "epoch": 7.0,
      "eval_bleu": 0.3998604582987598,
      "eval_loss": 0.6490957736968994,
      "eval_rouge1": 0.623202150680765,
      "eval_rouge2": 0.3741196322885935,
      "eval_rougeL": 0.6209434708040459,
      "eval_runtime": 17.3486,
      "eval_samples_per_second": 310.112,
      "eval_steps_per_second": 38.793,
      "step": 18753
    },
    {
      "epoch": 8.0,
      "grad_norm": 1.4162492752075195,
      "learning_rate": 3.028259231348907e-05,
      "loss": 0.2453,
      "step": 21432
    },
    {
      "epoch": 8.0,
      "eval_bleu": 0.3967586062831279,
      "eval_loss": 0.660749614238739,
      "eval_rouge1": 0.6231513583077875,
      "eval_rouge2": 0.37461482860421447,
      "eval_rougeL": 0.6211860899632824,
      "eval_runtime": 17.8295,
      "eval_samples_per_second": 301.747,
      "eval_steps_per_second": 37.746,
      "step": 21432
    },
    {
      "epoch": 8.0,
      "step": 21432,
      "total_flos": 1.1198453907456e+16,
      "train_loss": 0.5107668242644979,
      "train_runtime": 1728.8387,
      "train_samples_per_second": 247.901,
      "train_steps_per_second": 30.992
    }
  ],
  "logging_steps": 500,
  "max_steps": 53580,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1198453907456e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}