|
{ |
|
"best_metric": 0.41785159707069397, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_gulf/checkpoint-3344", |
|
"epoch": 9.0, |
|
"eval_steps": 500, |
|
"global_step": 7524, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0378659963607788, |
|
"learning_rate": 4.896424167694204e-05, |
|
"loss": 1.8893, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.3817836670274851, |
|
"eval_loss": 0.4814806580543518, |
|
"eval_rouge1": 0.53445667160678, |
|
"eval_rouge2": 0.2556577725762693, |
|
"eval_rougeL": 0.5334625759517182, |
|
"eval_runtime": 13.5021, |
|
"eval_samples_per_second": 123.833, |
|
"eval_steps_per_second": 15.479, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1964277029037476, |
|
"learning_rate": 4.638717632552405e-05, |
|
"loss": 0.4211, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.39468460167151326, |
|
"eval_loss": 0.4300891160964966, |
|
"eval_rouge1": 0.5886104983083813, |
|
"eval_rouge2": 0.30839577052783973, |
|
"eval_rougeL": 0.5877200747673375, |
|
"eval_runtime": 5.8159, |
|
"eval_samples_per_second": 287.485, |
|
"eval_steps_per_second": 35.936, |
|
"step": 1672 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.0570303201675415, |
|
"learning_rate": 4.3810110974106046e-05, |
|
"loss": 0.3307, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.40983966101019575, |
|
"eval_loss": 0.4181276261806488, |
|
"eval_rouge1": 0.6130185046348249, |
|
"eval_rouge2": 0.33816617915935476, |
|
"eval_rougeL": 0.6119109322410228, |
|
"eval_runtime": 18.13, |
|
"eval_samples_per_second": 92.223, |
|
"eval_steps_per_second": 11.528, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.0588442087173462, |
|
"learning_rate": 4.1233045622688044e-05, |
|
"loss": 0.2641, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.4093436818765597, |
|
"eval_loss": 0.41785159707069397, |
|
"eval_rouge1": 0.6308594985430711, |
|
"eval_rouge2": 0.35871585587023913, |
|
"eval_rougeL": 0.6297639876498756, |
|
"eval_runtime": 5.6414, |
|
"eval_samples_per_second": 296.382, |
|
"eval_steps_per_second": 37.048, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.3207675218582153, |
|
"learning_rate": 3.8655980271270036e-05, |
|
"loss": 0.2135, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.41246868412584214, |
|
"eval_loss": 0.4242132008075714, |
|
"eval_rouge1": 0.6373073919820111, |
|
"eval_rouge2": 0.36914621811033116, |
|
"eval_rougeL": 0.6359208794207342, |
|
"eval_runtime": 5.3736, |
|
"eval_samples_per_second": 311.149, |
|
"eval_steps_per_second": 38.894, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1845903396606445, |
|
"learning_rate": 3.6078914919852034e-05, |
|
"loss": 0.1765, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.41560183658228855, |
|
"eval_loss": 0.43002957105636597, |
|
"eval_rouge1": 0.6465581796614536, |
|
"eval_rouge2": 0.382979021316462, |
|
"eval_rougeL": 0.6453232583485639, |
|
"eval_runtime": 5.4874, |
|
"eval_samples_per_second": 304.7, |
|
"eval_steps_per_second": 38.088, |
|
"step": 5016 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.9745954871177673, |
|
"learning_rate": 3.350184956843403e-05, |
|
"loss": 0.1492, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.41879193694803385, |
|
"eval_loss": 0.4348294138908386, |
|
"eval_rouge1": 0.6460454718776507, |
|
"eval_rouge2": 0.3852436451416871, |
|
"eval_rougeL": 0.6448992585011752, |
|
"eval_runtime": 12.7816, |
|
"eval_samples_per_second": 130.813, |
|
"eval_steps_per_second": 16.352, |
|
"step": 5852 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.8118007779121399, |
|
"learning_rate": 3.092478421701603e-05, |
|
"loss": 0.1293, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.4190463110305805, |
|
"eval_loss": 0.44312888383865356, |
|
"eval_rouge1": 0.6492735112653347, |
|
"eval_rouge2": 0.389473573727392, |
|
"eval_rougeL": 0.6482287927411368, |
|
"eval_runtime": 5.4157, |
|
"eval_samples_per_second": 308.729, |
|
"eval_steps_per_second": 38.591, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.9986652135848999, |
|
"learning_rate": 2.8347718865598028e-05, |
|
"loss": 0.1155, |
|
"step": 7524 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.4202341801445125, |
|
"eval_loss": 0.449709415435791, |
|
"eval_rouge1": 0.6498383797626817, |
|
"eval_rouge2": 0.39052732019696323, |
|
"eval_rougeL": 0.6488280555236853, |
|
"eval_runtime": 5.4735, |
|
"eval_samples_per_second": 305.473, |
|
"eval_steps_per_second": 38.184, |
|
"step": 7524 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 7524, |
|
"total_flos": 3930158776320000.0, |
|
"train_loss": 0.40989596332913064, |
|
"train_runtime": 1923.2663, |
|
"train_samples_per_second": 69.517, |
|
"train_steps_per_second": 8.694 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3930158776320000.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|