|
{ |
|
"best_metric": 1.1031256914138794, |
|
"best_model_checkpoint": "arabic-embedding-model-pair-class2/checkpoint-781", |
|
"epoch": 0.999360204734485, |
|
"eval_steps": 500, |
|
"global_step": 781, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03198976327575176, |
|
"grad_norm": 4.865348815917969, |
|
"learning_rate": 1.2787723785166242e-07, |
|
"loss": 1.172, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06397952655150352, |
|
"grad_norm": 4.501841068267822, |
|
"learning_rate": 2.5575447570332484e-07, |
|
"loss": 1.1839, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 4.760345458984375, |
|
"learning_rate": 3.836317135549872e-07, |
|
"loss": 1.1595, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.12795905310300704, |
|
"grad_norm": 4.2426605224609375, |
|
"learning_rate": 5.115089514066497e-07, |
|
"loss": 1.1516, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1599488163787588, |
|
"grad_norm": 4.42633581161499, |
|
"learning_rate": 6.39386189258312e-07, |
|
"loss": 1.1312, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 4.73534631729126, |
|
"learning_rate": 7.672634271099744e-07, |
|
"loss": 1.1458, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22392834293026231, |
|
"grad_norm": 4.202542781829834, |
|
"learning_rate": 8.951406649616368e-07, |
|
"loss": 1.1202, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2559181062060141, |
|
"grad_norm": 4.2955827713012695, |
|
"learning_rate": 1.0230179028132994e-06, |
|
"loss": 1.1113, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 4.325717926025391, |
|
"learning_rate": 1.1508951406649615e-06, |
|
"loss": 1.0973, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3198976327575176, |
|
"grad_norm": 4.795027256011963, |
|
"learning_rate": 1.278772378516624e-06, |
|
"loss": 1.1004, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.35188739603326935, |
|
"grad_norm": 4.470003128051758, |
|
"learning_rate": 1.4066496163682863e-06, |
|
"loss": 1.0892, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 4.943461894989014, |
|
"learning_rate": 1.5345268542199487e-06, |
|
"loss": 1.0708, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.41586692258477287, |
|
"grad_norm": 4.593530654907227, |
|
"learning_rate": 1.6624040920716111e-06, |
|
"loss": 1.0937, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.44785668586052463, |
|
"grad_norm": 4.122324466705322, |
|
"learning_rate": 1.7902813299232735e-06, |
|
"loss": 1.0698, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 4.206705093383789, |
|
"learning_rate": 1.918158567774936e-06, |
|
"loss": 1.0893, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5118362124120281, |
|
"grad_norm": 5.153020858764648, |
|
"learning_rate": 1.9948776323278313e-06, |
|
"loss": 1.0597, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5438259756877799, |
|
"grad_norm": 4.710206985473633, |
|
"learning_rate": 1.9806488332384747e-06, |
|
"loss": 1.0638, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 4.139469146728516, |
|
"learning_rate": 1.9664200341491176e-06, |
|
"loss": 1.0524, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6078055022392834, |
|
"grad_norm": 4.873435974121094, |
|
"learning_rate": 1.952191235059761e-06, |
|
"loss": 1.0673, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.6397952655150352, |
|
"grad_norm": 4.9657063484191895, |
|
"learning_rate": 1.9379624359704043e-06, |
|
"loss": 1.0619, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 4.697886943817139, |
|
"learning_rate": 1.923733636881047e-06, |
|
"loss": 1.0254, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7037747920665387, |
|
"grad_norm": 5.463695526123047, |
|
"learning_rate": 1.90950483779169e-06, |
|
"loss": 1.0423, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7357645553422905, |
|
"grad_norm": 4.888927459716797, |
|
"learning_rate": 1.8952760387023335e-06, |
|
"loss": 1.0175, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 5.26019287109375, |
|
"learning_rate": 1.8810472396129766e-06, |
|
"loss": 1.0365, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.799744081893794, |
|
"grad_norm": 4.753886699676514, |
|
"learning_rate": 1.8668184405236197e-06, |
|
"loss": 1.0412, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.8317338451695457, |
|
"grad_norm": 4.420036315917969, |
|
"learning_rate": 1.8525896414342628e-06, |
|
"loss": 1.0411, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 5.005071640014648, |
|
"learning_rate": 1.838360842344906e-06, |
|
"loss": 1.0287, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.8957133717210493, |
|
"grad_norm": 5.211964130401611, |
|
"learning_rate": 1.8241320432555491e-06, |
|
"loss": 1.0318, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.927703134996801, |
|
"grad_norm": 5.416666030883789, |
|
"learning_rate": 1.8099032441661922e-06, |
|
"loss": 1.0486, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 4.680449962615967, |
|
"learning_rate": 1.7956744450768356e-06, |
|
"loss": 1.0237, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.9916826615483045, |
|
"grad_norm": 4.465170860290527, |
|
"learning_rate": 1.7814456459874785e-06, |
|
"loss": 1.0199, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.999360204734485, |
|
"eval_loss": 1.1031256914138794, |
|
"eval_runtime": 10.1366, |
|
"eval_samples_per_second": 493.263, |
|
"eval_steps_per_second": 15.488, |
|
"step": 781 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 3905, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|