|
{ |
|
"best_metric": 0.06450273841619492, |
|
"best_model_checkpoint": "saves/Llama-3.1-8B-Instruct/lora/sft-400/checkpoint-200", |
|
"epoch": 9.777777777777779, |
|
"eval_steps": 50, |
|
"global_step": 220, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 7.734859943389893, |
|
"learning_rate": 2.0454545454545457e-06, |
|
"loss": 1.6805, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 5.1195478439331055, |
|
"learning_rate": 4.3181818181818185e-06, |
|
"loss": 1.6677, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 3.948561668395996, |
|
"learning_rate": 4.9845961611531356e-06, |
|
"loss": 1.3842, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 3.0817477703094482, |
|
"learning_rate": 4.9096053963998555e-06, |
|
"loss": 1.0403, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 3.429473638534546, |
|
"learning_rate": 4.7740799883862966e-06, |
|
"loss": 0.8183, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"eval_loss": 0.6115408539772034, |
|
"eval_runtime": 1.0308, |
|
"eval_samples_per_second": 38.806, |
|
"eval_steps_per_second": 19.403, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 3.2133073806762695, |
|
"learning_rate": 4.5814246365869285e-06, |
|
"loss": 0.633, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 2.653892755508423, |
|
"learning_rate": 4.336479271643833e-06, |
|
"loss": 0.4486, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 3.072075605392456, |
|
"learning_rate": 4.045397465551513e-06, |
|
"loss": 0.3127, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.2421813011169434, |
|
"learning_rate": 3.715491840251172e-06, |
|
"loss": 0.2, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 1.154951810836792, |
|
"learning_rate": 3.3550503583141726e-06, |
|
"loss": 0.1498, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"eval_loss": 0.08401396125555038, |
|
"eval_runtime": 1.0303, |
|
"eval_samples_per_second": 38.823, |
|
"eval_steps_per_second": 19.411, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 1.2458988428115845, |
|
"learning_rate": 2.973128110901026e-06, |
|
"loss": 0.1251, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 0.47104161977767944, |
|
"learning_rate": 2.57931983374517e-06, |
|
"loss": 0.1033, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.777777777777778, |
|
"grad_norm": 1.8045340776443481, |
|
"learning_rate": 2.183518866065627e-06, |
|
"loss": 0.102, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.222222222222222, |
|
"grad_norm": 0.7171550989151001, |
|
"learning_rate": 1.7956686078964257e-06, |
|
"loss": 0.0972, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 1.1168746948242188, |
|
"learning_rate": 1.425512719777071e-06, |
|
"loss": 0.0829, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"eval_loss": 0.06506412476301193, |
|
"eval_runtime": 1.0294, |
|
"eval_samples_per_second": 38.857, |
|
"eval_steps_per_second": 19.429, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.111111111111111, |
|
"grad_norm": 1.1370668411254883, |
|
"learning_rate": 1.0823503403430736e-06, |
|
"loss": 0.111, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.555555555555555, |
|
"grad_norm": 0.8633684515953064, |
|
"learning_rate": 7.748024712947205e-07, |
|
"loss": 0.1118, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.568353533744812, |
|
"learning_rate": 5.105953986729196e-07, |
|
"loss": 0.0766, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.444444444444445, |
|
"grad_norm": 0.6475286483764648, |
|
"learning_rate": 2.963665913810451e-07, |
|
"loss": 0.0952, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 0.6339282393455505, |
|
"learning_rate": 1.3749795321332887e-07, |
|
"loss": 0.0952, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"eval_loss": 0.06450273841619492, |
|
"eval_runtime": 1.0279, |
|
"eval_samples_per_second": 38.913, |
|
"eval_steps_per_second": 19.457, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.333333333333334, |
|
"grad_norm": 0.8839389681816101, |
|
"learning_rate": 3.798061746947995e-08, |
|
"loss": 0.0727, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 9.777777777777779, |
|
"grad_norm": 1.410608172416687, |
|
"learning_rate": 3.146808153123293e-10, |
|
"loss": 0.1111, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 9.777777777777779, |
|
"step": 220, |
|
"total_flos": 1.959093130965811e+16, |
|
"train_loss": 0.4326907604932785, |
|
"train_runtime": 310.7933, |
|
"train_samples_per_second": 11.583, |
|
"train_steps_per_second": 0.708 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 220, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.959093130965811e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|