{ "best_metric": 0.06450273841619492, "best_model_checkpoint": "saves/Llama-3.1-8B-Instruct/lora/sft-400/checkpoint-200", "epoch": 9.777777777777779, "eval_steps": 50, "global_step": 220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4444444444444444, "grad_norm": 7.734859943389893, "learning_rate": 2.0454545454545457e-06, "loss": 1.6805, "step": 10 }, { "epoch": 0.8888888888888888, "grad_norm": 5.1195478439331055, "learning_rate": 4.3181818181818185e-06, "loss": 1.6677, "step": 20 }, { "epoch": 1.3333333333333333, "grad_norm": 3.948561668395996, "learning_rate": 4.9845961611531356e-06, "loss": 1.3842, "step": 30 }, { "epoch": 1.7777777777777777, "grad_norm": 3.0817477703094482, "learning_rate": 4.9096053963998555e-06, "loss": 1.0403, "step": 40 }, { "epoch": 2.2222222222222223, "grad_norm": 3.429473638534546, "learning_rate": 4.7740799883862966e-06, "loss": 0.8183, "step": 50 }, { "epoch": 2.2222222222222223, "eval_loss": 0.6115408539772034, "eval_runtime": 1.0308, "eval_samples_per_second": 38.806, "eval_steps_per_second": 19.403, "step": 50 }, { "epoch": 2.6666666666666665, "grad_norm": 3.2133073806762695, "learning_rate": 4.5814246365869285e-06, "loss": 0.633, "step": 60 }, { "epoch": 3.111111111111111, "grad_norm": 2.653892755508423, "learning_rate": 4.336479271643833e-06, "loss": 0.4486, "step": 70 }, { "epoch": 3.5555555555555554, "grad_norm": 3.072075605392456, "learning_rate": 4.045397465551513e-06, "loss": 0.3127, "step": 80 }, { "epoch": 4.0, "grad_norm": 1.2421813011169434, "learning_rate": 3.715491840251172e-06, "loss": 0.2, "step": 90 }, { "epoch": 4.444444444444445, "grad_norm": 1.154951810836792, "learning_rate": 3.3550503583141726e-06, "loss": 0.1498, "step": 100 }, { "epoch": 4.444444444444445, "eval_loss": 0.08401396125555038, "eval_runtime": 1.0303, "eval_samples_per_second": 38.823, "eval_steps_per_second": 19.411, "step": 100 }, { "epoch": 4.888888888888889, "grad_norm": 1.2458988428115845, "learning_rate": 2.973128110901026e-06, "loss": 0.1251, "step": 110 }, { "epoch": 5.333333333333333, "grad_norm": 0.47104161977767944, "learning_rate": 2.57931983374517e-06, "loss": 0.1033, "step": 120 }, { "epoch": 5.777777777777778, "grad_norm": 1.8045340776443481, "learning_rate": 2.183518866065627e-06, "loss": 0.102, "step": 130 }, { "epoch": 6.222222222222222, "grad_norm": 0.7171550989151001, "learning_rate": 1.7956686078964257e-06, "loss": 0.0972, "step": 140 }, { "epoch": 6.666666666666667, "grad_norm": 1.1168746948242188, "learning_rate": 1.425512719777071e-06, "loss": 0.0829, "step": 150 }, { "epoch": 6.666666666666667, "eval_loss": 0.06506412476301193, "eval_runtime": 1.0294, "eval_samples_per_second": 38.857, "eval_steps_per_second": 19.429, "step": 150 }, { "epoch": 7.111111111111111, "grad_norm": 1.1370668411254883, "learning_rate": 1.0823503403430736e-06, "loss": 0.111, "step": 160 }, { "epoch": 7.555555555555555, "grad_norm": 0.8633684515953064, "learning_rate": 7.748024712947205e-07, "loss": 0.1118, "step": 170 }, { "epoch": 8.0, "grad_norm": 0.568353533744812, "learning_rate": 5.105953986729196e-07, "loss": 0.0766, "step": 180 }, { "epoch": 8.444444444444445, "grad_norm": 0.6475286483764648, "learning_rate": 2.963665913810451e-07, "loss": 0.0952, "step": 190 }, { "epoch": 8.88888888888889, "grad_norm": 0.6339282393455505, "learning_rate": 1.3749795321332887e-07, "loss": 0.0952, "step": 200 }, { "epoch": 8.88888888888889, "eval_loss": 0.06450273841619492, "eval_runtime": 1.0279, "eval_samples_per_second": 38.913, "eval_steps_per_second": 19.457, "step": 200 }, { "epoch": 9.333333333333334, "grad_norm": 0.8839389681816101, "learning_rate": 3.798061746947995e-08, "loss": 0.0727, "step": 210 }, { "epoch": 9.777777777777779, "grad_norm": 1.410608172416687, "learning_rate": 3.146808153123293e-10, "loss": 0.1111, "step": 220 }, { "epoch": 9.777777777777779, "step": 220, "total_flos": 1.959093130965811e+16, "train_loss": 0.4326907604932785, "train_runtime": 310.7933, "train_samples_per_second": 11.583, "train_steps_per_second": 0.708 } ], "logging_steps": 10, "max_steps": 220, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.959093130965811e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }