{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0006399902477676531, "eval_steps": 5, "global_step": 21, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.0475726084173955e-05, "eval_loss": 1.0151351690292358, "eval_runtime": 1911.0259, "eval_samples_per_second": 7.23, "eval_steps_per_second": 3.615, "step": 1 }, { "epoch": 9.142717825252186e-05, "grad_norm": 2.0595638751983643, "learning_rate": 2.857142857142857e-05, "loss": 3.6748, "step": 3 }, { "epoch": 0.00015237863042086977, "eval_loss": 1.0050420761108398, "eval_runtime": 1916.5778, "eval_samples_per_second": 7.209, "eval_steps_per_second": 3.604, "step": 5 }, { "epoch": 0.00018285435650504373, "grad_norm": 1.2734413146972656, "learning_rate": 5.714285714285714e-05, "loss": 4.541, "step": 6 }, { "epoch": 0.0002742815347575656, "grad_norm": 2.5725290775299072, "learning_rate": 8.571428571428571e-05, "loss": 3.8809, "step": 9 }, { "epoch": 0.00030475726084173955, "eval_loss": 0.9246754050254822, "eval_runtime": 1917.7451, "eval_samples_per_second": 7.204, "eval_steps_per_second": 3.602, "step": 10 }, { "epoch": 0.00036570871301008746, "grad_norm": 1.181660532951355, "learning_rate": 0.00011428571428571428, "loss": 3.6147, "step": 12 }, { "epoch": 0.00045713589126260935, "grad_norm": 1.1298421621322632, "learning_rate": 0.00014285714285714287, "loss": 3.2705, "step": 15 }, { "epoch": 0.00045713589126260935, "eval_loss": 0.8345330357551575, "eval_runtime": 1916.8879, "eval_samples_per_second": 7.208, "eval_steps_per_second": 3.604, "step": 15 }, { "epoch": 0.0005485630695151312, "grad_norm": 1.11538827419281, "learning_rate": 0.00017142857142857143, "loss": 3.8981, "step": 18 }, { "epoch": 0.0006095145216834791, "eval_loss": 0.7881443500518799, "eval_runtime": 1916.3846, "eval_samples_per_second": 7.209, "eval_steps_per_second": 3.605, "step": 20 }, { "epoch": 0.0006399902477676531, "grad_norm": 1.4301753044128418, "learning_rate": 0.0002, "loss": 2.8858, "step": 21 } ], "logging_steps": 3, "max_steps": 39, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 21, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.57107795001344e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }