{ "best_metric": 1.81984543800354, "best_model_checkpoint": "shawgpt-ft-trial-0/checkpoint-18", "epoch": 5.76, "eval_steps": 500, "global_step": 18, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.96, "grad_norm": 1.081973671913147, "learning_rate": 0.00026388697902791035, "loss": 4.1859, "step": 3 }, { "epoch": 0.96, "eval_loss": 3.5218443870544434, "eval_runtime": 4.4578, "eval_samples_per_second": 2.019, "eval_steps_per_second": 1.122, "step": 3 }, { "epoch": 1.92, "grad_norm": 1.1095596551895142, "learning_rate": 0.00021110958322232825, "loss": 3.3269, "step": 6 }, { "epoch": 1.92, "eval_loss": 2.843799114227295, "eval_runtime": 4.4972, "eval_samples_per_second": 2.001, "eval_steps_per_second": 1.112, "step": 6 }, { "epoch": 2.88, "grad_norm": 1.0787794589996338, "learning_rate": 0.0001583321874167462, "loss": 2.7223, "step": 9 }, { "epoch": 2.88, "eval_loss": 2.420926094055176, "eval_runtime": 4.4722, "eval_samples_per_second": 2.012, "eval_steps_per_second": 1.118, "step": 9 }, { "epoch": 3.84, "grad_norm": 1.9727703332901, "learning_rate": 0.00010555479161116412, "loss": 2.3079, "step": 12 }, { "epoch": 3.84, "eval_loss": 2.1191325187683105, "eval_runtime": 4.4796, "eval_samples_per_second": 2.009, "eval_steps_per_second": 1.116, "step": 12 }, { "epoch": 4.8, "grad_norm": 1.8805612325668335, "learning_rate": 5.277739580558206e-05, "loss": 1.9878, "step": 15 }, { "epoch": 4.8, "eval_loss": 1.904133677482605, "eval_runtime": 4.4752, "eval_samples_per_second": 2.011, "eval_steps_per_second": 1.117, "step": 15 }, { "epoch": 5.76, "grad_norm": 1.2010680437088013, "learning_rate": 0.0, "loss": 1.3913, "step": 18 }, { "epoch": 5.76, "eval_loss": 1.81984543800354, "eval_runtime": 4.4826, "eval_samples_per_second": 2.008, "eval_steps_per_second": 1.115, "step": 18 }, { "epoch": 5.76, "step": 18, "total_flos": 55123439616000.0, "train_loss": 2.653691212336222, "train_runtime": 318.9177, "train_samples_per_second": 0.941, "train_steps_per_second": 0.056 } ], "logging_steps": 500, "max_steps": 18, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 55123439616000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }