{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9975308641975309, "eval_steps": 6, "global_step": 101, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05925925925925926, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 0.9886, "step": 6 }, { "epoch": 0.11851851851851852, "grad_norm": 0.24954715371131897, "learning_rate": 0.001, "loss": 0.984, "step": 12 }, { "epoch": 0.17777777777777778, "grad_norm": 0.33483922481536865, "learning_rate": 0.0016329931618554523, "loss": 0.9071, "step": 18 }, { "epoch": 0.23703703703703705, "grad_norm": 1.0519644021987915, "learning_rate": 0.0012649110640673518, "loss": 3.3881, "step": 24 }, { "epoch": 0.2962962962962963, "grad_norm": 2.5967140197753906, "learning_rate": 0.0008944271909999159, "loss": 2.3416, "step": 30 }, { "epoch": 0.35555555555555557, "grad_norm": 0.6895209550857544, "learning_rate": 0.0007071067811865475, "loss": 0.7674, "step": 36 }, { "epoch": 0.4148148148148148, "grad_norm": 0.22938764095306396, "learning_rate": 0.0006030226891555273, "loss": 0.7034, "step": 42 }, { "epoch": 0.4740740740740741, "grad_norm": 0.21268492937088013, "learning_rate": 0.0005345224838248488, "loss": 0.6851, "step": 48 }, { "epoch": 0.5333333333333333, "grad_norm": 0.20341278612613678, "learning_rate": 0.0004850712500726659, "loss": 0.6684, "step": 54 }, { "epoch": 0.5925925925925926, "grad_norm": 0.2080058604478836, "learning_rate": 0.00044721359549995795, "loss": 0.6385, "step": 60 }, { "epoch": 0.6518518518518519, "grad_norm": 0.21044473350048065, "learning_rate": 0.0004170288281141495, "loss": 0.6324, "step": 66 }, { "epoch": 0.7111111111111111, "grad_norm": 0.20623333752155304, "learning_rate": 0.0003922322702763681, "loss": 0.6319, "step": 72 }, { "epoch": 0.7703703703703704, "grad_norm": 0.26323583722114563, "learning_rate": 0.0003713906763541037, "loss": 0.6401, "step": 78 }, { "epoch": 0.8296296296296296, "grad_norm": 0.17696638405323029, "learning_rate": 0.00035355339059327376, "loss": 0.6248, "step": 84 }, { "epoch": 0.8888888888888888, "grad_norm": 0.1904665231704712, "learning_rate": 0.0003380617018914066, "loss": 0.6202, "step": 90 }, { "epoch": 0.9481481481481482, "grad_norm": 0.21308590471744537, "learning_rate": 0.0003244428422615251, "loss": 0.6346, "step": 96 }, { "epoch": 0.9975308641975309, "step": 101, "total_flos": 1.0258028421510595e+18, "train_loss": 0.9721237692502466, "train_runtime": 907.9026, "train_samples_per_second": 14.272, "train_steps_per_second": 0.111 } ], "logging_steps": 6, "max_steps": 101, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0258028421510595e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }