{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.963963963963964, "eval_steps": 500, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36036036036036034, "grad_norm": 5.788876433035059, "learning_rate": 5e-06, "loss": 0.3026, "step": 10 }, { "epoch": 0.7207207207207207, "grad_norm": 2.9871413577007604, "learning_rate": 5e-06, "loss": 0.2081, "step": 20 }, { "epoch": 0.972972972972973, "eval_loss": 0.18130172789096832, "eval_runtime": 30.3187, "eval_samples_per_second": 24.638, "eval_steps_per_second": 0.396, "step": 27 }, { "epoch": 1.1036036036036037, "grad_norm": 0.7150273386303733, "learning_rate": 5e-06, "loss": 0.202, "step": 30 }, { "epoch": 1.4639639639639639, "grad_norm": 0.765396666007098, "learning_rate": 5e-06, "loss": 0.1717, "step": 40 }, { "epoch": 1.8243243243243243, "grad_norm": 0.43140896524380545, "learning_rate": 5e-06, "loss": 0.1642, "step": 50 }, { "epoch": 1.9684684684684686, "eval_loss": 0.16076427698135376, "eval_runtime": 30.2833, "eval_samples_per_second": 24.667, "eval_steps_per_second": 0.396, "step": 54 }, { "epoch": 2.2072072072072073, "grad_norm": 0.44793469318434836, "learning_rate": 5e-06, "loss": 0.172, "step": 60 }, { "epoch": 2.5675675675675675, "grad_norm": 0.3516467006088966, "learning_rate": 5e-06, "loss": 0.1532, "step": 70 }, { "epoch": 2.9279279279279278, "grad_norm": 0.49731596311614684, "learning_rate": 5e-06, "loss": 0.1506, "step": 80 }, { "epoch": 2.963963963963964, "eval_loss": 0.15399795770645142, "eval_runtime": 29.6439, "eval_samples_per_second": 25.199, "eval_steps_per_second": 0.405, "step": 81 }, { "epoch": 2.963963963963964, "step": 81, "total_flos": 135468637224960.0, "train_loss": 0.1901383955537537, "train_runtime": 5069.1426, "train_samples_per_second": 8.39, "train_steps_per_second": 0.016 } ], "logging_steps": 10, "max_steps": 81, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 135468637224960.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }