{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9333333333333336, "eval_steps": 500, "global_step": 99, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2962962962962963, "grad_norm": 8.841581474102506, "learning_rate": 5e-06, "loss": 1.0027, "step": 10 }, { "epoch": 0.5925925925925926, "grad_norm": 8.669231176540645, "learning_rate": 5e-06, "loss": 0.8838, "step": 20 }, { "epoch": 0.8888888888888888, "grad_norm": 2.0438362887402333, "learning_rate": 5e-06, "loss": 0.8462, "step": 30 }, { "epoch": 0.9777777777777777, "eval_loss": 0.8316775560379028, "eval_runtime": 24.3137, "eval_samples_per_second": 37.304, "eval_steps_per_second": 0.617, "step": 33 }, { "epoch": 1.1851851851851851, "grad_norm": 1.5325350368450368, "learning_rate": 5e-06, "loss": 0.8292, "step": 40 }, { "epoch": 1.4814814814814814, "grad_norm": 1.2135925661799924, "learning_rate": 5e-06, "loss": 0.7806, "step": 50 }, { "epoch": 1.7777777777777777, "grad_norm": 0.9729741730730993, "learning_rate": 5e-06, "loss": 0.7626, "step": 60 }, { "epoch": 1.9851851851851852, "eval_loss": 0.7775214910507202, "eval_runtime": 23.9767, "eval_samples_per_second": 37.828, "eval_steps_per_second": 0.626, "step": 67 }, { "epoch": 2.074074074074074, "grad_norm": 1.4003520868501302, "learning_rate": 5e-06, "loss": 0.7619, "step": 70 }, { "epoch": 2.3703703703703702, "grad_norm": 0.738966252442236, "learning_rate": 5e-06, "loss": 0.7081, "step": 80 }, { "epoch": 2.6666666666666665, "grad_norm": 0.6691400354019779, "learning_rate": 5e-06, "loss": 0.6996, "step": 90 }, { "epoch": 2.9333333333333336, "eval_loss": 0.7636918425559998, "eval_runtime": 22.9672, "eval_samples_per_second": 39.491, "eval_steps_per_second": 0.653, "step": 99 }, { "epoch": 2.9333333333333336, "step": 99, "total_flos": 165619307642880.0, "train_loss": 0.7985600895351834, "train_runtime": 3525.8092, "train_samples_per_second": 14.66, "train_steps_per_second": 0.028 } ], "logging_steps": 10, "max_steps": 99, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 165619307642880.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }