{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9724770642201834, "eval_steps": 500, "global_step": 81, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3669724770642202, "grad_norm": 15.223515124052987, "learning_rate": 5e-06, "loss": 1.1932, "step": 10 }, { "epoch": 0.7339449541284404, "grad_norm": 2.19506481158568, "learning_rate": 5e-06, "loss": 1.0612, "step": 20 }, { "epoch": 0.9908256880733946, "eval_loss": 1.006506085395813, "eval_runtime": 85.0324, "eval_samples_per_second": 8.632, "eval_steps_per_second": 0.27, "step": 27 }, { "epoch": 1.1009174311926606, "grad_norm": 4.12557913604333, "learning_rate": 5e-06, "loss": 1.0044, "step": 30 }, { "epoch": 1.4678899082568808, "grad_norm": 2.873184668037523, "learning_rate": 5e-06, "loss": 0.9667, "step": 40 }, { "epoch": 1.834862385321101, "grad_norm": 2.185324880239001, "learning_rate": 5e-06, "loss": 0.9481, "step": 50 }, { "epoch": 1.981651376146789, "eval_loss": 0.9613564610481262, "eval_runtime": 84.6961, "eval_samples_per_second": 8.666, "eval_steps_per_second": 0.272, "step": 54 }, { "epoch": 2.2018348623853212, "grad_norm": 1.6556333126334992, "learning_rate": 5e-06, "loss": 0.9173, "step": 60 }, { "epoch": 2.5688073394495414, "grad_norm": 1.040600621325176, "learning_rate": 5e-06, "loss": 0.8827, "step": 70 }, { "epoch": 2.9357798165137616, "grad_norm": 1.0042947441731673, "learning_rate": 5e-06, "loss": 0.8819, "step": 80 }, { "epoch": 2.9724770642201834, "eval_loss": 0.9339568018913269, "eval_runtime": 84.5066, "eval_samples_per_second": 8.686, "eval_steps_per_second": 0.272, "step": 81 }, { "epoch": 2.9724770642201834, "step": 81, "total_flos": 135573327052800.0, "train_loss": 0.9809838378870929, "train_runtime": 13278.7272, "train_samples_per_second": 3.15, "train_steps_per_second": 0.006 } ], "logging_steps": 10, "max_steps": 81, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 135573327052800.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }