{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.937728937728938, "eval_steps": 500, "global_step": 102, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29304029304029305, "grad_norm": 0.6432506442070007, "learning_rate": 9.090909090909092e-05, "loss": 1.2624, "step": 10 }, { "epoch": 0.5860805860805861, "grad_norm": 0.3166349530220032, "learning_rate": 9.760588329553571e-05, "loss": 1.1353, "step": 20 }, { "epoch": 0.8791208791208791, "grad_norm": 0.32692158222198486, "learning_rate": 8.962384209755452e-05, "loss": 1.0604, "step": 30 }, { "epoch": 1.1465201465201464, "grad_norm": 0.2800314426422119, "learning_rate": 7.696600172495997e-05, "loss": 1.0072, "step": 40 }, { "epoch": 1.4395604395604396, "grad_norm": 0.4036503732204437, "learning_rate": 6.112604669781572e-05, "loss": 1.0499, "step": 50 }, { "epoch": 1.7326007326007327, "grad_norm": 0.2690628170967102, "learning_rate": 4.397316598723385e-05, "loss": 0.9917, "step": 60 }, { "epoch": 2.0, "grad_norm": 6.335123062133789, "learning_rate": 2.7531479951641924e-05, "loss": 1.0404, "step": 70 }, { "epoch": 2.293040293040293, "grad_norm": 0.38795047998428345, "learning_rate": 1.3741184642831189e-05, "loss": 1.0057, "step": 80 }, { "epoch": 2.586080586080586, "grad_norm": 0.3348829746246338, "learning_rate": 4.229599573731685e-06, "loss": 0.9651, "step": 90 }, { "epoch": 2.879120879120879, "grad_norm": 0.3123050630092621, "learning_rate": 1.191363849376237e-07, "loss": 0.9705, "step": 100 }, { "epoch": 2.937728937728938, "step": 102, "total_flos": 2.405589486193869e+16, "train_loss": 1.0459267169821496, "train_runtime": 1269.1774, "train_samples_per_second": 2.579, "train_steps_per_second": 0.08 } ], "logging_steps": 10, "max_steps": 102, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.405589486193869e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }