{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0512, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0032, "grad_norm": 1.7230606079101562, "learning_rate": 4.99208e-05, "loss": 1.2281, "step": 500 }, { "epoch": 0.0064, "grad_norm": 3.655383348464966, "learning_rate": 4.9840800000000006e-05, "loss": 0.7566, "step": 1000 }, { "epoch": 0.0096, "grad_norm": 1.2925927639007568, "learning_rate": 4.97608e-05, "loss": 0.6764, "step": 1500 }, { "epoch": 0.0128, "grad_norm": 1.286004900932312, "learning_rate": 4.968080000000001e-05, "loss": 0.6304, "step": 2000 }, { "epoch": 0.016, "grad_norm": 1.2140214443206787, "learning_rate": 4.96008e-05, "loss": 0.5981, "step": 2500 }, { "epoch": 0.0192, "grad_norm": 1.2525482177734375, "learning_rate": 4.95208e-05, "loss": 0.5767, "step": 3000 }, { "epoch": 0.0224, "grad_norm": 1.2310410737991333, "learning_rate": 4.94408e-05, "loss": 0.5597, "step": 3500 }, { "epoch": 0.0256, "grad_norm": 1.1735206842422485, "learning_rate": 4.9360800000000004e-05, "loss": 0.5418, "step": 4000 }, { "epoch": 0.0288, "grad_norm": 1.114688754081726, "learning_rate": 4.9280800000000004e-05, "loss": 0.5335, "step": 4500 }, { "epoch": 0.032, "grad_norm": 0.8874593377113342, "learning_rate": 4.9200800000000005e-05, "loss": 0.5237, "step": 5000 }, { "epoch": 0.0352, "grad_norm": 1.1261299848556519, "learning_rate": 4.91208e-05, "loss": 0.5135, "step": 5500 }, { "epoch": 0.0384, "grad_norm": 0.9994556307792664, "learning_rate": 4.9040800000000007e-05, "loss": 0.5059, "step": 6000 }, { "epoch": 0.0416, "grad_norm": 1.2349673509597778, "learning_rate": 4.89608e-05, "loss": 0.4939, "step": 6500 }, { "epoch": 0.0448, "grad_norm": 0.9770995378494263, "learning_rate": 4.88808e-05, "loss": 0.4824, "step": 7000 }, { "epoch": 0.048, "grad_norm": 0.981966495513916, "learning_rate": 4.88008e-05, "loss": 0.4875, "step": 7500 }, { "epoch": 0.0512, "grad_norm": 1.0177415609359741, "learning_rate": 4.87208e-05, "loss": 0.4785, "step": 8000 } ], "logging_steps": 500, "max_steps": 312500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.897330499584e+16, "train_batch_size": 64, "trial_name": null, "trial_params": null }