{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6584, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07594167679222358, "grad_norm": 0.35286444425582886, "learning_rate": 1.848116646415553e-05, "loss": 1.4832, "step": 500 }, { "epoch": 0.07594167679222358, "eval_loss": 1.412253737449646, "eval_runtime": 5.5687, "eval_samples_per_second": 5.746, "eval_steps_per_second": 5.746, "step": 500 }, { "epoch": 0.15188335358444716, "grad_norm": 0.41863012313842773, "learning_rate": 1.6962332928311057e-05, "loss": 1.4527, "step": 1000 }, { "epoch": 0.15188335358444716, "eval_loss": 1.3994698524475098, "eval_runtime": 5.5761, "eval_samples_per_second": 5.739, "eval_steps_per_second": 5.739, "step": 1000 }, { "epoch": 0.2278250303766707, "grad_norm": 0.5559484362602234, "learning_rate": 1.5443499392466586e-05, "loss": 1.4355, "step": 1500 }, { "epoch": 0.2278250303766707, "eval_loss": 1.3935128450393677, "eval_runtime": 5.5659, "eval_samples_per_second": 5.749, "eval_steps_per_second": 5.749, "step": 1500 }, { "epoch": 0.3037667071688943, "grad_norm": 0.4486136734485626, "learning_rate": 1.3924665856622116e-05, "loss": 1.424, "step": 2000 }, { "epoch": 0.3037667071688943, "eval_loss": 1.3896900415420532, "eval_runtime": 5.5617, "eval_samples_per_second": 5.754, "eval_steps_per_second": 5.754, "step": 2000 }, { "epoch": 0.37970838396111783, "grad_norm": 0.4136578142642975, "learning_rate": 1.2405832320777645e-05, "loss": 1.4242, "step": 2500 }, { "epoch": 0.37970838396111783, "eval_loss": 1.3874199390411377, "eval_runtime": 5.5692, "eval_samples_per_second": 5.746, "eval_steps_per_second": 5.746, "step": 2500 }, { "epoch": 0.4556500607533414, "grad_norm": 0.36681634187698364, "learning_rate": 1.0886998784933171e-05, "loss": 1.4179, "step": 3000 }, { "epoch": 0.4556500607533414, "eval_loss": 1.3852367401123047, "eval_runtime": 5.5668, "eval_samples_per_second": 5.748, "eval_steps_per_second": 5.748, "step": 3000 }, { "epoch": 0.531591737545565, "grad_norm": 0.4278933107852936, "learning_rate": 9.3681652490887e-06, "loss": 1.4148, "step": 3500 }, { "epoch": 0.531591737545565, "eval_loss": 1.3838541507720947, "eval_runtime": 5.5611, "eval_samples_per_second": 5.754, "eval_steps_per_second": 5.754, "step": 3500 }, { "epoch": 0.6075334143377886, "grad_norm": 0.5046536326408386, "learning_rate": 7.84933171324423e-06, "loss": 1.4217, "step": 4000 }, { "epoch": 0.6075334143377886, "eval_loss": 1.3825995922088623, "eval_runtime": 5.5615, "eval_samples_per_second": 5.754, "eval_steps_per_second": 5.754, "step": 4000 }, { "epoch": 0.6834750911300121, "grad_norm": 0.3952041268348694, "learning_rate": 6.3304981773997575e-06, "loss": 1.4212, "step": 4500 }, { "epoch": 0.6834750911300121, "eval_loss": 1.381594181060791, "eval_runtime": 5.5621, "eval_samples_per_second": 5.753, "eval_steps_per_second": 5.753, "step": 4500 }, { "epoch": 0.7594167679222357, "grad_norm": 0.6603379249572754, "learning_rate": 4.811664641555286e-06, "loss": 1.4196, "step": 5000 }, { "epoch": 0.7594167679222357, "eval_loss": 1.3807512521743774, "eval_runtime": 5.5679, "eval_samples_per_second": 5.747, "eval_steps_per_second": 5.747, "step": 5000 }, { "epoch": 0.8353584447144593, "grad_norm": 0.5418098568916321, "learning_rate": 3.292831105710814e-06, "loss": 1.4244, "step": 5500 }, { "epoch": 0.8353584447144593, "eval_loss": 1.380265712738037, "eval_runtime": 5.5631, "eval_samples_per_second": 5.752, "eval_steps_per_second": 5.752, "step": 5500 }, { "epoch": 0.9113001215066828, "grad_norm": 0.38142943382263184, "learning_rate": 1.7739975698663428e-06, "loss": 1.4309, "step": 6000 }, { "epoch": 0.9113001215066828, "eval_loss": 1.380075216293335, "eval_runtime": 5.5924, "eval_samples_per_second": 5.722, "eval_steps_per_second": 5.722, "step": 6000 }, { "epoch": 0.9872417982989065, "grad_norm": 0.4322833716869354, "learning_rate": 2.551640340218712e-07, "loss": 1.4513, "step": 6500 }, { "epoch": 0.9872417982989065, "eval_loss": 1.3799034357070923, "eval_runtime": 5.5646, "eval_samples_per_second": 5.751, "eval_steps_per_second": 5.751, "step": 6500 } ], "logging_steps": 500, "max_steps": 6584, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.122003923295273e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }