{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 192, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15625, "grad_norm": 3.410148832715147, "learning_rate": 5e-06, "loss": 0.7255, "step": 10 }, { "epoch": 0.3125, "grad_norm": 1.9658070319836614, "learning_rate": 5e-06, "loss": 0.6224, "step": 20 }, { "epoch": 0.46875, "grad_norm": 2.0869328497874875, "learning_rate": 5e-06, "loss": 0.5848, "step": 30 }, { "epoch": 0.625, "grad_norm": 1.2748507240351463, "learning_rate": 5e-06, "loss": 0.5638, "step": 40 }, { "epoch": 0.78125, "grad_norm": 0.8013309231484568, "learning_rate": 5e-06, "loss": 0.5486, "step": 50 }, { "epoch": 0.9375, "grad_norm": 0.8121748600070269, "learning_rate": 5e-06, "loss": 0.5351, "step": 60 }, { "epoch": 1.0, "eval_loss": 0.5265702605247498, "eval_runtime": 6.4953, "eval_samples_per_second": 263.115, "eval_steps_per_second": 1.078, "step": 64 }, { "epoch": 1.09375, "grad_norm": 0.6230488954615538, "learning_rate": 5e-06, "loss": 0.5117, "step": 70 }, { "epoch": 1.25, "grad_norm": 0.6381342635296321, "learning_rate": 5e-06, "loss": 0.4927, "step": 80 }, { "epoch": 1.40625, "grad_norm": 0.5885474331678158, "learning_rate": 5e-06, "loss": 0.488, "step": 90 }, { "epoch": 1.5625, "grad_norm": 0.517571001381851, "learning_rate": 5e-06, "loss": 0.4873, "step": 100 }, { "epoch": 1.71875, "grad_norm": 0.4914014029041883, "learning_rate": 5e-06, "loss": 0.4868, "step": 110 }, { "epoch": 1.875, "grad_norm": 0.5996088431576516, "learning_rate": 5e-06, "loss": 0.483, "step": 120 }, { "epoch": 2.0, "eval_loss": 0.5022982358932495, "eval_runtime": 6.4313, "eval_samples_per_second": 265.73, "eval_steps_per_second": 1.088, "step": 128 }, { "epoch": 2.03125, "grad_norm": 0.9648708540633031, "learning_rate": 5e-06, "loss": 0.4743, "step": 130 }, { "epoch": 2.1875, "grad_norm": 0.641069504623041, "learning_rate": 5e-06, "loss": 0.4464, "step": 140 }, { "epoch": 2.34375, "grad_norm": 0.5277740771835514, "learning_rate": 5e-06, "loss": 0.4484, "step": 150 }, { "epoch": 2.5, "grad_norm": 0.5645101097649855, "learning_rate": 5e-06, "loss": 0.4458, "step": 160 }, { "epoch": 2.65625, "grad_norm": 0.6429640058172255, "learning_rate": 5e-06, "loss": 0.4485, "step": 170 }, { "epoch": 2.8125, "grad_norm": 0.4995911800780122, "learning_rate": 5e-06, "loss": 0.4471, "step": 180 }, { "epoch": 2.96875, "grad_norm": 0.6882039357462453, "learning_rate": 5e-06, "loss": 0.4461, "step": 190 }, { "epoch": 3.0, "eval_loss": 0.5014501214027405, "eval_runtime": 6.2961, "eval_samples_per_second": 271.439, "eval_steps_per_second": 1.112, "step": 192 }, { "epoch": 3.0, "step": 192, "total_flos": 321607151124480.0, "train_loss": 0.5088142100721598, "train_runtime": 1474.9936, "train_samples_per_second": 66.021, "train_steps_per_second": 0.13 } ], "logging_steps": 10, "max_steps": 192, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 321607151124480.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }