{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9988649262202043, "eval_steps": 6, "global_step": 110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05448354143019296, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.073, "step": 6 }, { "epoch": 0.10896708286038592, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.039, "step": 12 }, { "epoch": 0.16345062429057888, "grad_norm": 0.44932249188423157, "learning_rate": 0.0016329931618554523, "loss": 1.0353, "step": 18 }, { "epoch": 0.21793416572077184, "grad_norm": 0.7056324481964111, "learning_rate": 0.001, "loss": 0.9232, "step": 24 }, { "epoch": 0.2724177071509648, "grad_norm": 0.21584172546863556, "learning_rate": 0.0007559289460184544, "loss": 0.7909, "step": 30 }, { "epoch": 0.32690124858115777, "grad_norm": 0.19453680515289307, "learning_rate": 0.0006324555320336759, "loss": 0.7352, "step": 36 }, { "epoch": 0.3813847900113507, "grad_norm": 0.19031628966331482, "learning_rate": 0.0005547001962252292, "loss": 0.7123, "step": 42 }, { "epoch": 0.4358683314415437, "grad_norm": 0.1654825061559677, "learning_rate": 0.0005, "loss": 0.6918, "step": 48 }, { "epoch": 0.49035187287173665, "grad_norm": 0.1845771223306656, "learning_rate": 0.0004588314677411235, "loss": 0.7118, "step": 54 }, { "epoch": 0.5448354143019296, "grad_norm": 0.19587676227092743, "learning_rate": 0.00042640143271122083, "loss": 0.6994, "step": 60 }, { "epoch": 0.5993189557321226, "grad_norm": 0.20122814178466797, "learning_rate": 0.0004, "loss": 0.672, "step": 66 }, { "epoch": 0.6538024971623155, "grad_norm": 0.21745193004608154, "learning_rate": 0.0003779644730092272, "loss": 0.6553, "step": 72 }, { "epoch": 0.7082860385925085, "grad_norm": 0.17879709601402283, "learning_rate": 0.00035921060405354985, "loss": 0.6807, "step": 78 }, { "epoch": 0.7627695800227015, "grad_norm": 0.18536536395549774, "learning_rate": 0.00034299717028501764, "loss": 0.6738, "step": 84 }, { "epoch": 0.8172531214528944, "grad_norm": 0.20840761065483093, "learning_rate": 0.0003287979746107146, "loss": 0.669, "step": 90 }, { "epoch": 0.8717366628830874, "grad_norm": 0.16762040555477142, "learning_rate": 0.00031622776601683794, "loss": 0.6734, "step": 96 }, { "epoch": 0.9262202043132803, "grad_norm": 0.16556385159492493, "learning_rate": 0.00030499714066520935, "loss": 0.6661, "step": 102 }, { "epoch": 0.9807037457434733, "grad_norm": 0.1867838054895401, "learning_rate": 0.0002948839123097943, "loss": 0.6784, "step": 108 }, { "epoch": 0.9988649262202043, "step": 110, "total_flos": 1.1172110162041242e+18, "train_loss": 0.7636631564660505, "train_runtime": 975.3131, "train_samples_per_second": 14.451, "train_steps_per_second": 0.113 } ], "logging_steps": 6, "max_steps": 110, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 6, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1172110162041242e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }