{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.00404071423664847, "eval_steps": 125, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.232571389318776e-05, "eval_loss": NaN, "eval_runtime": 577.2541, "eval_samples_per_second": 45.129, "eval_steps_per_second": 22.565, "step": 1 }, { "epoch": 9.697714167956328e-05, "grad_norm": NaN, "learning_rate": 3e-05, "loss": 0.0, "step": 3 }, { "epoch": 0.00019395428335912656, "grad_norm": NaN, "learning_rate": 6e-05, "loss": 0.0, "step": 6 }, { "epoch": 0.0002909314250386898, "grad_norm": NaN, "learning_rate": 9e-05, "loss": 0.0, "step": 9 }, { "epoch": 0.0003879085667182531, "grad_norm": NaN, "learning_rate": 9.999588943391597e-05, "loss": 0.0, "step": 12 }, { "epoch": 0.0004848857083978164, "grad_norm": NaN, "learning_rate": 9.99743108100344e-05, "loss": 0.0, "step": 15 }, { "epoch": 0.0005818628500773796, "grad_norm": NaN, "learning_rate": 9.993424445916923e-05, "loss": 0.0, "step": 18 }, { "epoch": 0.0006788399917569429, "grad_norm": NaN, "learning_rate": 9.987570520365104e-05, "loss": 0.0, "step": 21 }, { "epoch": 0.0007758171334365062, "grad_norm": NaN, "learning_rate": 9.979871469976196e-05, "loss": 0.0, "step": 24 }, { "epoch": 0.0008727942751160696, "grad_norm": NaN, "learning_rate": 9.970330142972401e-05, "loss": 0.0, "step": 27 }, { "epoch": 0.0009697714167956328, "grad_norm": NaN, "learning_rate": 9.95895006911623e-05, "loss": 0.0, "step": 30 }, { "epoch": 0.0010667485584751962, "grad_norm": NaN, "learning_rate": 9.945735458404681e-05, "loss": 0.0, "step": 33 }, { "epoch": 0.0011637257001547593, "grad_norm": NaN, "learning_rate": 9.930691199511775e-05, "loss": 0.0, "step": 36 }, { "epoch": 0.0012607028418343226, "grad_norm": NaN, "learning_rate": 9.91382285798002e-05, "loss": 0.0, "step": 39 }, { "epoch": 0.0013576799835138859, "grad_norm": NaN, "learning_rate": 9.895136674161465e-05, "loss": 0.0, "step": 42 }, { "epoch": 0.0014546571251934492, "grad_norm": NaN, "learning_rate": 9.874639560909117e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.0015516342668730125, "grad_norm": NaN, "learning_rate": 9.852339101019574e-05, "loss": 0.0, "step": 48 }, { "epoch": 0.0016486114085525758, "grad_norm": NaN, "learning_rate": 9.828243544427796e-05, "loss": 0.0, "step": 51 }, { "epoch": 0.001745588550232139, "grad_norm": NaN, "learning_rate": 9.802361805155097e-05, "loss": 0.0, "step": 54 }, { "epoch": 0.0018425656919117024, "grad_norm": NaN, "learning_rate": 9.774703458011453e-05, "loss": 0.0, "step": 57 }, { "epoch": 0.0019395428335912655, "grad_norm": NaN, "learning_rate": 9.745278735053343e-05, "loss": 0.0, "step": 60 }, { "epoch": 0.002036519975270829, "grad_norm": NaN, "learning_rate": 9.714098521798465e-05, "loss": 0.0, "step": 63 }, { "epoch": 0.0021334971169503923, "grad_norm": NaN, "learning_rate": 9.681174353198687e-05, "loss": 0.0, "step": 66 }, { "epoch": 0.0022304742586299556, "grad_norm": NaN, "learning_rate": 9.64651840937276e-05, "loss": 0.0, "step": 69 }, { "epoch": 0.0023274514003095185, "grad_norm": NaN, "learning_rate": 9.610143511100354e-05, "loss": 0.0, "step": 72 }, { "epoch": 0.002424428541989082, "grad_norm": NaN, "learning_rate": 9.572063115079063e-05, "loss": 0.0, "step": 75 }, { "epoch": 0.002521405683668645, "grad_norm": NaN, "learning_rate": 9.53229130894619e-05, "loss": 0.0, "step": 78 }, { "epoch": 0.0026183828253482084, "grad_norm": NaN, "learning_rate": 9.490842806067095e-05, "loss": 0.0, "step": 81 }, { "epoch": 0.0027153599670277717, "grad_norm": NaN, "learning_rate": 9.44773294009206e-05, "loss": 0.0, "step": 84 }, { "epoch": 0.002812337108707335, "grad_norm": NaN, "learning_rate": 9.40297765928369e-05, "loss": 0.0, "step": 87 }, { "epoch": 0.0029093142503868984, "grad_norm": NaN, "learning_rate": 9.356593520616948e-05, "loss": 0.0, "step": 90 }, { "epoch": 0.0030062913920664617, "grad_norm": NaN, "learning_rate": 9.308597683653975e-05, "loss": 0.0, "step": 93 }, { "epoch": 0.003103268533746025, "grad_norm": NaN, "learning_rate": 9.259007904196023e-05, "loss": 0.0, "step": 96 }, { "epoch": 0.0032002456754255883, "grad_norm": NaN, "learning_rate": 9.207842527714767e-05, "loss": 0.0, "step": 99 }, { "epoch": 0.0032972228171051516, "grad_norm": NaN, "learning_rate": 9.155120482565521e-05, "loss": 0.0, "step": 102 }, { "epoch": 0.003394199958784715, "grad_norm": NaN, "learning_rate": 9.10086127298478e-05, "loss": 0.0, "step": 105 }, { "epoch": 0.003491177100464278, "grad_norm": NaN, "learning_rate": 9.045084971874738e-05, "loss": 0.0, "step": 108 }, { "epoch": 0.0035881542421438415, "grad_norm": NaN, "learning_rate": 8.987812213377424e-05, "loss": 0.0, "step": 111 }, { "epoch": 0.003685131383823405, "grad_norm": NaN, "learning_rate": 8.929064185241213e-05, "loss": 0.0, "step": 114 }, { "epoch": 0.0037821085255029677, "grad_norm": NaN, "learning_rate": 8.868862620982534e-05, "loss": 0.0, "step": 117 }, { "epoch": 0.003879085667182531, "grad_norm": NaN, "learning_rate": 8.807229791845673e-05, "loss": 0.0, "step": 120 }, { "epoch": 0.003976062808862094, "grad_norm": NaN, "learning_rate": 8.744188498563641e-05, "loss": 0.0, "step": 123 }, { "epoch": 0.00404071423664847, "eval_loss": NaN, "eval_runtime": 574.6926, "eval_samples_per_second": 45.33, "eval_steps_per_second": 22.666, "step": 125 } ], "logging_steps": 3, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 125, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8504522833920000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }