{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.09592326139088729, "eval_steps": 500, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00013, "loss": 1.1241, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.00026, "loss": 1.0107, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.00039, "loss": 1.1086, "step": 3 }, { "epoch": 0.01, "learning_rate": 0.00052, "loss": 1.0044, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.00065, "loss": 1.0496, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.0005933661039639299, "loss": 1.0199, "step": 6 }, { "epoch": 0.02, "learning_rate": 0.0005493502655735357, "loss": 1.0198, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.0005138701197773616, "loss": 0.969, "step": 8 }, { "epoch": 0.02, "learning_rate": 0.0004844813951249544, "loss": 0.9383, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.0004596194077712558, "loss": 0.8776, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.0004382299106011073, "loss": 1.0173, "step": 11 }, { "epoch": 0.03, "learning_rate": 0.0004195731958391368, "loss": 1.1173, "step": 12 }, { "epoch": 0.03, "learning_rate": 0.0004031128874149274, "loss": 1.0876, "step": 13 }, { "epoch": 0.03, "learning_rate": 0.0003884492980336779, "loss": 1.0524, "step": 14 }, { "epoch": 0.04, "learning_rate": 0.0003752776749732568, "loss": 0.8953, "step": 15 }, { "epoch": 0.04, "learning_rate": 0.00036336104634371584, "loss": 1.1335, "step": 16 }, { "epoch": 0.04, "learning_rate": 0.00035251199395531623, "loss": 0.9837, "step": 17 }, { "epoch": 0.04, "learning_rate": 0.00034258007985157445, "loss": 0.9707, "step": 18 }, { "epoch": 0.05, "learning_rate": 0.0003334429644276751, "loss": 0.9149, "step": 19 }, { "epoch": 0.05, "learning_rate": 0.000325, "loss": 1.0043, "step": 20 }, { "epoch": 0.05, "learning_rate": 0.00031716752370827323, "loss": 1.001, "step": 21 }, { "epoch": 0.05, "learning_rate": 0.00030987534150481746, "loss": 1.0395, "step": 22 }, { "epoch": 0.06, "learning_rate": 0.000303064062678102, "loss": 0.8718, "step": 23 }, { "epoch": 0.06, "learning_rate": 0.00029668305198196496, "loss": 1.1114, "step": 24 }, { "epoch": 0.06, "learning_rate": 0.00029068883707497264, "loss": 0.7765, "step": 25 }, { "epoch": 0.06, "learning_rate": 0.0002850438562747845, "loss": 0.9522, "step": 26 }, { "epoch": 0.06, "learning_rate": 0.00027971546389275785, "loss": 0.9588, "step": 27 }, { "epoch": 0.07, "learning_rate": 0.00027467513278676785, "loss": 1.0313, "step": 28 }, { "epoch": 0.07, "learning_rate": 0.0002698978095246549, "loss": 0.9338, "step": 29 }, { "epoch": 0.07, "learning_rate": 0.000265361388801511, "loss": 0.892, "step": 30 }, { "epoch": 0.07, "learning_rate": 0.00026104628189331215, "loss": 0.893, "step": 31 }, { "epoch": 0.08, "learning_rate": 0.0002569350598886808, "loss": 0.8983, "step": 32 }, { "epoch": 0.08, "learning_rate": 0.00025301215685249496, "loss": 0.9277, "step": 33 }, { "epoch": 0.08, "learning_rate": 0.00024926362137539537, "loss": 0.8962, "step": 34 }, { "epoch": 0.08, "learning_rate": 0.00024567690745599767, "loss": 0.9124, "step": 35 }, { "epoch": 0.09, "learning_rate": 0.0002422406975624772, "loss": 0.9535, "step": 36 }, { "epoch": 0.09, "learning_rate": 0.00023894475218048754, "loss": 0.9019, "step": 37 }, { "epoch": 0.09, "learning_rate": 0.0002357797812857538, "loss": 1.024, "step": 38 }, { "epoch": 0.09, "learning_rate": 0.00023273733406281566, "loss": 0.8549, "step": 39 }, { "epoch": 0.1, "learning_rate": 0.0002298097038856279, "loss": 1.0489, "step": 40 } ], "logging_steps": 1, "max_steps": 417, "num_train_epochs": 1, "save_steps": 10, "total_flos": 1.2924943770845184e+16, "trial_name": null, "trial_params": null }