{ "best_metric": null, "best_model_checkpoint": null, "epoch": 96.97087378640776, "eval_steps": 500, "global_step": 2497, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.15, "learning_rate": 0.000324, "loss": 1.6248, "step": 81 }, { "epoch": 6.29, "learning_rate": 0.000648, "loss": 1.5109, "step": 162 }, { "epoch": 9.44, "learning_rate": 0.000972, "loss": 1.4155, "step": 243 }, { "epoch": 12.58, "learning_rate": 0.0009671111111111112, "loss": 1.328, "step": 324 }, { "epoch": 15.73, "learning_rate": 0.0009311111111111112, "loss": 1.2665, "step": 405 }, { "epoch": 18.87, "learning_rate": 0.0008951111111111111, "loss": 1.2178, "step": 486 }, { "epoch": 22.02, "learning_rate": 0.0008591111111111112, "loss": 1.1829, "step": 567 }, { "epoch": 25.17, "learning_rate": 0.0008231111111111112, "loss": 1.1523, "step": 648 }, { "epoch": 28.31, "learning_rate": 0.0007871111111111111, "loss": 1.1296, "step": 729 }, { "epoch": 31.46, "learning_rate": 0.000751111111111111, "loss": 1.1084, "step": 810 }, { "epoch": 34.6, "learning_rate": 0.0007151111111111111, "loss": 1.0855, "step": 891 }, { "epoch": 37.75, "learning_rate": 0.0006791111111111111, "loss": 1.0708, "step": 972 }, { "epoch": 40.89, "learning_rate": 0.0006431111111111111, "loss": 1.0536, "step": 1053 }, { "epoch": 44.04, "learning_rate": 0.0006071111111111112, "loss": 1.0359, "step": 1134 }, { "epoch": 47.18, "learning_rate": 0.0005711111111111111, "loss": 1.0246, "step": 1215 }, { "epoch": 50.33, "learning_rate": 0.0005351111111111111, "loss": 1.0132, "step": 1296 }, { "epoch": 53.48, "learning_rate": 0.0004991111111111111, "loss": 1.0013, "step": 1377 }, { "epoch": 56.62, "learning_rate": 0.0004631111111111111, "loss": 0.9878, "step": 1458 }, { "epoch": 59.77, "learning_rate": 0.0004271111111111111, "loss": 0.9766, "step": 1539 }, { "epoch": 62.91, "learning_rate": 0.0003911111111111111, "loss": 0.9643, "step": 1620 }, { "epoch": 66.06, "learning_rate": 0.0003551111111111111, "loss": 0.9538, "step": 1701 }, { "epoch": 69.2, "learning_rate": 0.0003191111111111111, "loss": 0.9486, "step": 1782 }, { "epoch": 72.35, "learning_rate": 0.0002831111111111111, "loss": 0.9382, "step": 1863 }, { "epoch": 75.5, "learning_rate": 0.00024711111111111114, "loss": 0.9255, "step": 1944 }, { "epoch": 78.64, "learning_rate": 0.0002111111111111111, "loss": 0.9153, "step": 2025 }, { "epoch": 81.79, "learning_rate": 0.0001751111111111111, "loss": 0.9069, "step": 2106 }, { "epoch": 84.93, "learning_rate": 0.0001391111111111111, "loss": 0.8996, "step": 2187 }, { "epoch": 88.08, "learning_rate": 0.00010311111111111111, "loss": 0.888, "step": 2268 }, { "epoch": 91.22, "learning_rate": 6.71111111111111e-05, "loss": 0.8791, "step": 2349 }, { "epoch": 94.37, "learning_rate": 3.111111111111111e-05, "loss": 0.8721, "step": 2430 } ], "logging_steps": 81, "max_steps": 2500, "num_train_epochs": 100, "save_steps": 500, "total_flos": 2.7961865832310505e+19, "trial_name": null, "trial_params": null }