{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.008123683074814043, "eval_steps": 400, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.097165991902834e-09, "loss": 3.5463, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.048582995951417e-08, "loss": 3.6591, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.097165991902834e-08, "loss": 3.6377, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.214574898785425e-07, "loss": 3.3634, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.6194331983805668e-07, "loss": 3.069, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.0242914979757083e-07, "loss": 2.6403, "step": 25 }, { "epoch": 0.0, "learning_rate": 2.42914979757085e-07, "loss": 2.1059, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.8340080971659917e-07, "loss": 1.7116, "step": 35 }, { "epoch": 0.0, "learning_rate": 3.2388663967611335e-07, "loss": 1.2154, "step": 40 }, { "epoch": 0.0, "learning_rate": 3.6437246963562754e-07, "loss": 1.1809, "step": 45 }, { "epoch": 0.0, "learning_rate": 4.0485829959514166e-07, "loss": 1.1222, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.4534412955465585e-07, "loss": 1.0215, "step": 55 }, { "epoch": 0.0, "learning_rate": 4.8582995951417e-07, "loss": 0.9725, "step": 60 }, { "epoch": 0.0, "learning_rate": 5.263157894736842e-07, "loss": 0.9506, "step": 65 }, { "epoch": 0.0, "learning_rate": 5.668016194331983e-07, "loss": 0.8981, "step": 70 }, { "epoch": 0.0, "learning_rate": 6.072874493927125e-07, "loss": 0.8955, "step": 75 }, { "epoch": 0.0, "learning_rate": 6.477732793522267e-07, "loss": 0.8819, "step": 80 }, { "epoch": 0.0, "learning_rate": 6.882591093117408e-07, "loss": 0.9384, "step": 85 }, { "epoch": 0.0, "learning_rate": 7.287449392712551e-07, "loss": 0.9053, "step": 90 }, { "epoch": 0.0, "learning_rate": 7.692307692307693e-07, "loss": 0.8439, "step": 95 }, { "epoch": 0.0, "learning_rate": 8.097165991902833e-07, "loss": 0.8176, "step": 100 }, { "epoch": 0.0, "learning_rate": 8.502024291497975e-07, "loss": 0.8403, "step": 105 }, { "epoch": 0.0, "learning_rate": 8.906882591093117e-07, "loss": 0.8365, "step": 110 }, { "epoch": 0.0, "learning_rate": 9.311740890688259e-07, "loss": 0.8354, "step": 115 }, { "epoch": 0.0, "learning_rate": 9.7165991902834e-07, "loss": 0.7901, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.0121457489878542e-06, "loss": 0.8182, "step": 125 }, { "epoch": 0.01, "learning_rate": 1.0526315789473683e-06, "loss": 0.8021, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.0931174089068826e-06, "loss": 0.7999, "step": 135 }, { "epoch": 0.01, "learning_rate": 1.1336032388663967e-06, "loss": 0.7896, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.1740890688259108e-06, "loss": 0.8225, "step": 145 }, { "epoch": 0.01, "learning_rate": 1.214574898785425e-06, "loss": 0.8044, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.2550607287449393e-06, "loss": 0.7872, "step": 155 }, { "epoch": 0.01, "learning_rate": 1.2955465587044534e-06, "loss": 0.826, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.3360323886639675e-06, "loss": 0.772, "step": 165 }, { "epoch": 0.01, "learning_rate": 1.3765182186234816e-06, "loss": 0.7842, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.4170040485829959e-06, "loss": 0.7833, "step": 175 }, { "epoch": 0.01, "learning_rate": 1.4574898785425101e-06, "loss": 0.7697, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.4979757085020242e-06, "loss": 0.7515, "step": 185 }, { "epoch": 0.01, "learning_rate": 1.5384615384615385e-06, "loss": 0.7914, "step": 190 }, { "epoch": 0.01, "learning_rate": 1.5789473684210526e-06, "loss": 0.7729, "step": 195 }, { "epoch": 0.01, "learning_rate": 1.6194331983805667e-06, "loss": 0.7222, "step": 200 } ], "logging_steps": 5, "max_steps": 24619, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "total_flos": 27833727655936.0, "trial_name": null, "trial_params": null }