{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1970831690973591, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.999997870262096e-05, "loss": 2.714, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.999991481057455e-05, "loss": 2.7679, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.9999808324132915e-05, "loss": 2.6887, "step": 15 }, { "epoch": 0.01, "learning_rate": 1.999965924374964e-05, "loss": 2.6986, "step": 20 }, { "epoch": 0.01, "learning_rate": 1.999946757005972e-05, "loss": 2.7536, "step": 25 }, { "epoch": 0.01, "learning_rate": 1.9999233303879592e-05, "loss": 2.7516, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.99989564462071e-05, "loss": 2.6283, "step": 35 }, { "epoch": 0.02, "learning_rate": 1.999863699822152e-05, "loss": 2.708, "step": 40 }, { "epoch": 0.02, "learning_rate": 1.9998274961283523e-05, "loss": 2.6932, "step": 45 }, { "epoch": 0.02, "learning_rate": 1.9997870336935207e-05, "loss": 2.6321, "step": 50 }, { "epoch": 0.02, "learning_rate": 1.9997423126900056e-05, "loss": 2.7129, "step": 55 }, { "epoch": 0.02, "learning_rate": 1.9996933333082945e-05, "loss": 2.6179, "step": 60 }, { "epoch": 0.03, "learning_rate": 1.9996400957570148e-05, "loss": 2.6052, "step": 65 }, { "epoch": 0.03, "learning_rate": 1.99958260026293e-05, "loss": 2.7054, "step": 70 }, { "epoch": 0.03, "learning_rate": 1.9995208470709405e-05, "loss": 2.6568, "step": 75 }, { "epoch": 0.03, "learning_rate": 1.9994548364440836e-05, "loss": 2.6436, "step": 80 }, { "epoch": 0.03, "learning_rate": 1.999384568663529e-05, "loss": 2.7091, "step": 85 }, { "epoch": 0.04, "learning_rate": 1.9993100440285805e-05, "loss": 2.5557, "step": 90 }, { "epoch": 0.04, "learning_rate": 1.999231262856675e-05, "loss": 2.5945, "step": 95 }, { "epoch": 0.04, "learning_rate": 1.999148225483378e-05, "loss": 2.4729, "step": 100 }, { "epoch": 0.04, "learning_rate": 1.9990609322623854e-05, "loss": 2.6724, "step": 105 }, { "epoch": 0.04, "learning_rate": 1.9989693835655205e-05, "loss": 2.5272, "step": 110 }, { "epoch": 0.05, "learning_rate": 1.9988735797827336e-05, "loss": 2.6145, "step": 115 }, { "epoch": 0.05, "learning_rate": 1.9987735213220975e-05, "loss": 2.6772, "step": 120 }, { "epoch": 0.05, "learning_rate": 1.9986692086098095e-05, "loss": 2.6388, "step": 125 }, { "epoch": 0.05, "learning_rate": 1.998560642090187e-05, "loss": 2.4973, "step": 130 }, { "epoch": 0.05, "learning_rate": 1.998447822225666e-05, "loss": 2.5338, "step": 135 }, { "epoch": 0.06, "learning_rate": 1.9983307494968e-05, "loss": 2.6469, "step": 140 }, { "epoch": 0.06, "learning_rate": 1.9982094244022582e-05, "loss": 2.6008, "step": 145 }, { "epoch": 0.06, "learning_rate": 1.9980838474588214e-05, "loss": 2.5936, "step": 150 }, { "epoch": 0.06, "learning_rate": 1.9979540192013814e-05, "loss": 2.5862, "step": 155 }, { "epoch": 0.06, "learning_rate": 1.997819940182939e-05, "loss": 2.568, "step": 160 }, { "epoch": 0.07, "learning_rate": 1.9976816109746e-05, "loss": 2.5343, "step": 165 }, { "epoch": 0.07, "learning_rate": 1.9975390321655745e-05, "loss": 2.5575, "step": 170 }, { "epoch": 0.07, "learning_rate": 1.9973922043631737e-05, "loss": 2.5399, "step": 175 }, { "epoch": 0.07, "learning_rate": 1.9972411281928068e-05, "loss": 2.6394, "step": 180 }, { "epoch": 0.07, "learning_rate": 1.9970858042979794e-05, "loss": 2.5378, "step": 185 }, { "epoch": 0.07, "learning_rate": 1.9969262333402893e-05, "loss": 2.5373, "step": 190 }, { "epoch": 0.08, "learning_rate": 1.9967624159994262e-05, "loss": 2.5414, "step": 195 }, { "epoch": 0.08, "learning_rate": 1.9965943529731646e-05, "loss": 2.5606, "step": 200 }, { "epoch": 0.08, "learning_rate": 1.9964220449773664e-05, "loss": 2.5154, "step": 205 }, { "epoch": 0.08, "learning_rate": 1.9962454927459723e-05, "loss": 2.5468, "step": 210 }, { "epoch": 0.08, "learning_rate": 1.9960646970310027e-05, "loss": 2.5137, "step": 215 }, { "epoch": 0.09, "learning_rate": 1.9958796586025527e-05, "loss": 2.5453, "step": 220 }, { "epoch": 0.09, "learning_rate": 1.9956903782487885e-05, "loss": 2.5325, "step": 225 }, { "epoch": 0.09, "learning_rate": 1.9954968567759456e-05, "loss": 2.5054, "step": 230 }, { "epoch": 0.09, "learning_rate": 1.9952990950083236e-05, "loss": 2.5139, "step": 235 }, { "epoch": 0.09, "learning_rate": 1.995097093788285e-05, "loss": 2.5097, "step": 240 }, { "epoch": 0.1, "learning_rate": 1.994890853976248e-05, "loss": 2.5546, "step": 245 }, { "epoch": 0.1, "learning_rate": 1.994680376450686e-05, "loss": 2.4609, "step": 250 }, { "epoch": 0.1, "learning_rate": 1.994465662108124e-05, "loss": 2.5063, "step": 255 }, { "epoch": 0.1, "learning_rate": 1.9942467118631322e-05, "loss": 2.5162, "step": 260 }, { "epoch": 0.1, "learning_rate": 1.994023526648323e-05, "loss": 2.5317, "step": 265 }, { "epoch": 0.11, "learning_rate": 1.9937961074143492e-05, "loss": 2.4621, "step": 270 }, { "epoch": 0.11, "learning_rate": 1.9935644551298976e-05, "loss": 2.5879, "step": 275 }, { "epoch": 0.11, "learning_rate": 1.993328570781685e-05, "loss": 2.4742, "step": 280 }, { "epoch": 0.11, "learning_rate": 1.993088455374456e-05, "loss": 2.4661, "step": 285 }, { "epoch": 0.11, "learning_rate": 1.992844109930975e-05, "loss": 2.4943, "step": 290 }, { "epoch": 0.12, "learning_rate": 1.9925955354920265e-05, "loss": 2.4322, "step": 295 }, { "epoch": 0.12, "learning_rate": 1.9923427331164072e-05, "loss": 2.484, "step": 300 }, { "epoch": 0.12, "learning_rate": 1.9920857038809223e-05, "loss": 2.5334, "step": 305 }, { "epoch": 0.12, "learning_rate": 1.991824448880382e-05, "loss": 2.6554, "step": 310 }, { "epoch": 0.12, "learning_rate": 1.9915589692275955e-05, "loss": 2.4749, "step": 315 }, { "epoch": 0.13, "learning_rate": 1.991289266053367e-05, "loss": 2.5076, "step": 320 }, { "epoch": 0.13, "learning_rate": 1.9910153405064904e-05, "loss": 2.4635, "step": 325 }, { "epoch": 0.13, "learning_rate": 1.990737193753745e-05, "loss": 2.5443, "step": 330 }, { "epoch": 0.13, "learning_rate": 1.9904548269798906e-05, "loss": 2.6012, "step": 335 }, { "epoch": 0.13, "learning_rate": 1.990168241387662e-05, "loss": 2.4818, "step": 340 }, { "epoch": 0.14, "learning_rate": 1.9898774381977618e-05, "loss": 2.4717, "step": 345 }, { "epoch": 0.14, "learning_rate": 1.989582418648861e-05, "loss": 2.4756, "step": 350 }, { "epoch": 0.14, "learning_rate": 1.9892831839975874e-05, "loss": 2.4868, "step": 355 }, { "epoch": 0.14, "learning_rate": 1.9889797355185237e-05, "loss": 2.5652, "step": 360 }, { "epoch": 0.14, "learning_rate": 1.9886720745042017e-05, "loss": 2.3907, "step": 365 }, { "epoch": 0.15, "learning_rate": 1.988360202265096e-05, "loss": 2.4984, "step": 370 }, { "epoch": 0.15, "learning_rate": 1.9880441201296186e-05, "loss": 2.5654, "step": 375 }, { "epoch": 0.15, "learning_rate": 1.987723829444114e-05, "loss": 2.581, "step": 380 }, { "epoch": 0.15, "learning_rate": 1.9873993315728523e-05, "loss": 2.5189, "step": 385 }, { "epoch": 0.15, "learning_rate": 1.987070627898025e-05, "loss": 2.491, "step": 390 }, { "epoch": 0.16, "learning_rate": 1.9867377198197367e-05, "loss": 2.4875, "step": 395 }, { "epoch": 0.16, "learning_rate": 1.9864006087560016e-05, "loss": 2.4828, "step": 400 }, { "epoch": 0.16, "learning_rate": 1.9860592961427358e-05, "loss": 2.5298, "step": 405 }, { "epoch": 0.16, "learning_rate": 1.9857137834337527e-05, "loss": 2.4873, "step": 410 }, { "epoch": 0.16, "learning_rate": 1.985364072100755e-05, "loss": 2.5371, "step": 415 }, { "epoch": 0.17, "learning_rate": 1.98501016363333e-05, "loss": 2.4167, "step": 420 }, { "epoch": 0.17, "learning_rate": 1.9846520595389415e-05, "loss": 2.4312, "step": 425 }, { "epoch": 0.17, "learning_rate": 1.984289761342926e-05, "loss": 2.4471, "step": 430 }, { "epoch": 0.17, "learning_rate": 1.9839232705884836e-05, "loss": 2.4971, "step": 435 }, { "epoch": 0.17, "learning_rate": 1.9835525888366727e-05, "loss": 2.5206, "step": 440 }, { "epoch": 0.18, "learning_rate": 1.9831777176664035e-05, "loss": 2.5067, "step": 445 }, { "epoch": 0.18, "learning_rate": 1.9827986586744302e-05, "loss": 2.4903, "step": 450 }, { "epoch": 0.18, "learning_rate": 1.982415413475346e-05, "loss": 2.4391, "step": 455 }, { "epoch": 0.18, "learning_rate": 1.9820279837015742e-05, "loss": 2.4358, "step": 460 }, { "epoch": 0.18, "learning_rate": 1.981636371003363e-05, "loss": 2.4809, "step": 465 }, { "epoch": 0.19, "learning_rate": 1.9812405770487763e-05, "loss": 2.516, "step": 470 }, { "epoch": 0.19, "learning_rate": 1.9808406035236897e-05, "loss": 2.4091, "step": 475 }, { "epoch": 0.19, "learning_rate": 1.9804364521317806e-05, "loss": 2.5363, "step": 480 }, { "epoch": 0.19, "learning_rate": 1.9800281245945217e-05, "loss": 2.51, "step": 485 }, { "epoch": 0.19, "learning_rate": 1.9796156226511747e-05, "loss": 2.5049, "step": 490 }, { "epoch": 0.2, "learning_rate": 1.9791989480587815e-05, "loss": 2.4722, "step": 495 }, { "epoch": 0.2, "learning_rate": 1.978778102592157e-05, "loss": 2.4111, "step": 500 } ], "logging_steps": 5, "max_steps": 7611, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 6744791407656960.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }