{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2721, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11025358324145534, "grad_norm": 0.8042088747024536, "learning_rate": 4.816244027930908e-05, "loss": 5.8614, "step": 100 }, { "epoch": 0.2205071664829107, "grad_norm": 0.7716182470321655, "learning_rate": 4.6324880558618154e-05, "loss": 4.914, "step": 200 }, { "epoch": 0.33076074972436603, "grad_norm": 0.7360222935676575, "learning_rate": 4.448732083792723e-05, "loss": 4.8208, "step": 300 }, { "epoch": 0.4410143329658214, "grad_norm": 0.6253624558448792, "learning_rate": 4.264976111723631e-05, "loss": 4.7569, "step": 400 }, { "epoch": 0.5512679162072768, "grad_norm": 0.8106631636619568, "learning_rate": 4.081220139654539e-05, "loss": 4.7186, "step": 500 }, { "epoch": 0.6615214994487321, "grad_norm": 0.7546982765197754, "learning_rate": 3.897464167585447e-05, "loss": 4.7019, "step": 600 }, { "epoch": 0.7717750826901875, "grad_norm": 0.6954119205474854, "learning_rate": 3.713708195516354e-05, "loss": 4.6666, "step": 700 }, { "epoch": 0.8820286659316428, "grad_norm": 0.8203008770942688, "learning_rate": 3.529952223447262e-05, "loss": 4.6249, "step": 800 }, { "epoch": 0.9922822491730982, "grad_norm": 0.6401004791259766, "learning_rate": 3.34619625137817e-05, "loss": 4.6244, "step": 900 }, { "epoch": 1.1025358324145536, "grad_norm": 0.7980089783668518, "learning_rate": 3.162440279309078e-05, "loss": 4.5987, "step": 1000 }, { "epoch": 1.2127894156560088, "grad_norm": 0.6513141393661499, "learning_rate": 2.9786843072399855e-05, "loss": 4.5563, "step": 1100 }, { "epoch": 1.3230429988974641, "grad_norm": 0.7528166770935059, "learning_rate": 2.7949283351708934e-05, "loss": 4.5834, "step": 1200 }, { "epoch": 1.4332965821389196, "grad_norm": 0.8176023960113525, "learning_rate": 2.611172363101801e-05, "loss": 4.5594, "step": 1300 }, { "epoch": 1.543550165380375, "grad_norm": 0.6479560732841492, "learning_rate": 2.4274163910327085e-05, "loss": 4.5616, "step": 1400 }, { "epoch": 1.6538037486218302, "grad_norm": 0.6746203899383545, "learning_rate": 2.2436604189636164e-05, "loss": 4.5177, "step": 1500 }, { "epoch": 1.7640573318632855, "grad_norm": 0.6707162857055664, "learning_rate": 2.059904446894524e-05, "loss": 4.5275, "step": 1600 }, { "epoch": 1.8743109151047408, "grad_norm": 0.6458820700645447, "learning_rate": 1.876148474825432e-05, "loss": 4.5453, "step": 1700 }, { "epoch": 1.9845644983461963, "grad_norm": 0.6533669829368591, "learning_rate": 1.6923925027563398e-05, "loss": 4.5206, "step": 1800 }, { "epoch": 2.0948180815876514, "grad_norm": 0.7627405524253845, "learning_rate": 1.5086365306872474e-05, "loss": 4.5118, "step": 1900 }, { "epoch": 2.205071664829107, "grad_norm": 0.7460177540779114, "learning_rate": 1.3248805586181551e-05, "loss": 4.4961, "step": 2000 }, { "epoch": 2.3153252480705624, "grad_norm": 0.6778430938720703, "learning_rate": 1.1411245865490629e-05, "loss": 4.4962, "step": 2100 }, { "epoch": 2.4255788313120177, "grad_norm": 0.6369034647941589, "learning_rate": 9.573686144799706e-06, "loss": 4.4971, "step": 2200 }, { "epoch": 2.535832414553473, "grad_norm": 0.6314408779144287, "learning_rate": 7.736126424108783e-06, "loss": 4.5085, "step": 2300 }, { "epoch": 2.6460859977949283, "grad_norm": 0.6529719233512878, "learning_rate": 5.8985667034178614e-06, "loss": 4.4746, "step": 2400 }, { "epoch": 2.7563395810363835, "grad_norm": 0.6391235589981079, "learning_rate": 4.061006982726939e-06, "loss": 4.4794, "step": 2500 }, { "epoch": 2.8665931642778393, "grad_norm": 0.8441604971885681, "learning_rate": 2.223447262036016e-06, "loss": 4.4858, "step": 2600 }, { "epoch": 2.9768467475192946, "grad_norm": 0.644396960735321, "learning_rate": 3.858875413450937e-07, "loss": 4.5054, "step": 2700 } ], "logging_steps": 100, "max_steps": 2721, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1472045103710208.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }