{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 32919, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 4.92405601628239e-05, "loss": 3.2169, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.848112032564781e-05, "loss": 2.872, "step": 1000 }, { "epoch": 0.14, "learning_rate": 4.77216804884717e-05, "loss": 2.6524, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.6962240651295605e-05, "loss": 2.5352, "step": 2000 }, { "epoch": 0.23, "learning_rate": 4.620280081411951e-05, "loss": 2.4293, "step": 2500 }, { "epoch": 0.27, "learning_rate": 4.544336097694341e-05, "loss": 2.3569, "step": 3000 }, { "epoch": 0.32, "learning_rate": 4.468392113976731e-05, "loss": 2.2906, "step": 3500 }, { "epoch": 0.36, "learning_rate": 4.392448130259121e-05, "loss": 2.2189, "step": 4000 }, { "epoch": 0.41, "learning_rate": 4.316504146541511e-05, "loss": 2.1845, "step": 4500 }, { "epoch": 0.46, "learning_rate": 4.240560162823901e-05, "loss": 2.1222, "step": 5000 }, { "epoch": 0.5, "learning_rate": 4.1646161791062915e-05, "loss": 2.0704, "step": 5500 }, { "epoch": 0.55, "learning_rate": 4.088672195388681e-05, "loss": 2.0429, "step": 6000 }, { "epoch": 0.59, "learning_rate": 4.012728211671072e-05, "loss": 1.9951, "step": 6500 }, { "epoch": 0.64, "learning_rate": 3.9367842279534616e-05, "loss": 1.9488, "step": 7000 }, { "epoch": 0.68, "learning_rate": 3.860840244235852e-05, "loss": 1.9073, "step": 7500 }, { "epoch": 0.73, "learning_rate": 3.784896260518242e-05, "loss": 1.8649, "step": 8000 }, { "epoch": 0.77, "learning_rate": 3.7089522768006316e-05, "loss": 1.8362, "step": 8500 }, { "epoch": 0.82, "learning_rate": 3.6330082930830225e-05, "loss": 1.8117, "step": 9000 }, { "epoch": 0.87, "learning_rate": 3.557064309365412e-05, "loss": 1.7883, "step": 9500 }, { "epoch": 0.91, "learning_rate": 3.4811203256478024e-05, "loss": 1.7724, "step": 10000 }, { "epoch": 0.96, "learning_rate": 3.4051763419301926e-05, "loss": 1.7215, "step": 10500 }, { "epoch": 1.0, "learning_rate": 3.329232358212583e-05, "loss": 1.671, "step": 11000 }, { "epoch": 1.05, "learning_rate": 3.2532883744949724e-05, "loss": 1.542, "step": 11500 }, { "epoch": 1.09, "learning_rate": 3.1773443907773626e-05, "loss": 1.5211, "step": 12000 }, { "epoch": 1.14, "learning_rate": 3.101400407059753e-05, "loss": 1.537, "step": 12500 }, { "epoch": 1.18, "learning_rate": 3.0254564233421428e-05, "loss": 1.519, "step": 13000 }, { "epoch": 1.23, "learning_rate": 2.9495124396245334e-05, "loss": 1.4805, "step": 13500 }, { "epoch": 1.28, "learning_rate": 2.8735684559069233e-05, "loss": 1.467, "step": 14000 }, { "epoch": 1.32, "learning_rate": 2.7976244721893135e-05, "loss": 1.4591, "step": 14500 }, { "epoch": 1.37, "learning_rate": 2.7216804884717034e-05, "loss": 1.4131, "step": 15000 }, { "epoch": 1.41, "learning_rate": 2.6457365047540933e-05, "loss": 1.4217, "step": 15500 }, { "epoch": 1.46, "learning_rate": 2.5697925210364836e-05, "loss": 1.3952, "step": 16000 }, { "epoch": 1.5, "learning_rate": 2.4938485373188738e-05, "loss": 1.3792, "step": 16500 }, { "epoch": 1.55, "learning_rate": 2.4179045536012637e-05, "loss": 1.3583, "step": 17000 }, { "epoch": 1.59, "learning_rate": 2.341960569883654e-05, "loss": 1.3674, "step": 17500 }, { "epoch": 1.64, "learning_rate": 2.266016586166044e-05, "loss": 1.3277, "step": 18000 }, { "epoch": 1.69, "learning_rate": 2.190072602448434e-05, "loss": 1.2849, "step": 18500 }, { "epoch": 1.73, "learning_rate": 2.1141286187308243e-05, "loss": 1.2953, "step": 19000 }, { "epoch": 1.78, "learning_rate": 2.0381846350132146e-05, "loss": 1.2479, "step": 19500 }, { "epoch": 1.82, "learning_rate": 1.9622406512956045e-05, "loss": 1.2357, "step": 20000 }, { "epoch": 1.87, "learning_rate": 1.8862966675779944e-05, "loss": 1.2418, "step": 20500 }, { "epoch": 1.91, "learning_rate": 1.8103526838603846e-05, "loss": 1.2233, "step": 21000 }, { "epoch": 1.96, "learning_rate": 1.734408700142775e-05, "loss": 1.1791, "step": 21500 }, { "epoch": 2.0, "learning_rate": 1.6584647164251648e-05, "loss": 1.1664, "step": 22000 }, { "epoch": 2.05, "learning_rate": 1.582520732707555e-05, "loss": 0.9787, "step": 22500 }, { "epoch": 2.1, "learning_rate": 1.5065767489899452e-05, "loss": 0.9935, "step": 23000 }, { "epoch": 2.14, "learning_rate": 1.4306327652723353e-05, "loss": 0.9884, "step": 23500 }, { "epoch": 2.19, "learning_rate": 1.3546887815547252e-05, "loss": 0.9676, "step": 24000 }, { "epoch": 2.23, "learning_rate": 1.2787447978371153e-05, "loss": 0.9653, "step": 24500 }, { "epoch": 2.28, "learning_rate": 1.2028008141195055e-05, "loss": 0.9492, "step": 25000 }, { "epoch": 2.32, "learning_rate": 1.1268568304018956e-05, "loss": 0.9345, "step": 25500 }, { "epoch": 2.37, "learning_rate": 1.0509128466842857e-05, "loss": 0.9182, "step": 26000 }, { "epoch": 2.42, "learning_rate": 9.749688629666759e-06, "loss": 0.9127, "step": 26500 }, { "epoch": 2.46, "learning_rate": 8.990248792490658e-06, "loss": 0.9039, "step": 27000 }, { "epoch": 2.51, "learning_rate": 8.23080895531456e-06, "loss": 0.8962, "step": 27500 }, { "epoch": 2.55, "learning_rate": 7.471369118138462e-06, "loss": 0.8984, "step": 28000 }, { "epoch": 2.6, "learning_rate": 6.711929280962362e-06, "loss": 0.8718, "step": 28500 }, { "epoch": 2.64, "learning_rate": 5.9524894437862635e-06, "loss": 0.8662, "step": 29000 }, { "epoch": 2.69, "learning_rate": 5.193049606610165e-06, "loss": 0.864, "step": 29500 }, { "epoch": 2.73, "learning_rate": 4.433609769434066e-06, "loss": 0.8497, "step": 30000 }, { "epoch": 2.78, "learning_rate": 3.674169932257967e-06, "loss": 0.8242, "step": 30500 }, { "epoch": 2.83, "learning_rate": 2.9147300950818677e-06, "loss": 0.8302, "step": 31000 }, { "epoch": 2.87, "learning_rate": 2.155290257905769e-06, "loss": 0.7988, "step": 31500 }, { "epoch": 2.92, "learning_rate": 1.3958504207296697e-06, "loss": 0.8169, "step": 32000 }, { "epoch": 2.96, "learning_rate": 6.364105835535709e-07, "loss": 0.8069, "step": 32500 }, { "epoch": 3.0, "step": 32919, "total_flos": 1.4064216261264e+17, "train_loss": 1.4714843301307519, "train_runtime": 14279.912, "train_samples_per_second": 73.765, "train_steps_per_second": 2.305 } ], "max_steps": 32919, "num_train_epochs": 3, "total_flos": 1.4064216261264e+17, "trial_name": null, "trial_params": null }