{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.925743170056565, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9.759268175252769e-05, "loss": 9.8702, "step": 100 }, { "epoch": 0.1, "learning_rate": 9.518536350505537e-05, "loss": 5.0407, "step": 200 }, { "epoch": 0.14, "learning_rate": 9.277804525758305e-05, "loss": 3.9664, "step": 300 }, { "epoch": 0.19, "learning_rate": 9.037072701011074e-05, "loss": 3.6556, "step": 400 }, { "epoch": 0.24, "learning_rate": 8.796340876263843e-05, "loss": 3.4608, "step": 500 }, { "epoch": 0.29, "learning_rate": 8.555609051516611e-05, "loss": 3.3129, "step": 600 }, { "epoch": 0.34, "learning_rate": 8.31487722676938e-05, "loss": 3.2072, "step": 700 }, { "epoch": 0.39, "learning_rate": 8.074145402022148e-05, "loss": 3.1383, "step": 800 }, { "epoch": 0.43, "learning_rate": 7.833413577274916e-05, "loss": 3.0606, "step": 900 }, { "epoch": 0.48, "learning_rate": 7.592681752527685e-05, "loss": 2.9994, "step": 1000 }, { "epoch": 0.53, "learning_rate": 7.351949927780452e-05, "loss": 2.9714, "step": 1100 }, { "epoch": 0.58, "learning_rate": 7.111218103033221e-05, "loss": 2.9432, "step": 1200 }, { "epoch": 0.63, "learning_rate": 6.87048627828599e-05, "loss": 2.8978, "step": 1300 }, { "epoch": 0.67, "learning_rate": 6.629754453538758e-05, "loss": 2.8998, "step": 1400 }, { "epoch": 0.72, "learning_rate": 6.389022628791527e-05, "loss": 2.8704, "step": 1500 }, { "epoch": 0.77, "learning_rate": 6.148290804044296e-05, "loss": 2.8642, "step": 1600 }, { "epoch": 0.82, "learning_rate": 5.907558979297063e-05, "loss": 2.832, "step": 1700 }, { "epoch": 0.87, "learning_rate": 5.666827154549832e-05, "loss": 2.8027, "step": 1800 }, { "epoch": 0.91, "learning_rate": 5.426095329802601e-05, "loss": 2.7968, "step": 1900 }, { "epoch": 0.96, "learning_rate": 5.185363505055368e-05, "loss": 2.7954, "step": 2000 }, { "epoch": 1.0, "eval_loss": 2.46354079246521, "eval_runtime": 2.8271, "eval_samples_per_second": 353.714, "eval_steps_per_second": 44.214, "step": 2077 }, { "epoch": 1.01, "learning_rate": 4.9446316803081375e-05, "loss": 2.7673, "step": 2100 }, { "epoch": 1.06, "learning_rate": 4.7038998555609055e-05, "loss": 2.7401, "step": 2200 }, { "epoch": 1.11, "learning_rate": 4.4631680308136736e-05, "loss": 2.7456, "step": 2300 }, { "epoch": 1.16, "learning_rate": 4.222436206066442e-05, "loss": 2.742, "step": 2400 }, { "epoch": 1.2, "learning_rate": 3.98170438131921e-05, "loss": 2.7218, "step": 2500 }, { "epoch": 1.25, "learning_rate": 3.740972556571979e-05, "loss": 2.7248, "step": 2600 }, { "epoch": 1.3, "learning_rate": 3.500240731824748e-05, "loss": 2.7145, "step": 2700 }, { "epoch": 1.35, "learning_rate": 3.259508907077516e-05, "loss": 2.7046, "step": 2800 }, { "epoch": 1.4, "learning_rate": 3.0187770823302842e-05, "loss": 2.7095, "step": 2900 }, { "epoch": 1.44, "learning_rate": 2.7780452575830522e-05, "loss": 2.7005, "step": 3000 }, { "epoch": 1.49, "learning_rate": 2.537313432835821e-05, "loss": 2.6941, "step": 3100 }, { "epoch": 1.54, "learning_rate": 2.2965816080885893e-05, "loss": 2.6727, "step": 3200 }, { "epoch": 1.59, "learning_rate": 2.0558497833413577e-05, "loss": 2.6959, "step": 3300 }, { "epoch": 1.64, "learning_rate": 1.8151179585941264e-05, "loss": 2.6839, "step": 3400 }, { "epoch": 1.69, "learning_rate": 1.5743861338468945e-05, "loss": 2.6781, "step": 3500 }, { "epoch": 1.73, "learning_rate": 1.333654309099663e-05, "loss": 2.6732, "step": 3600 }, { "epoch": 1.78, "learning_rate": 1.0929224843524314e-05, "loss": 2.6776, "step": 3700 }, { "epoch": 1.83, "learning_rate": 8.521906596051998e-06, "loss": 2.673, "step": 3800 }, { "epoch": 1.88, "learning_rate": 6.114588348579683e-06, "loss": 2.6832, "step": 3900 }, { "epoch": 1.93, "learning_rate": 3.7072701011073664e-06, "loss": 2.6659, "step": 4000 } ], "max_steps": 4154, "num_train_epochs": 2, "total_flos": 2.1297995361473126e+17, "trial_name": null, "trial_params": null }