{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.991123701605288, "global_step": 164, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 9.999059852242507e-05, "loss": 3.0887, "step": 3 }, { "epoch": 0.07, "learning_rate": 9.98496470583896e-05, "loss": 3.0765, "step": 6 }, { "epoch": 0.11, "learning_rate": 9.954002016824227e-05, "loss": 3.0999, "step": 9 }, { "epoch": 0.15, "learning_rate": 9.906276553136923e-05, "loss": 3.1509, "step": 12 }, { "epoch": 0.18, "learning_rate": 9.84194980263903e-05, "loss": 3.0782, "step": 15 }, { "epoch": 0.22, "learning_rate": 9.761239426692077e-05, "loss": 3.1602, "step": 18 }, { "epoch": 0.25, "learning_rate": 9.664418523660004e-05, "loss": 3.1038, "step": 21 }, { "epoch": 0.29, "learning_rate": 9.551814704830734e-05, "loss": 3.1121, "step": 24 }, { "epoch": 0.33, "learning_rate": 9.423808985883289e-05, "loss": 3.1254, "step": 27 }, { "epoch": 0.36, "learning_rate": 9.280834497651334e-05, "loss": 3.0885, "step": 30 }, { "epoch": 0.4, "learning_rate": 9.123375020545535e-05, "loss": 3.1051, "step": 33 }, { "epoch": 0.44, "learning_rate": 8.951963347593797e-05, "loss": 3.0836, "step": 36 }, { "epoch": 0.47, "learning_rate": 8.767179481638303e-05, "loss": 3.1576, "step": 39 }, { "epoch": 0.51, "learning_rate": 8.569648672789497e-05, "loss": 3.0979, "step": 42 }, { "epoch": 0.54, "learning_rate": 8.360039302777612e-05, "loss": 3.0679, "step": 45 }, { "epoch": 0.58, "learning_rate": 8.139060623360493e-05, "loss": 3.1112, "step": 48 }, { "epoch": 0.62, "learning_rate": 7.907460356440133e-05, "loss": 3.1174, "step": 51 }, { "epoch": 0.65, "learning_rate": 7.666022164008457e-05, "loss": 3.102, "step": 54 }, { "epoch": 0.69, "learning_rate": 7.415562996483192e-05, "loss": 3.1074, "step": 57 }, { "epoch": 0.73, "learning_rate": 7.156930328406268e-05, "loss": 3.1113, "step": 60 }, { "epoch": 0.76, "learning_rate": 6.890999290858214e-05, "loss": 3.0953, "step": 63 }, { "epoch": 0.8, "learning_rate": 6.618669710291606e-05, "loss": 3.1204, "step": 66 }, { "epoch": 0.83, "learning_rate": 6.340863063803188e-05, "loss": 3.1302, "step": 69 }, { "epoch": 0.87, "learning_rate": 6.058519361147055e-05, "loss": 3.0968, "step": 72 }, { "epoch": 0.91, "learning_rate": 5.772593964039203e-05, "loss": 3.0979, "step": 75 }, { "epoch": 0.94, "learning_rate": 5.484054353515896e-05, "loss": 3.0957, "step": 78 }, { "epoch": 0.98, "learning_rate": 5.193876856284085e-05, "loss": 3.1284, "step": 81 }, { "epoch": 1.02, "learning_rate": 4.903043341140879e-05, "loss": 3.8455, "step": 84 }, { "epoch": 1.06, "learning_rate": 4.612537896640346e-05, "loss": 3.0487, "step": 87 }, { "epoch": 1.1, "learning_rate": 4.323343501249346e-05, "loss": 3.0762, "step": 90 }, { "epoch": 1.13, "learning_rate": 4.036438697259551e-05, "loss": 3.1029, "step": 93 }, { "epoch": 1.17, "learning_rate": 3.752794279710094e-05, "loss": 3.0846, "step": 96 }, { "epoch": 1.21, "learning_rate": 3.473370011524435e-05, "loss": 3.0986, "step": 99 }, { "epoch": 1.24, "learning_rate": 3.199111375976449e-05, "loss": 3.1148, "step": 102 }, { "epoch": 1.28, "learning_rate": 2.9309463774743046e-05, "loss": 3.076, "step": 105 }, { "epoch": 1.31, "learning_rate": 2.6697824014873075e-05, "loss": 3.0731, "step": 108 }, { "epoch": 1.35, "learning_rate": 2.4165031442406855e-05, "loss": 3.1045, "step": 111 }, { "epoch": 1.39, "learning_rate": 2.171965622567308e-05, "loss": 3.1026, "step": 114 }, { "epoch": 1.42, "learning_rate": 1.936997274033986e-05, "loss": 3.0352, "step": 117 }, { "epoch": 1.46, "learning_rate": 1.7123931571546827e-05, "loss": 3.0999, "step": 120 }, { "epoch": 1.5, "learning_rate": 1.4989132611641576e-05, "loss": 3.0609, "step": 123 }, { "epoch": 1.53, "learning_rate": 1.297279934454978e-05, "loss": 3.0611, "step": 126 }, { "epoch": 1.57, "learning_rate": 1.1081754403791999e-05, "loss": 3.1195, "step": 129 }, { "epoch": 1.6, "learning_rate": 9.322396486851626e-06, "loss": 3.0566, "step": 132 }, { "epoch": 1.64, "learning_rate": 7.700678704007947e-06, "loss": 3.1333, "step": 135 }, { "epoch": 1.68, "learning_rate": 6.222088434895462e-06, "loss": 3.0838, "step": 138 }, { "epoch": 1.71, "learning_rate": 4.891628760948114e-06, "loss": 3.1032, "step": 141 }, { "epoch": 1.75, "learning_rate": 3.7138015365554833e-06, "loss": 3.0524, "step": 144 }, { "epoch": 1.79, "learning_rate": 2.692592156212487e-06, "loss": 3.0527, "step": 147 }, { "epoch": 1.82, "learning_rate": 1.8314560692059835e-06, "loss": 3.0637, "step": 150 }, { "epoch": 1.86, "learning_rate": 1.1333070874682216e-06, "loss": 3.0875, "step": 153 }, { "epoch": 1.89, "learning_rate": 6.005075261595494e-07, "loss": 3.0881, "step": 156 }, { "epoch": 1.93, "learning_rate": 2.3486021034170857e-07, "loss": 3.0721, "step": 159 }, { "epoch": 1.97, "learning_rate": 3.760237478849793e-08, "loss": 3.0953, "step": 162 }, { "epoch": 1.99, "step": 164, "total_flos": 1.5551382941597696e+18, "train_loss": 3.1085294397865852, "train_runtime": 68661.2713, "train_samples_per_second": 0.308, "train_steps_per_second": 0.002 } ], "max_steps": 164, "num_train_epochs": 2, "total_flos": 1.5551382941597696e+18, "trial_name": null, "trial_params": null }