{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0001555625, "eval_steps": 500, "global_step": 21000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.995e-05, "loss": 3.1324, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.99e-05, "loss": 2.7883, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.9850000000000006e-05, "loss": 2.576, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.9800000000000004e-05, "loss": 2.5156, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.975e-05, "loss": 2.4018, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.97e-05, "loss": 2.3792, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.965e-05, "loss": 2.3919, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.96e-05, "loss": 2.2942, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.9550000000000005e-05, "loss": 2.2954, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.9500000000000004e-05, "loss": 2.2145, "step": 5000 }, { "epoch": 0.01, "learning_rate": 4.945e-05, "loss": 2.1963, "step": 5500 }, { "epoch": 0.01, "learning_rate": 4.94e-05, "loss": 2.218, "step": 6000 }, { "epoch": 0.01, "learning_rate": 4.935e-05, "loss": 2.1342, "step": 6500 }, { "epoch": 0.01, "learning_rate": 4.93e-05, "loss": 2.1712, "step": 7000 }, { "epoch": 0.01, "learning_rate": 4.9250000000000004e-05, "loss": 2.1653, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.92e-05, "loss": 2.1579, "step": 8000 }, { "epoch": 0.02, "learning_rate": 4.915e-05, "loss": 2.1273, "step": 8500 }, { "epoch": 0.02, "learning_rate": 4.91e-05, "loss": 2.1075, "step": 9000 }, { "epoch": 0.02, "learning_rate": 4.905e-05, "loss": 2.0985, "step": 9500 }, { "epoch": 0.02, "learning_rate": 4.9e-05, "loss": 2.0555, "step": 10000 }, { "epoch": 0.02, "learning_rate": 4.8950000000000004e-05, "loss": 2.0641, "step": 10500 }, { "epoch": 0.02, "learning_rate": 4.89e-05, "loss": 2.062, "step": 11000 }, { "epoch": 0.02, "learning_rate": 4.885e-05, "loss": 2.0178, "step": 11500 }, { "epoch": 0.02, "learning_rate": 4.88e-05, "loss": 2.0227, "step": 12000 }, { "epoch": 0.03, "learning_rate": 4.875e-05, "loss": 2.0456, "step": 12500 }, { "epoch": 0.03, "learning_rate": 4.87e-05, "loss": 1.9856, "step": 13000 }, { "epoch": 0.03, "learning_rate": 4.8650000000000003e-05, "loss": 2.0806, "step": 13500 }, { "epoch": 0.03, "learning_rate": 4.86e-05, "loss": 1.9999, "step": 14000 }, { "epoch": 0.03, "learning_rate": 4.855e-05, "loss": 1.9597, "step": 14500 }, { "epoch": 0.03, "learning_rate": 4.85e-05, "loss": 1.9367, "step": 15000 }, { "epoch": 0.03, "learning_rate": 4.845e-05, "loss": 1.9949, "step": 15500 }, { "epoch": 0.03, "learning_rate": 4.8400000000000004e-05, "loss": 1.9795, "step": 16000 }, { "epoch": 0.03, "learning_rate": 4.835e-05, "loss": 1.9473, "step": 16500 }, { "epoch": 0.03, "learning_rate": 4.83e-05, "loss": 1.949, "step": 17000 }, { "epoch": 0.04, "learning_rate": 4.825e-05, "loss": 1.9626, "step": 17500 }, { "epoch": 0.04, "learning_rate": 4.82e-05, "loss": 1.9295, "step": 18000 }, { "epoch": 0.04, "learning_rate": 4.815e-05, "loss": 1.9425, "step": 18500 }, { "epoch": 0.04, "learning_rate": 4.8100000000000004e-05, "loss": 1.8936, "step": 19000 }, { "epoch": 0.04, "learning_rate": 4.805e-05, "loss": 1.9084, "step": 19500 }, { "epoch": 0.04, "learning_rate": 4.8e-05, "loss": 1.907, "step": 20000 }, { "epoch": 0.04, "learning_rate": 4.795e-05, "loss": 1.8878, "step": 20500 }, { "epoch": 1.0, "learning_rate": 4.79e-05, "loss": 1.9235, "step": 21000 } ], "logging_steps": 500, "max_steps": 500000, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 9.977612193497088e+18, "trial_name": null, "trial_params": null }