{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 18598, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 9.731153887514787e-06, "loss": 1.8656, "step": 500 }, { "epoch": 0.11, "learning_rate": 9.462307775029573e-06, "loss": 1.8079, "step": 1000 }, { "epoch": 0.16, "learning_rate": 9.193461662544361e-06, "loss": 1.7757, "step": 1500 }, { "epoch": 0.22, "learning_rate": 8.924615550059147e-06, "loss": 1.7549, "step": 2000 }, { "epoch": 0.27, "learning_rate": 8.655769437573934e-06, "loss": 1.7183, "step": 2500 }, { "epoch": 0.32, "learning_rate": 8.38692332508872e-06, "loss": 1.7325, "step": 3000 }, { "epoch": 0.38, "learning_rate": 8.118077212603506e-06, "loss": 1.7002, "step": 3500 }, { "epoch": 0.43, "learning_rate": 7.849231100118292e-06, "loss": 1.7217, "step": 4000 }, { "epoch": 0.48, "learning_rate": 7.580384987633079e-06, "loss": 1.7195, "step": 4500 }, { "epoch": 0.54, "learning_rate": 7.311538875147866e-06, "loss": 1.6911, "step": 5000 }, { "epoch": 0.59, "learning_rate": 7.042692762662653e-06, "loss": 1.6482, "step": 5500 }, { "epoch": 0.65, "learning_rate": 6.773846650177439e-06, "loss": 1.6911, "step": 6000 }, { "epoch": 0.7, "learning_rate": 6.505000537692225e-06, "loss": 1.6361, "step": 6500 }, { "epoch": 0.75, "learning_rate": 6.236154425207011e-06, "loss": 1.6181, "step": 7000 }, { "epoch": 0.81, "learning_rate": 5.967308312721799e-06, "loss": 1.6496, "step": 7500 }, { "epoch": 0.86, "learning_rate": 5.6984622002365855e-06, "loss": 1.6331, "step": 8000 }, { "epoch": 0.91, "learning_rate": 5.429616087751372e-06, "loss": 1.6057, "step": 8500 }, { "epoch": 0.97, "learning_rate": 5.160769975266158e-06, "loss": 1.6225, "step": 9000 }, { "epoch": 1.02, "learning_rate": 4.891923862780945e-06, "loss": 1.5955, "step": 9500 }, { "epoch": 1.08, "learning_rate": 4.623077750295731e-06, "loss": 1.528, "step": 10000 }, { "epoch": 1.13, "learning_rate": 4.354231637810517e-06, "loss": 1.5348, "step": 10500 }, { "epoch": 1.18, "learning_rate": 4.085385525325304e-06, "loss": 1.5142, "step": 11000 }, { "epoch": 1.24, "learning_rate": 3.816539412840091e-06, "loss": 1.4924, "step": 11500 }, { "epoch": 1.29, "learning_rate": 3.547693300354877e-06, "loss": 1.5045, "step": 12000 }, { "epoch": 1.34, "learning_rate": 3.2788471878696636e-06, "loss": 1.4959, "step": 12500 }, { "epoch": 1.4, "learning_rate": 3.01000107538445e-06, "loss": 1.5248, "step": 13000 }, { "epoch": 1.45, "learning_rate": 2.741154962899237e-06, "loss": 1.4984, "step": 13500 }, { "epoch": 1.51, "learning_rate": 2.4723088504140235e-06, "loss": 1.5079, "step": 14000 }, { "epoch": 1.56, "learning_rate": 2.2034627379288097e-06, "loss": 1.5105, "step": 14500 }, { "epoch": 1.61, "learning_rate": 1.9346166254435964e-06, "loss": 1.5058, "step": 15000 }, { "epoch": 1.67, "learning_rate": 1.6657705129583828e-06, "loss": 1.4941, "step": 15500 }, { "epoch": 1.72, "learning_rate": 1.3969244004731695e-06, "loss": 1.5275, "step": 16000 }, { "epoch": 1.77, "learning_rate": 1.1280782879879559e-06, "loss": 1.5091, "step": 16500 }, { "epoch": 1.83, "learning_rate": 8.592321755027423e-07, "loss": 1.4995, "step": 17000 }, { "epoch": 1.88, "learning_rate": 5.903860630175289e-07, "loss": 1.5139, "step": 17500 }, { "epoch": 1.94, "learning_rate": 3.215399505323153e-07, "loss": 1.4951, "step": 18000 }, { "epoch": 1.99, "learning_rate": 5.2693838047101844e-08, "loss": 1.478, "step": 18500 }, { "epoch": 2.0, "step": 18598, "total_flos": 2.094047285686272e+16, "train_loss": 1.6027834024951335, "train_runtime": 7360.333, "train_samples_per_second": 7.58, "train_steps_per_second": 2.527 } ], "logging_steps": 500, "max_steps": 18598, "num_train_epochs": 2, "save_steps": 4000, "total_flos": 2.094047285686272e+16, "trial_name": null, "trial_params": null }