{ "best_metric": 0.2851985559566787, "best_model_checkpoint": "results/checkpoint-7944", "epoch": 4.0, "global_step": 7944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1.96012084592145e-05, "loss": 2.8915, "step": 198 }, { "epoch": 0.2, "learning_rate": 1.9202416918429007e-05, "loss": 2.4239, "step": 396 }, { "epoch": 0.3, "learning_rate": 1.8803625377643506e-05, "loss": 2.2481, "step": 594 }, { "epoch": 0.4, "learning_rate": 1.840483383685801e-05, "loss": 2.1996, "step": 792 }, { "epoch": 0.5, "learning_rate": 1.800604229607251e-05, "loss": 2.1824, "step": 990 }, { "epoch": 0.6, "learning_rate": 1.760725075528701e-05, "loss": 2.0517, "step": 1188 }, { "epoch": 0.7, "learning_rate": 1.7208459214501513e-05, "loss": 2.0375, "step": 1386 }, { "epoch": 0.8, "learning_rate": 1.6809667673716012e-05, "loss": 2.0427, "step": 1584 }, { "epoch": 0.9, "learning_rate": 1.6410876132930515e-05, "loss": 1.967, "step": 1782 }, { "epoch": 1.0, "learning_rate": 1.6012084592145018e-05, "loss": 1.982, "step": 1980 }, { "epoch": 1.0, "eval_accuracy": 0.2740715098165274, "eval_loss": 2.498654365539551, "eval_runtime": 49.553, "eval_samples_per_second": 408.068, "eval_steps_per_second": 40.825, "step": 1986 }, { "epoch": 1.1, "learning_rate": 1.5613293051359517e-05, "loss": 1.8129, "step": 2178 }, { "epoch": 1.2, "learning_rate": 1.521450151057402e-05, "loss": 1.7615, "step": 2376 }, { "epoch": 1.3, "learning_rate": 1.481570996978852e-05, "loss": 1.82, "step": 2574 }, { "epoch": 1.4, "learning_rate": 1.4416918429003021e-05, "loss": 1.7927, "step": 2772 }, { "epoch": 1.5, "learning_rate": 1.4018126888217525e-05, "loss": 1.7828, "step": 2970 }, { "epoch": 1.6, "learning_rate": 1.3619335347432026e-05, "loss": 1.7809, "step": 3168 }, { "epoch": 1.69, "learning_rate": 1.3220543806646526e-05, "loss": 1.7006, "step": 3366 }, { "epoch": 1.79, "learning_rate": 1.2821752265861027e-05, "loss": 1.7561, "step": 3564 }, { "epoch": 1.89, "learning_rate": 1.242296072507553e-05, "loss": 1.7157, "step": 3762 }, { "epoch": 1.99, "learning_rate": 1.2024169184290032e-05, "loss": 1.7302, "step": 3960 }, { "epoch": 2.0, "eval_accuracy": 0.27822560704218385, "eval_loss": 2.486177444458008, "eval_runtime": 49.6405, "eval_samples_per_second": 407.349, "eval_steps_per_second": 40.753, "step": 3972 }, { "epoch": 2.09, "learning_rate": 1.1625377643504531e-05, "loss": 1.5066, "step": 4158 }, { "epoch": 2.19, "learning_rate": 1.1226586102719035e-05, "loss": 1.5125, "step": 4356 }, { "epoch": 2.29, "learning_rate": 1.0827794561933536e-05, "loss": 1.5064, "step": 4554 }, { "epoch": 2.39, "learning_rate": 1.0429003021148037e-05, "loss": 1.5471, "step": 4752 }, { "epoch": 2.49, "learning_rate": 1.003021148036254e-05, "loss": 1.5293, "step": 4950 }, { "epoch": 2.59, "learning_rate": 9.63141993957704e-06, "loss": 1.5216, "step": 5148 }, { "epoch": 2.69, "learning_rate": 9.232628398791542e-06, "loss": 1.6028, "step": 5346 }, { "epoch": 2.79, "learning_rate": 8.833836858006042e-06, "loss": 1.5475, "step": 5544 }, { "epoch": 2.89, "learning_rate": 8.435045317220545e-06, "loss": 1.506, "step": 5742 }, { "epoch": 2.99, "learning_rate": 8.036253776435046e-06, "loss": 1.5518, "step": 5940 }, { "epoch": 3.0, "eval_accuracy": 0.2739231492013254, "eval_loss": 2.57331919670105, "eval_runtime": 49.6582, "eval_samples_per_second": 407.204, "eval_steps_per_second": 40.739, "step": 5958 }, { "epoch": 3.09, "learning_rate": 7.637462235649547e-06, "loss": 1.3584, "step": 6138 }, { "epoch": 3.19, "learning_rate": 7.238670694864049e-06, "loss": 1.3197, "step": 6336 }, { "epoch": 3.29, "learning_rate": 6.83987915407855e-06, "loss": 1.3561, "step": 6534 }, { "epoch": 3.39, "learning_rate": 6.441087613293052e-06, "loss": 1.3575, "step": 6732 }, { "epoch": 3.49, "learning_rate": 6.042296072507553e-06, "loss": 1.3798, "step": 6930 }, { "epoch": 3.59, "learning_rate": 5.643504531722055e-06, "loss": 1.355, "step": 7128 }, { "epoch": 3.69, "learning_rate": 5.2447129909365566e-06, "loss": 1.3312, "step": 7326 }, { "epoch": 3.79, "learning_rate": 4.8459214501510575e-06, "loss": 1.4011, "step": 7524 }, { "epoch": 3.89, "learning_rate": 4.447129909365559e-06, "loss": 1.4124, "step": 7722 }, { "epoch": 3.99, "learning_rate": 4.048338368580061e-06, "loss": 1.3597, "step": 7920 }, { "epoch": 4.0, "eval_accuracy": 0.2851985559566787, "eval_loss": 2.571429491043091, "eval_runtime": 49.4476, "eval_samples_per_second": 408.938, "eval_steps_per_second": 40.912, "step": 7944 } ], "max_steps": 9930, "num_train_epochs": 5, "total_flos": 2302111312251120.0, "trial_name": null, "trial_params": null }