{ "best_metric": 0.9402609599838997, "best_model_checkpoint": "model_saves/xlnet-large_spell_10k_3_p3/checkpoint-536", "epoch": 5.0, "global_step": 1340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.9399423070472612, "eval_loss": 0.4410298764705658, "eval_runtime": 4.8733, "eval_samples_per_second": 899.177, "eval_steps_per_second": 7.182, "step": 268 }, { "epoch": 2.0, "eval_accuracy": 0.9402609599838997, "eval_loss": 0.44978341460227966, "eval_runtime": 5.0594, "eval_samples_per_second": 866.102, "eval_steps_per_second": 6.918, "step": 536 }, { "epoch": 3.0, "eval_accuracy": 0.9393385435883674, "eval_loss": 0.4808991849422455, "eval_runtime": 4.8831, "eval_samples_per_second": 897.377, "eval_steps_per_second": 7.168, "step": 804 }, { "epoch": 3.73, "learning_rate": 1e-05, "loss": 0.323, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.9394279900267222, "eval_loss": 0.5203689932823181, "eval_runtime": 4.8771, "eval_samples_per_second": 898.49, "eval_steps_per_second": 7.176, "step": 1072 }, { "epoch": 5.0, "eval_accuracy": 0.9399255358400698, "eval_loss": 0.5856708288192749, "eval_runtime": 4.8903, "eval_samples_per_second": 896.057, "eval_steps_per_second": 7.157, "step": 1340 }, { "epoch": 5.0, "step": 1340, "total_flos": 2.1552156360310784e+16, "train_loss": 0.2938878016685372, "train_runtime": 558.6733, "train_samples_per_second": 921.039, "train_steps_per_second": 7.196 } ], "max_steps": 4020, "num_train_epochs": 15, "total_flos": 2.1552156360310784e+16, "trial_name": null, "trial_params": null }