{ "best_metric": 0.0367523655295372, "best_model_checkpoint": "./beans_outputs/checkpoint-390", "epoch": 10.0, "eval_steps": 500, "global_step": 1300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7692307692307693, "grad_norm": 4.539805889129639, "learning_rate": 0.0005, "loss": 0.4713, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.9548872180451128, "eval_loss": 0.09497568756341934, "eval_runtime": 1.0331, "eval_samples_per_second": 128.734, "eval_steps_per_second": 16.455, "step": 130 }, { "epoch": 1.5384615384615383, "grad_norm": 0.8214866518974304, "learning_rate": 0.0005, "loss": 0.1717, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.9699248120300752, "eval_loss": 0.08848864585161209, "eval_runtime": 0.9725, "eval_samples_per_second": 136.757, "eval_steps_per_second": 17.48, "step": 260 }, { "epoch": 2.3076923076923075, "grad_norm": 5.174630165100098, "learning_rate": 0.0005, "loss": 0.1941, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.9924812030075187, "eval_loss": 0.0367523655295372, "eval_runtime": 1.0358, "eval_samples_per_second": 128.405, "eval_steps_per_second": 16.413, "step": 390 }, { "epoch": 3.076923076923077, "grad_norm": 6.90311336517334, "learning_rate": 0.0005, "loss": 0.178, "step": 400 }, { "epoch": 3.8461538461538463, "grad_norm": 0.0417233482003212, "learning_rate": 0.0005, "loss": 0.1484, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.9699248120300752, "eval_loss": 0.10054013133049011, "eval_runtime": 1.0366, "eval_samples_per_second": 128.308, "eval_steps_per_second": 16.4, "step": 520 }, { "epoch": 4.615384615384615, "grad_norm": 3.222952365875244, "learning_rate": 0.0005, "loss": 0.1574, "step": 600 }, { "epoch": 5.0, "eval_accuracy": 0.9774436090225563, "eval_loss": 0.06399582326412201, "eval_runtime": 1.1232, "eval_samples_per_second": 118.415, "eval_steps_per_second": 15.136, "step": 650 }, { "epoch": 5.384615384615385, "grad_norm": 0.5326167345046997, "learning_rate": 0.0005, "loss": 0.1948, "step": 700 }, { "epoch": 6.0, "eval_accuracy": 0.9172932330827067, "eval_loss": 0.18919076025485992, "eval_runtime": 1.1717, "eval_samples_per_second": 113.51, "eval_steps_per_second": 14.509, "step": 780 }, { "epoch": 6.153846153846154, "grad_norm": 2.2912113666534424, "learning_rate": 0.0005, "loss": 0.2541, "step": 800 }, { "epoch": 6.923076923076923, "grad_norm": 4.357283592224121, "learning_rate": 0.0005, "loss": 0.4159, "step": 900 }, { "epoch": 7.0, "eval_accuracy": 0.9699248120300752, "eval_loss": 0.15302829444408417, "eval_runtime": 1.1496, "eval_samples_per_second": 115.692, "eval_steps_per_second": 14.788, "step": 910 }, { "epoch": 7.6923076923076925, "grad_norm": 2.716820240020752, "learning_rate": 0.0005, "loss": 0.4951, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.6842105263157895, "eval_loss": 0.7176167368888855, "eval_runtime": 0.9764, "eval_samples_per_second": 136.21, "eval_steps_per_second": 17.41, "step": 1040 }, { "epoch": 8.461538461538462, "grad_norm": 1.9368160963058472, "learning_rate": 0.0005, "loss": 0.6818, "step": 1100 }, { "epoch": 9.0, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.4557244777679443, "eval_runtime": 1.1951, "eval_samples_per_second": 111.285, "eval_steps_per_second": 14.224, "step": 1170 }, { "epoch": 9.23076923076923, "grad_norm": 1.1992454528808594, "learning_rate": 0.0005, "loss": 0.7561, "step": 1200 }, { "epoch": 10.0, "grad_norm": 3.8898727893829346, "learning_rate": 0.0005, "loss": 0.7749, "step": 1300 }, { "epoch": 10.0, "eval_accuracy": 0.7293233082706767, "eval_loss": 0.7070097923278809, "eval_runtime": 1.1878, "eval_samples_per_second": 111.974, "eval_steps_per_second": 14.312, "step": 1300 }, { "epoch": 10.0, "step": 1300, "total_flos": 8.772706474360013e+17, "train_loss": 0.3764252471923828, "train_runtime": 205.4123, "train_samples_per_second": 50.338, "train_steps_per_second": 6.329 } ], "logging_steps": 100, "max_steps": 1300, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.772706474360013e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }