{ "best_metric": 0.017028138041496277, "best_model_checkpoint": "vit-fine-tuned-image-classification-beans_2\\checkpoint-300", "epoch": 5.0, "eval_steps": 100, "global_step": 325, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15384615384615385, "grad_norm": 1.838330626487732, "learning_rate": 0.00019384615384615385, "loss": 0.9029, "step": 10 }, { "epoch": 0.3076923076923077, "grad_norm": 0.8585432767868042, "learning_rate": 0.0001876923076923077, "loss": 0.3239, "step": 20 }, { "epoch": 0.46153846153846156, "grad_norm": 2.5284998416900635, "learning_rate": 0.00018153846153846155, "loss": 0.2423, "step": 30 }, { "epoch": 0.6153846153846154, "grad_norm": 5.096222877502441, "learning_rate": 0.0001753846153846154, "loss": 0.3654, "step": 40 }, { "epoch": 0.7692307692307693, "grad_norm": 2.209157943725586, "learning_rate": 0.00016923076923076923, "loss": 0.1337, "step": 50 }, { "epoch": 0.9230769230769231, "grad_norm": 1.0063344240188599, "learning_rate": 0.0001630769230769231, "loss": 0.2958, "step": 60 }, { "epoch": 1.0769230769230769, "grad_norm": 5.1612725257873535, "learning_rate": 0.00015692307692307693, "loss": 0.2864, "step": 70 }, { "epoch": 1.2307692307692308, "grad_norm": 0.8443673253059387, "learning_rate": 0.00015076923076923077, "loss": 0.1165, "step": 80 }, { "epoch": 1.3846153846153846, "grad_norm": 0.1971602886915207, "learning_rate": 0.0001446153846153846, "loss": 0.0356, "step": 90 }, { "epoch": 1.5384615384615383, "grad_norm": 0.22484387457370758, "learning_rate": 0.00013846153846153847, "loss": 0.0648, "step": 100 }, { "epoch": 1.5384615384615383, "eval_accuracy": 0.9924812030075187, "eval_loss": 0.0420377217233181, "eval_runtime": 30.8858, "eval_samples_per_second": 4.306, "eval_steps_per_second": 0.55, "step": 100 }, { "epoch": 1.6923076923076923, "grad_norm": 0.10582908987998962, "learning_rate": 0.0001323076923076923, "loss": 0.0537, "step": 110 }, { "epoch": 1.8461538461538463, "grad_norm": 0.09471665322780609, "learning_rate": 0.00012615384615384615, "loss": 0.1001, "step": 120 }, { "epoch": 2.0, "grad_norm": 0.08882123231887817, "learning_rate": 0.00012, "loss": 0.0386, "step": 130 }, { "epoch": 2.1538461538461537, "grad_norm": 2.4845492839813232, "learning_rate": 0.00011384615384615384, "loss": 0.0227, "step": 140 }, { "epoch": 2.3076923076923075, "grad_norm": 4.820040702819824, "learning_rate": 0.0001076923076923077, "loss": 0.0325, "step": 150 }, { "epoch": 2.4615384615384617, "grad_norm": 0.06727772951126099, "learning_rate": 0.00010153846153846153, "loss": 0.0224, "step": 160 }, { "epoch": 2.6153846153846154, "grad_norm": 6.172357082366943, "learning_rate": 9.53846153846154e-05, "loss": 0.0455, "step": 170 }, { "epoch": 2.769230769230769, "grad_norm": 0.7179319262504578, "learning_rate": 8.923076923076924e-05, "loss": 0.0756, "step": 180 }, { "epoch": 2.9230769230769234, "grad_norm": 0.07661673426628113, "learning_rate": 8.307692307692309e-05, "loss": 0.0196, "step": 190 }, { "epoch": 3.076923076923077, "grad_norm": 0.05913277715444565, "learning_rate": 7.692307692307693e-05, "loss": 0.0422, "step": 200 }, { "epoch": 3.076923076923077, "eval_accuracy": 0.9924812030075187, "eval_loss": 0.04675745964050293, "eval_runtime": 37.7884, "eval_samples_per_second": 3.52, "eval_steps_per_second": 0.45, "step": 200 }, { "epoch": 3.230769230769231, "grad_norm": 0.0708017349243164, "learning_rate": 7.076923076923078e-05, "loss": 0.0107, "step": 210 }, { "epoch": 3.3846153846153846, "grad_norm": 0.05266530439257622, "learning_rate": 6.461538461538462e-05, "loss": 0.0215, "step": 220 }, { "epoch": 3.5384615384615383, "grad_norm": 0.05381350219249725, "learning_rate": 5.846153846153847e-05, "loss": 0.0329, "step": 230 }, { "epoch": 3.6923076923076925, "grad_norm": 0.04883628711104393, "learning_rate": 5.230769230769231e-05, "loss": 0.0094, "step": 240 }, { "epoch": 3.8461538461538463, "grad_norm": 0.04827561974525452, "learning_rate": 4.615384615384616e-05, "loss": 0.0089, "step": 250 }, { "epoch": 4.0, "grad_norm": 0.046552080661058426, "learning_rate": 4e-05, "loss": 0.0094, "step": 260 }, { "epoch": 4.153846153846154, "grad_norm": 0.04622137174010277, "learning_rate": 3.384615384615385e-05, "loss": 0.0229, "step": 270 }, { "epoch": 4.3076923076923075, "grad_norm": 0.04905983805656433, "learning_rate": 2.7692307692307694e-05, "loss": 0.0083, "step": 280 }, { "epoch": 4.461538461538462, "grad_norm": 0.04810818284749985, "learning_rate": 2.1538461538461542e-05, "loss": 0.008, "step": 290 }, { "epoch": 4.615384615384615, "grad_norm": 0.04192296415567398, "learning_rate": 1.5384615384615387e-05, "loss": 0.008, "step": 300 }, { "epoch": 4.615384615384615, "eval_accuracy": 1.0, "eval_loss": 0.017028138041496277, "eval_runtime": 28.3225, "eval_samples_per_second": 4.696, "eval_steps_per_second": 0.6, "step": 300 }, { "epoch": 4.769230769230769, "grad_norm": 0.04521344602108002, "learning_rate": 9.230769230769232e-06, "loss": 0.0078, "step": 310 }, { "epoch": 4.923076923076923, "grad_norm": 0.04102473333477974, "learning_rate": 3.0769230769230774e-06, "loss": 0.0078, "step": 320 }, { "epoch": 5.0, "step": 325, "total_flos": 4.006371770595533e+17, "train_loss": 0.10399184557107778, "train_runtime": 4370.2022, "train_samples_per_second": 1.183, "train_steps_per_second": 0.074 } ], "logging_steps": 10, "max_steps": 325, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.006371770595533e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }