{ "best_metric": 0.20142494142055511, "best_model_checkpoint": "./vit-base-brain-alzheimer-detection/checkpoint-2000", "epoch": 13.671875, "eval_steps": 500, "global_step": 3500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1953125, "grad_norm": 0.01695910282433033, "learning_rate": 2.0000000000000003e-06, "loss": 0.0035, "step": 50 }, { "epoch": 0.390625, "grad_norm": 0.013198412954807281, "learning_rate": 4.000000000000001e-06, "loss": 0.0031, "step": 100 }, { "epoch": 0.5859375, "grad_norm": 0.01613868959248066, "learning_rate": 6e-06, "loss": 0.0031, "step": 150 }, { "epoch": 0.78125, "grad_norm": 0.011624956503510475, "learning_rate": 8.000000000000001e-06, "loss": 0.0031, "step": 200 }, { "epoch": 0.9765625, "grad_norm": 0.026181865483522415, "learning_rate": 1e-05, "loss": 0.0026, "step": 250 }, { "epoch": 1.171875, "grad_norm": 0.010051410645246506, "learning_rate": 1.2e-05, "loss": 0.0026, "step": 300 }, { "epoch": 1.3671875, "grad_norm": 0.009718737564980984, "learning_rate": 1.4e-05, "loss": 0.0022, "step": 350 }, { "epoch": 1.5625, "grad_norm": 0.00813582818955183, "learning_rate": 1.6000000000000003e-05, "loss": 0.0019, "step": 400 }, { "epoch": 1.7578125, "grad_norm": 14.344162940979004, "learning_rate": 1.8e-05, "loss": 0.3794, "step": 450 }, { "epoch": 1.953125, "grad_norm": 27.69124984741211, "learning_rate": 1.9960000000000002e-05, "loss": 0.4213, "step": 500 }, { "epoch": 1.953125, "eval_accuracy": 0.8955078125, "eval_loss": 0.3758355975151062, "eval_runtime": 7.1938, "eval_samples_per_second": 142.346, "eval_steps_per_second": 17.793, "step": 500 }, { "epoch": 2.1484375, "grad_norm": 0.10575103759765625, "learning_rate": 1.978787878787879e-05, "loss": 0.219, "step": 550 }, { "epoch": 2.34375, "grad_norm": 10.299068450927734, "learning_rate": 1.9571428571428572e-05, "loss": 0.2295, "step": 600 }, { "epoch": 2.5390625, "grad_norm": 30.670997619628906, "learning_rate": 1.935497835497836e-05, "loss": 0.0989, "step": 650 }, { "epoch": 2.734375, "grad_norm": 10.104991912841797, "learning_rate": 1.9138528138528142e-05, "loss": 0.0813, "step": 700 }, { "epoch": 2.9296875, "grad_norm": 0.01667148247361183, "learning_rate": 1.8922077922077925e-05, "loss": 0.0195, "step": 750 }, { "epoch": 3.125, "grad_norm": 4.969385147094727, "learning_rate": 1.8705627705627708e-05, "loss": 0.02, "step": 800 }, { "epoch": 3.3203125, "grad_norm": 0.010836214758455753, "learning_rate": 1.848917748917749e-05, "loss": 0.0306, "step": 850 }, { "epoch": 3.515625, "grad_norm": 0.009891550056636333, "learning_rate": 1.8272727272727275e-05, "loss": 0.0433, "step": 900 }, { "epoch": 3.7109375, "grad_norm": 0.08000622689723969, "learning_rate": 1.8056277056277058e-05, "loss": 0.0294, "step": 950 }, { "epoch": 3.90625, "grad_norm": 0.015158270485699177, "learning_rate": 1.783982683982684e-05, "loss": 0.0224, "step": 1000 }, { "epoch": 3.90625, "eval_accuracy": 0.9501953125, "eval_loss": 0.2260357141494751, "eval_runtime": 5.6203, "eval_samples_per_second": 182.196, "eval_steps_per_second": 22.774, "step": 1000 }, { "epoch": 4.1015625, "grad_norm": 0.011108304373919964, "learning_rate": 1.7623376623376624e-05, "loss": 0.0121, "step": 1050 }, { "epoch": 4.296875, "grad_norm": 0.014846593141555786, "learning_rate": 1.7406926406926407e-05, "loss": 0.0205, "step": 1100 }, { "epoch": 4.4921875, "grad_norm": 0.032500285655260086, "learning_rate": 1.7190476190476194e-05, "loss": 0.0261, "step": 1150 }, { "epoch": 4.6875, "grad_norm": 0.01257646270096302, "learning_rate": 1.6974025974025977e-05, "loss": 0.0252, "step": 1200 }, { "epoch": 4.8828125, "grad_norm": 0.06579085439443588, "learning_rate": 1.675757575757576e-05, "loss": 0.024, "step": 1250 }, { "epoch": 5.078125, "grad_norm": 2.4693167209625244, "learning_rate": 1.6541125541125543e-05, "loss": 0.0324, "step": 1300 }, { "epoch": 5.2734375, "grad_norm": 0.005182833410799503, "learning_rate": 1.6324675324675326e-05, "loss": 0.018, "step": 1350 }, { "epoch": 5.46875, "grad_norm": 0.006213045679032803, "learning_rate": 1.610822510822511e-05, "loss": 0.0479, "step": 1400 }, { "epoch": 5.6640625, "grad_norm": 50.79215621948242, "learning_rate": 1.5891774891774893e-05, "loss": 0.0251, "step": 1450 }, { "epoch": 5.859375, "grad_norm": 0.021290121600031853, "learning_rate": 1.5675324675324676e-05, "loss": 0.0012, "step": 1500 }, { "epoch": 5.859375, "eval_accuracy": 0.95703125, "eval_loss": 0.21266639232635498, "eval_runtime": 5.9949, "eval_samples_per_second": 170.813, "eval_steps_per_second": 21.352, "step": 1500 }, { "epoch": 6.0546875, "grad_norm": 12.610783576965332, "learning_rate": 1.545887445887446e-05, "loss": 0.0256, "step": 1550 }, { "epoch": 6.25, "grad_norm": 0.013099271804094315, "learning_rate": 1.5242424242424244e-05, "loss": 0.0143, "step": 1600 }, { "epoch": 6.4453125, "grad_norm": 0.04105505719780922, "learning_rate": 1.5025974025974027e-05, "loss": 0.0109, "step": 1650 }, { "epoch": 6.640625, "grad_norm": 0.005291896406561136, "learning_rate": 1.4809523809523812e-05, "loss": 0.0222, "step": 1700 }, { "epoch": 6.8359375, "grad_norm": 0.005415532272309065, "learning_rate": 1.4593073593073595e-05, "loss": 0.0052, "step": 1750 }, { "epoch": 7.03125, "grad_norm": 0.004291017074137926, "learning_rate": 1.4376623376623378e-05, "loss": 0.0077, "step": 1800 }, { "epoch": 7.2265625, "grad_norm": 0.004927146248519421, "learning_rate": 1.4160173160173161e-05, "loss": 0.0009, "step": 1850 }, { "epoch": 7.421875, "grad_norm": 0.004201130475848913, "learning_rate": 1.3943722943722944e-05, "loss": 0.0008, "step": 1900 }, { "epoch": 7.6171875, "grad_norm": 0.003896114882081747, "learning_rate": 1.3727272727272729e-05, "loss": 0.0007, "step": 1950 }, { "epoch": 7.8125, "grad_norm": 0.0028053205460309982, "learning_rate": 1.3510822510822512e-05, "loss": 0.0007, "step": 2000 }, { "epoch": 7.8125, "eval_accuracy": 0.9677734375, "eval_loss": 0.20142494142055511, "eval_runtime": 5.4298, "eval_samples_per_second": 188.588, "eval_steps_per_second": 23.573, "step": 2000 }, { "epoch": 8.0078125, "grad_norm": 0.003189160255715251, "learning_rate": 1.3294372294372295e-05, "loss": 0.0006, "step": 2050 }, { "epoch": 8.203125, "grad_norm": 0.005525187123566866, "learning_rate": 1.3077922077922078e-05, "loss": 0.0006, "step": 2100 }, { "epoch": 8.3984375, "grad_norm": 0.002676301635801792, "learning_rate": 1.2861471861471862e-05, "loss": 0.0006, "step": 2150 }, { "epoch": 8.59375, "grad_norm": 0.0033032067585736513, "learning_rate": 1.2645021645021646e-05, "loss": 0.0006, "step": 2200 }, { "epoch": 8.7890625, "grad_norm": 0.003413607832044363, "learning_rate": 1.242857142857143e-05, "loss": 0.0006, "step": 2250 }, { "epoch": 8.984375, "grad_norm": 0.00263944361358881, "learning_rate": 1.2212121212121213e-05, "loss": 0.0006, "step": 2300 }, { "epoch": 9.1796875, "grad_norm": 0.002298458479344845, "learning_rate": 1.1995670995670996e-05, "loss": 0.0006, "step": 2350 }, { "epoch": 9.375, "grad_norm": 0.002362418919801712, "learning_rate": 1.1779220779220779e-05, "loss": 0.0005, "step": 2400 }, { "epoch": 9.5703125, "grad_norm": 0.002904605120420456, "learning_rate": 1.1562770562770564e-05, "loss": 0.0005, "step": 2450 }, { "epoch": 9.765625, "grad_norm": 0.004212340340018272, "learning_rate": 1.1346320346320347e-05, "loss": 0.0005, "step": 2500 }, { "epoch": 9.765625, "eval_accuracy": 0.9697265625, "eval_loss": 0.20150402188301086, "eval_runtime": 10.8662, "eval_samples_per_second": 94.237, "eval_steps_per_second": 11.78, "step": 2500 }, { "epoch": 9.9609375, "grad_norm": 0.0020910820458084345, "learning_rate": 1.112987012987013e-05, "loss": 0.0005, "step": 2550 }, { "epoch": 10.15625, "grad_norm": 0.0021248324774205685, "learning_rate": 1.0913419913419913e-05, "loss": 0.0005, "step": 2600 }, { "epoch": 10.3515625, "grad_norm": 0.0025855612475425005, "learning_rate": 1.0696969696969696e-05, "loss": 0.0005, "step": 2650 }, { "epoch": 10.546875, "grad_norm": 0.0029177917167544365, "learning_rate": 1.0480519480519481e-05, "loss": 0.0005, "step": 2700 }, { "epoch": 10.7421875, "grad_norm": 0.002265964401885867, "learning_rate": 1.0264069264069264e-05, "loss": 0.0005, "step": 2750 }, { "epoch": 10.9375, "grad_norm": 0.0022889028768986464, "learning_rate": 1.0047619047619048e-05, "loss": 0.0004, "step": 2800 }, { "epoch": 11.1328125, "grad_norm": 0.0023099584504961967, "learning_rate": 9.831168831168832e-06, "loss": 0.0004, "step": 2850 }, { "epoch": 11.328125, "grad_norm": 0.0022109313867986202, "learning_rate": 9.614718614718616e-06, "loss": 0.0004, "step": 2900 }, { "epoch": 11.5234375, "grad_norm": 0.002484516939148307, "learning_rate": 9.398268398268399e-06, "loss": 0.0004, "step": 2950 }, { "epoch": 11.71875, "grad_norm": 0.001686307368800044, "learning_rate": 9.181818181818184e-06, "loss": 0.0004, "step": 3000 }, { "epoch": 11.71875, "eval_accuracy": 0.96875, "eval_loss": 0.20902524888515472, "eval_runtime": 6.1763, "eval_samples_per_second": 165.796, "eval_steps_per_second": 20.725, "step": 3000 }, { "epoch": 11.9140625, "grad_norm": 0.002134295180439949, "learning_rate": 8.965367965367967e-06, "loss": 0.0004, "step": 3050 }, { "epoch": 12.109375, "grad_norm": 0.002223787596449256, "learning_rate": 8.74891774891775e-06, "loss": 0.0004, "step": 3100 }, { "epoch": 12.3046875, "grad_norm": 0.0019202354596927762, "learning_rate": 8.532467532467533e-06, "loss": 0.0004, "step": 3150 }, { "epoch": 12.5, "grad_norm": 0.0023374806623905897, "learning_rate": 8.316017316017316e-06, "loss": 0.0004, "step": 3200 }, { "epoch": 12.6953125, "grad_norm": 0.0015720732044428587, "learning_rate": 8.099567099567101e-06, "loss": 0.0004, "step": 3250 }, { "epoch": 12.890625, "grad_norm": 0.0014028995065018535, "learning_rate": 7.883116883116884e-06, "loss": 0.0004, "step": 3300 }, { "epoch": 13.0859375, "grad_norm": 0.002278986619785428, "learning_rate": 7.666666666666667e-06, "loss": 0.0004, "step": 3350 }, { "epoch": 13.28125, "grad_norm": 0.0019616258796304464, "learning_rate": 7.450216450216451e-06, "loss": 0.0003, "step": 3400 }, { "epoch": 13.4765625, "grad_norm": 0.0017822927329689264, "learning_rate": 7.233766233766234e-06, "loss": 0.0003, "step": 3450 }, { "epoch": 13.671875, "grad_norm": 0.0021076672710478306, "learning_rate": 7.017316017316018e-06, "loss": 0.0004, "step": 3500 }, { "epoch": 13.671875, "eval_accuracy": 0.96875, "eval_loss": 0.21559299528598785, "eval_runtime": 6.0759, "eval_samples_per_second": 168.535, "eval_steps_per_second": 21.067, "step": 3500 }, { "epoch": 13.671875, "step": 3500, "total_flos": 4.339629206470656e+18, "train_loss": 0.02788944040451731, "train_runtime": 959.4355, "train_samples_per_second": 85.384, "train_steps_per_second": 5.336 } ], "logging_steps": 50, "max_steps": 5120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.339629206470656e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }