|
{ |
|
"best_metric": 0.20142494142055511, |
|
"best_model_checkpoint": "./vit-base-brain-alzheimer-detection/checkpoint-2000", |
|
"epoch": 13.671875, |
|
"eval_steps": 500, |
|
"global_step": 3500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1953125, |
|
"grad_norm": 0.01695910282433033, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0035, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 0.013198412954807281, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.0031, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5859375, |
|
"grad_norm": 0.01613868959248066, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0031, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 0.011624956503510475, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.0031, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9765625, |
|
"grad_norm": 0.026181865483522415, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0026, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"grad_norm": 0.010051410645246506, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0026, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3671875, |
|
"grad_norm": 0.009718737564980984, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.0022, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 0.00813582818955183, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.0019, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.7578125, |
|
"grad_norm": 14.344162940979004, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.3794, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.953125, |
|
"grad_norm": 27.69124984741211, |
|
"learning_rate": 1.9960000000000002e-05, |
|
"loss": 0.4213, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.953125, |
|
"eval_accuracy": 0.8955078125, |
|
"eval_loss": 0.3758355975151062, |
|
"eval_runtime": 7.1938, |
|
"eval_samples_per_second": 142.346, |
|
"eval_steps_per_second": 17.793, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.1484375, |
|
"grad_norm": 0.10575103759765625, |
|
"learning_rate": 1.978787878787879e-05, |
|
"loss": 0.219, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.34375, |
|
"grad_norm": 10.299068450927734, |
|
"learning_rate": 1.9571428571428572e-05, |
|
"loss": 0.2295, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.5390625, |
|
"grad_norm": 30.670997619628906, |
|
"learning_rate": 1.935497835497836e-05, |
|
"loss": 0.0989, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.734375, |
|
"grad_norm": 10.104991912841797, |
|
"learning_rate": 1.9138528138528142e-05, |
|
"loss": 0.0813, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.9296875, |
|
"grad_norm": 0.01667148247361183, |
|
"learning_rate": 1.8922077922077925e-05, |
|
"loss": 0.0195, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 4.969385147094727, |
|
"learning_rate": 1.8705627705627708e-05, |
|
"loss": 0.02, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.3203125, |
|
"grad_norm": 0.010836214758455753, |
|
"learning_rate": 1.848917748917749e-05, |
|
"loss": 0.0306, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.515625, |
|
"grad_norm": 0.009891550056636333, |
|
"learning_rate": 1.8272727272727275e-05, |
|
"loss": 0.0433, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.7109375, |
|
"grad_norm": 0.08000622689723969, |
|
"learning_rate": 1.8056277056277058e-05, |
|
"loss": 0.0294, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.90625, |
|
"grad_norm": 0.015158270485699177, |
|
"learning_rate": 1.783982683982684e-05, |
|
"loss": 0.0224, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.90625, |
|
"eval_accuracy": 0.9501953125, |
|
"eval_loss": 0.2260357141494751, |
|
"eval_runtime": 5.6203, |
|
"eval_samples_per_second": 182.196, |
|
"eval_steps_per_second": 22.774, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.1015625, |
|
"grad_norm": 0.011108304373919964, |
|
"learning_rate": 1.7623376623376624e-05, |
|
"loss": 0.0121, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.296875, |
|
"grad_norm": 0.014846593141555786, |
|
"learning_rate": 1.7406926406926407e-05, |
|
"loss": 0.0205, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.4921875, |
|
"grad_norm": 0.032500285655260086, |
|
"learning_rate": 1.7190476190476194e-05, |
|
"loss": 0.0261, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.6875, |
|
"grad_norm": 0.01257646270096302, |
|
"learning_rate": 1.6974025974025977e-05, |
|
"loss": 0.0252, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.8828125, |
|
"grad_norm": 0.06579085439443588, |
|
"learning_rate": 1.675757575757576e-05, |
|
"loss": 0.024, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.078125, |
|
"grad_norm": 2.4693167209625244, |
|
"learning_rate": 1.6541125541125543e-05, |
|
"loss": 0.0324, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.2734375, |
|
"grad_norm": 0.005182833410799503, |
|
"learning_rate": 1.6324675324675326e-05, |
|
"loss": 0.018, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5.46875, |
|
"grad_norm": 0.006213045679032803, |
|
"learning_rate": 1.610822510822511e-05, |
|
"loss": 0.0479, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.6640625, |
|
"grad_norm": 50.79215621948242, |
|
"learning_rate": 1.5891774891774893e-05, |
|
"loss": 0.0251, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.859375, |
|
"grad_norm": 0.021290121600031853, |
|
"learning_rate": 1.5675324675324676e-05, |
|
"loss": 0.0012, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.859375, |
|
"eval_accuracy": 0.95703125, |
|
"eval_loss": 0.21266639232635498, |
|
"eval_runtime": 5.9949, |
|
"eval_samples_per_second": 170.813, |
|
"eval_steps_per_second": 21.352, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0546875, |
|
"grad_norm": 12.610783576965332, |
|
"learning_rate": 1.545887445887446e-05, |
|
"loss": 0.0256, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 0.013099271804094315, |
|
"learning_rate": 1.5242424242424244e-05, |
|
"loss": 0.0143, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.4453125, |
|
"grad_norm": 0.04105505719780922, |
|
"learning_rate": 1.5025974025974027e-05, |
|
"loss": 0.0109, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.640625, |
|
"grad_norm": 0.005291896406561136, |
|
"learning_rate": 1.4809523809523812e-05, |
|
"loss": 0.0222, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.8359375, |
|
"grad_norm": 0.005415532272309065, |
|
"learning_rate": 1.4593073593073595e-05, |
|
"loss": 0.0052, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7.03125, |
|
"grad_norm": 0.004291017074137926, |
|
"learning_rate": 1.4376623376623378e-05, |
|
"loss": 0.0077, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7.2265625, |
|
"grad_norm": 0.004927146248519421, |
|
"learning_rate": 1.4160173160173161e-05, |
|
"loss": 0.0009, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 7.421875, |
|
"grad_norm": 0.004201130475848913, |
|
"learning_rate": 1.3943722943722944e-05, |
|
"loss": 0.0008, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.6171875, |
|
"grad_norm": 0.003896114882081747, |
|
"learning_rate": 1.3727272727272729e-05, |
|
"loss": 0.0007, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"grad_norm": 0.0028053205460309982, |
|
"learning_rate": 1.3510822510822512e-05, |
|
"loss": 0.0007, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"eval_accuracy": 0.9677734375, |
|
"eval_loss": 0.20142494142055511, |
|
"eval_runtime": 5.4298, |
|
"eval_samples_per_second": 188.588, |
|
"eval_steps_per_second": 23.573, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0078125, |
|
"grad_norm": 0.003189160255715251, |
|
"learning_rate": 1.3294372294372295e-05, |
|
"loss": 0.0006, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 8.203125, |
|
"grad_norm": 0.005525187123566866, |
|
"learning_rate": 1.3077922077922078e-05, |
|
"loss": 0.0006, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.3984375, |
|
"grad_norm": 0.002676301635801792, |
|
"learning_rate": 1.2861471861471862e-05, |
|
"loss": 0.0006, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 8.59375, |
|
"grad_norm": 0.0033032067585736513, |
|
"learning_rate": 1.2645021645021646e-05, |
|
"loss": 0.0006, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.7890625, |
|
"grad_norm": 0.003413607832044363, |
|
"learning_rate": 1.242857142857143e-05, |
|
"loss": 0.0006, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 8.984375, |
|
"grad_norm": 0.00263944361358881, |
|
"learning_rate": 1.2212121212121213e-05, |
|
"loss": 0.0006, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9.1796875, |
|
"grad_norm": 0.002298458479344845, |
|
"learning_rate": 1.1995670995670996e-05, |
|
"loss": 0.0006, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 0.002362418919801712, |
|
"learning_rate": 1.1779220779220779e-05, |
|
"loss": 0.0005, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.5703125, |
|
"grad_norm": 0.002904605120420456, |
|
"learning_rate": 1.1562770562770564e-05, |
|
"loss": 0.0005, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 9.765625, |
|
"grad_norm": 0.004212340340018272, |
|
"learning_rate": 1.1346320346320347e-05, |
|
"loss": 0.0005, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.765625, |
|
"eval_accuracy": 0.9697265625, |
|
"eval_loss": 0.20150402188301086, |
|
"eval_runtime": 10.8662, |
|
"eval_samples_per_second": 94.237, |
|
"eval_steps_per_second": 11.78, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.9609375, |
|
"grad_norm": 0.0020910820458084345, |
|
"learning_rate": 1.112987012987013e-05, |
|
"loss": 0.0005, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 10.15625, |
|
"grad_norm": 0.0021248324774205685, |
|
"learning_rate": 1.0913419913419913e-05, |
|
"loss": 0.0005, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10.3515625, |
|
"grad_norm": 0.0025855612475425005, |
|
"learning_rate": 1.0696969696969696e-05, |
|
"loss": 0.0005, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 10.546875, |
|
"grad_norm": 0.0029177917167544365, |
|
"learning_rate": 1.0480519480519481e-05, |
|
"loss": 0.0005, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.7421875, |
|
"grad_norm": 0.002265964401885867, |
|
"learning_rate": 1.0264069264069264e-05, |
|
"loss": 0.0005, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 10.9375, |
|
"grad_norm": 0.0022889028768986464, |
|
"learning_rate": 1.0047619047619048e-05, |
|
"loss": 0.0004, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11.1328125, |
|
"grad_norm": 0.0023099584504961967, |
|
"learning_rate": 9.831168831168832e-06, |
|
"loss": 0.0004, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 11.328125, |
|
"grad_norm": 0.0022109313867986202, |
|
"learning_rate": 9.614718614718616e-06, |
|
"loss": 0.0004, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 11.5234375, |
|
"grad_norm": 0.002484516939148307, |
|
"learning_rate": 9.398268398268399e-06, |
|
"loss": 0.0004, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 11.71875, |
|
"grad_norm": 0.001686307368800044, |
|
"learning_rate": 9.181818181818184e-06, |
|
"loss": 0.0004, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.71875, |
|
"eval_accuracy": 0.96875, |
|
"eval_loss": 0.20902524888515472, |
|
"eval_runtime": 6.1763, |
|
"eval_samples_per_second": 165.796, |
|
"eval_steps_per_second": 20.725, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.9140625, |
|
"grad_norm": 0.002134295180439949, |
|
"learning_rate": 8.965367965367967e-06, |
|
"loss": 0.0004, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 12.109375, |
|
"grad_norm": 0.002223787596449256, |
|
"learning_rate": 8.74891774891775e-06, |
|
"loss": 0.0004, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 12.3046875, |
|
"grad_norm": 0.0019202354596927762, |
|
"learning_rate": 8.532467532467533e-06, |
|
"loss": 0.0004, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 0.0023374806623905897, |
|
"learning_rate": 8.316017316017316e-06, |
|
"loss": 0.0004, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 12.6953125, |
|
"grad_norm": 0.0015720732044428587, |
|
"learning_rate": 8.099567099567101e-06, |
|
"loss": 0.0004, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 12.890625, |
|
"grad_norm": 0.0014028995065018535, |
|
"learning_rate": 7.883116883116884e-06, |
|
"loss": 0.0004, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13.0859375, |
|
"grad_norm": 0.002278986619785428, |
|
"learning_rate": 7.666666666666667e-06, |
|
"loss": 0.0004, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 13.28125, |
|
"grad_norm": 0.0019616258796304464, |
|
"learning_rate": 7.450216450216451e-06, |
|
"loss": 0.0003, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 13.4765625, |
|
"grad_norm": 0.0017822927329689264, |
|
"learning_rate": 7.233766233766234e-06, |
|
"loss": 0.0003, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 13.671875, |
|
"grad_norm": 0.0021076672710478306, |
|
"learning_rate": 7.017316017316018e-06, |
|
"loss": 0.0004, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.671875, |
|
"eval_accuracy": 0.96875, |
|
"eval_loss": 0.21559299528598785, |
|
"eval_runtime": 6.0759, |
|
"eval_samples_per_second": 168.535, |
|
"eval_steps_per_second": 21.067, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.671875, |
|
"step": 3500, |
|
"total_flos": 4.339629206470656e+18, |
|
"train_loss": 0.02788944040451731, |
|
"train_runtime": 959.4355, |
|
"train_samples_per_second": 85.384, |
|
"train_steps_per_second": 5.336 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.339629206470656e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|