|
{ |
|
"best_metric": 0.38773971796035767, |
|
"best_model_checkpoint": "./vit-base-brain-tumor-detection2/checkpoint-1500", |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 1920, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 1.7783442735671997, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"loss": 1.3686, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 1.8243844509124756, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 1.3468, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.34375, |
|
"grad_norm": 2.272806167602539, |
|
"learning_rate": 7.5e-07, |
|
"loss": 1.2931, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.125, |
|
"grad_norm": 1.0346544981002808, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.1861, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.90625, |
|
"grad_norm": 0.897363007068634, |
|
"learning_rate": 1.25e-06, |
|
"loss": 1.092, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.6875, |
|
"grad_norm": 1.0376484394073486, |
|
"learning_rate": 1.5e-06, |
|
"loss": 1.0504, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.46875, |
|
"grad_norm": 1.2069748640060425, |
|
"learning_rate": 1.75e-06, |
|
"loss": 0.993, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 1.41449773311615, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.9428, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.03125, |
|
"grad_norm": 3.5506372451782227, |
|
"learning_rate": 2.25e-06, |
|
"loss": 0.9065, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"grad_norm": 2.408358573913574, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.8758, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 7.8125, |
|
"eval_accuracy": 0.6181640625, |
|
"eval_loss": 0.891155481338501, |
|
"eval_runtime": 5.9944, |
|
"eval_samples_per_second": 170.826, |
|
"eval_steps_per_second": 21.353, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.59375, |
|
"grad_norm": 4.127248764038086, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"loss": 0.8371, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.375, |
|
"grad_norm": 4.548064231872559, |
|
"learning_rate": 3e-06, |
|
"loss": 0.7951, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.15625, |
|
"grad_norm": 7.226322174072266, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 0.7666, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 10.9375, |
|
"grad_norm": 4.653890132904053, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.7219, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.71875, |
|
"grad_norm": 3.7779295444488525, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.6926, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 3.6519362926483154, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.646, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.28125, |
|
"grad_norm": 24.227773666381836, |
|
"learning_rate": 4.25e-06, |
|
"loss": 0.6269, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 14.0625, |
|
"grad_norm": 8.960350036621094, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.5882, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 14.84375, |
|
"grad_norm": 6.60162878036499, |
|
"learning_rate": 4.75e-06, |
|
"loss": 0.544, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"grad_norm": 10.583850860595703, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5256, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 15.625, |
|
"eval_accuracy": 0.7607421875, |
|
"eval_loss": 0.6331567764282227, |
|
"eval_runtime": 5.4782, |
|
"eval_samples_per_second": 186.924, |
|
"eval_steps_per_second": 23.365, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.40625, |
|
"grad_norm": 3.9983010292053223, |
|
"learning_rate": 4.999850432733413e-06, |
|
"loss": 0.4768, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 17.1875, |
|
"grad_norm": 12.768200874328613, |
|
"learning_rate": 4.999401748829942e-06, |
|
"loss": 0.4863, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 17.96875, |
|
"grad_norm": 14.71330451965332, |
|
"learning_rate": 4.9986540019763296e-06, |
|
"loss": 0.4181, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 9.602484703063965, |
|
"learning_rate": 4.997607281643338e-06, |
|
"loss": 0.3901, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 19.53125, |
|
"grad_norm": 27.293039321899414, |
|
"learning_rate": 4.996261713075046e-06, |
|
"loss": 0.3496, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 20.3125, |
|
"grad_norm": 3.6233201026916504, |
|
"learning_rate": 4.994617457273862e-06, |
|
"loss": 0.3312, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 21.09375, |
|
"grad_norm": 11.52304458618164, |
|
"learning_rate": 4.992674710981266e-06, |
|
"loss": 0.2929, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 21.875, |
|
"grad_norm": 6.805290222167969, |
|
"learning_rate": 4.990433706654258e-06, |
|
"loss": 0.2653, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.65625, |
|
"grad_norm": 15.234956741333008, |
|
"learning_rate": 4.987894712437552e-06, |
|
"loss": 0.2635, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 23.4375, |
|
"grad_norm": 3.9582715034484863, |
|
"learning_rate": 4.985058032131488e-06, |
|
"loss": 0.2247, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.4375, |
|
"eval_accuracy": 0.8935546875, |
|
"eval_loss": 0.38773971796035767, |
|
"eval_runtime": 5.9931, |
|
"eval_samples_per_second": 170.863, |
|
"eval_steps_per_second": 21.358, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 24.21875, |
|
"grad_norm": 3.0134124755859375, |
|
"learning_rate": 4.98192400515568e-06, |
|
"loss": 0.1965, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"grad_norm": 5.4471540451049805, |
|
"learning_rate": 4.978564534206973e-06, |
|
"loss": 0.193, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 25.78125, |
|
"grad_norm": 11.847530364990234, |
|
"learning_rate": 4.974842901390282e-06, |
|
"loss": 0.1709, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 26.5625, |
|
"grad_norm": 4.015501022338867, |
|
"learning_rate": 4.970825144183195e-06, |
|
"loss": 0.158, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 27.34375, |
|
"grad_norm": 4.216943264007568, |
|
"learning_rate": 4.966511743325682e-06, |
|
"loss": 0.1452, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 28.125, |
|
"grad_norm": 23.560443878173828, |
|
"learning_rate": 4.961903214932604e-06, |
|
"loss": 0.1288, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 28.90625, |
|
"grad_norm": 0.9162831902503967, |
|
"learning_rate": 4.957000110431956e-06, |
|
"loss": 0.1195, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 29.6875, |
|
"grad_norm": 2.046557664871216, |
|
"learning_rate": 4.95180301649889e-06, |
|
"loss": 0.1002, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 1920, |
|
"total_flos": 9.52238637305561e+18, |
|
"train_loss": 0.5872499863306682, |
|
"train_runtime": 1494.2494, |
|
"train_samples_per_second": 82.235, |
|
"train_steps_per_second": 1.285 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1920, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.52238637305561e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|