|
{ |
|
"best_metric": 0.903125, |
|
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-finalterm/checkpoint-120", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.020592212677002, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.3728, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.515625, |
|
"eval_loss": 1.2643654346466064, |
|
"eval_runtime": 2.8905, |
|
"eval_samples_per_second": 110.709, |
|
"eval_steps_per_second": 3.46, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.058677673339844, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1308, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.625, |
|
"eval_loss": 0.8816311955451965, |
|
"eval_runtime": 2.7866, |
|
"eval_samples_per_second": 114.835, |
|
"eval_steps_per_second": 3.589, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 7.673556327819824, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.8721, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.70625, |
|
"eval_loss": 0.6829319000244141, |
|
"eval_runtime": 2.7596, |
|
"eval_samples_per_second": 115.959, |
|
"eval_steps_per_second": 3.624, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.715519905090332, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.6919, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.80625, |
|
"eval_loss": 0.5298391580581665, |
|
"eval_runtime": 2.8001, |
|
"eval_samples_per_second": 114.281, |
|
"eval_steps_per_second": 3.571, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 7.957090377807617, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.5876, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.86875, |
|
"eval_loss": 0.4100002348423004, |
|
"eval_runtime": 2.7806, |
|
"eval_samples_per_second": 115.082, |
|
"eval_steps_per_second": 3.596, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 6.996800899505615, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.5504, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.853125, |
|
"eval_loss": 0.4152528643608093, |
|
"eval_runtime": 2.9983, |
|
"eval_samples_per_second": 106.727, |
|
"eval_steps_per_second": 3.335, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.525465965270996, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.459, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.859375, |
|
"eval_loss": 0.3827503025531769, |
|
"eval_runtime": 2.7918, |
|
"eval_samples_per_second": 114.62, |
|
"eval_steps_per_second": 3.582, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.351122856140137, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.4501, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8625, |
|
"eval_loss": 0.39407286047935486, |
|
"eval_runtime": 2.7588, |
|
"eval_samples_per_second": 115.992, |
|
"eval_steps_per_second": 3.625, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.307958602905273, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.4312, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.871875, |
|
"eval_loss": 0.36500272154808044, |
|
"eval_runtime": 2.7271, |
|
"eval_samples_per_second": 117.34, |
|
"eval_steps_per_second": 3.667, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.813844203948975, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.4119, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.875, |
|
"eval_loss": 0.3515123128890991, |
|
"eval_runtime": 2.736, |
|
"eval_samples_per_second": 116.961, |
|
"eval_steps_per_second": 3.655, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 8.393081665039062, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4014, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.896875, |
|
"eval_loss": 0.3110010325908661, |
|
"eval_runtime": 2.7536, |
|
"eval_samples_per_second": 116.212, |
|
"eval_steps_per_second": 3.632, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.969035625457764, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.3896, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.903125, |
|
"eval_loss": 0.3030492663383484, |
|
"eval_runtime": 2.79, |
|
"eval_samples_per_second": 114.693, |
|
"eval_steps_per_second": 3.584, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.165198802947998, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.3822, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.88125, |
|
"eval_loss": 0.34730494022369385, |
|
"eval_runtime": 2.7802, |
|
"eval_samples_per_second": 115.099, |
|
"eval_steps_per_second": 3.597, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 5.396005153656006, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.3985, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8875, |
|
"eval_loss": 0.32879379391670227, |
|
"eval_runtime": 2.7703, |
|
"eval_samples_per_second": 115.511, |
|
"eval_steps_per_second": 3.61, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 5.696004390716553, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.3826, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2924533486366272, |
|
"eval_runtime": 2.7303, |
|
"eval_samples_per_second": 117.204, |
|
"eval_steps_per_second": 3.663, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 7.218973636627197, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.3716, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.875, |
|
"eval_loss": 0.36188262701034546, |
|
"eval_runtime": 2.726, |
|
"eval_samples_per_second": 117.39, |
|
"eval_steps_per_second": 3.668, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 7.728688716888428, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.365, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9, |
|
"eval_loss": 0.2941049039363861, |
|
"eval_runtime": 2.7206, |
|
"eval_samples_per_second": 117.621, |
|
"eval_steps_per_second": 3.676, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 5.3787126541137695, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.3379, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.884375, |
|
"eval_loss": 0.32390105724334717, |
|
"eval_runtime": 2.7433, |
|
"eval_samples_per_second": 116.648, |
|
"eval_steps_per_second": 3.645, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 7.723823070526123, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.3365, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.890625, |
|
"eval_loss": 0.32600170373916626, |
|
"eval_runtime": 2.7308, |
|
"eval_samples_per_second": 117.184, |
|
"eval_steps_per_second": 3.662, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.916741132736206, |
|
"learning_rate": 0.0, |
|
"loss": 0.3429, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.89375, |
|
"eval_loss": 0.3095899522304535, |
|
"eval_runtime": 2.7374, |
|
"eval_samples_per_second": 116.898, |
|
"eval_steps_per_second": 3.653, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 200, |
|
"total_flos": 8.32925255860224e+17, |
|
"train_loss": 0.5333085978031158, |
|
"train_runtime": 482.5391, |
|
"train_samples_per_second": 53.053, |
|
"train_steps_per_second": 0.414 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.32925255860224e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|