|
{ |
|
"best_metric": 0.9049533605575562, |
|
"best_model_checkpoint": "./vit-large-brain-xray/checkpoint-300", |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 720, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05555555555555555, |
|
"grad_norm": 0.6702606678009033, |
|
"learning_rate": 0.00019722222222222225, |
|
"loss": 1.3662, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 1.5160739421844482, |
|
"learning_rate": 0.00019444444444444446, |
|
"loss": 1.2341, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 3.668222665786743, |
|
"learning_rate": 0.00019166666666666667, |
|
"loss": 0.9384, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 2.245746612548828, |
|
"learning_rate": 0.00018888888888888888, |
|
"loss": 0.6538, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 2.4727349281311035, |
|
"learning_rate": 0.00018611111111111112, |
|
"loss": 0.4997, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 3.7319023609161377, |
|
"learning_rate": 0.00018333333333333334, |
|
"loss": 0.3898, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3888888888888889, |
|
"grad_norm": 0.6027088165283203, |
|
"learning_rate": 0.00018055555555555557, |
|
"loss": 0.3304, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 0.37244492769241333, |
|
"learning_rate": 0.00017777777777777779, |
|
"loss": 0.2543, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.6335736513137817, |
|
"learning_rate": 0.000175, |
|
"loss": 0.3538, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 1.6272715330123901, |
|
"learning_rate": 0.00017222222222222224, |
|
"loss": 0.352, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"eval_accuracy": 0.6294416243654822, |
|
"eval_loss": 1.2266901731491089, |
|
"eval_runtime": 8.2003, |
|
"eval_samples_per_second": 48.047, |
|
"eval_steps_per_second": 6.097, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6111111111111112, |
|
"grad_norm": 1.5688170194625854, |
|
"learning_rate": 0.00016944444444444445, |
|
"loss": 0.3773, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 0.8265367150306702, |
|
"learning_rate": 0.0001666666666666667, |
|
"loss": 0.226, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7222222222222222, |
|
"grad_norm": 2.6476309299468994, |
|
"learning_rate": 0.0001638888888888889, |
|
"loss": 0.3515, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 0.8978700637817383, |
|
"learning_rate": 0.0001611111111111111, |
|
"loss": 0.2367, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 1.1820647716522217, |
|
"learning_rate": 0.00015833333333333332, |
|
"loss": 0.3112, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 2.701751708984375, |
|
"learning_rate": 0.00015555555555555556, |
|
"loss": 0.2741, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9444444444444444, |
|
"grad_norm": 1.249694585800171, |
|
"learning_rate": 0.00015277777777777777, |
|
"loss": 0.2529, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.23323917388916016, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.2239, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0555555555555556, |
|
"grad_norm": 0.4952305853366852, |
|
"learning_rate": 0.00014722222222222223, |
|
"loss": 0.1749, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 0.2073395550251007, |
|
"learning_rate": 0.00014444444444444444, |
|
"loss": 0.1612, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"eval_accuracy": 0.7538071065989848, |
|
"eval_loss": 1.0894657373428345, |
|
"eval_runtime": 7.609, |
|
"eval_samples_per_second": 51.781, |
|
"eval_steps_per_second": 6.571, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.16568297147750854, |
|
"learning_rate": 0.00014166666666666668, |
|
"loss": 0.0718, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2222222222222223, |
|
"grad_norm": 0.0984596461057663, |
|
"learning_rate": 0.0001388888888888889, |
|
"loss": 0.1395, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2777777777777777, |
|
"grad_norm": 0.6732985973358154, |
|
"learning_rate": 0.00013611111111111113, |
|
"loss": 0.1421, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 2.5877740383148193, |
|
"learning_rate": 0.00013333333333333334, |
|
"loss": 0.1118, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 3.4756956100463867, |
|
"learning_rate": 0.00013055555555555555, |
|
"loss": 0.1562, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4444444444444444, |
|
"grad_norm": 0.09811172634363174, |
|
"learning_rate": 0.00012777777777777776, |
|
"loss": 0.1463, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 3.1999194622039795, |
|
"learning_rate": 0.000125, |
|
"loss": 0.1179, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 0.45012134313583374, |
|
"learning_rate": 0.00012222222222222224, |
|
"loss": 0.0929, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6111111111111112, |
|
"grad_norm": 1.862654209136963, |
|
"learning_rate": 0.00011944444444444445, |
|
"loss": 0.186, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 1.39608895778656, |
|
"learning_rate": 0.00011666666666666668, |
|
"loss": 0.0473, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"eval_accuracy": 0.7741116751269036, |
|
"eval_loss": 0.9049533605575562, |
|
"eval_runtime": 6.1753, |
|
"eval_samples_per_second": 63.803, |
|
"eval_steps_per_second": 8.097, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7222222222222223, |
|
"grad_norm": 0.0962180569767952, |
|
"learning_rate": 0.00011388888888888889, |
|
"loss": 0.0858, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 0.08585009723901749, |
|
"learning_rate": 0.00011111111111111112, |
|
"loss": 0.0519, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.06303343176841736, |
|
"learning_rate": 0.00010833333333333333, |
|
"loss": 0.0221, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.8888888888888888, |
|
"grad_norm": 5.68204402923584, |
|
"learning_rate": 0.00010555555555555557, |
|
"loss": 0.0589, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.9444444444444444, |
|
"grad_norm": 5.385427474975586, |
|
"learning_rate": 0.00010277777777777778, |
|
"loss": 0.1881, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.201244831085205, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0758, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.0555555555555554, |
|
"grad_norm": 1.2587229013442993, |
|
"learning_rate": 9.722222222222223e-05, |
|
"loss": 0.0461, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.111111111111111, |
|
"grad_norm": 0.07729563117027283, |
|
"learning_rate": 9.444444444444444e-05, |
|
"loss": 0.0155, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.1666666666666665, |
|
"grad_norm": 5.077848434448242, |
|
"learning_rate": 9.166666666666667e-05, |
|
"loss": 0.0724, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 0.05171338841319084, |
|
"learning_rate": 8.888888888888889e-05, |
|
"loss": 0.0525, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"eval_accuracy": 0.7690355329949239, |
|
"eval_loss": 1.0663037300109863, |
|
"eval_runtime": 6.6314, |
|
"eval_samples_per_second": 59.415, |
|
"eval_steps_per_second": 7.54, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.2777777777777777, |
|
"grad_norm": 0.056168586015701294, |
|
"learning_rate": 8.611111111111112e-05, |
|
"loss": 0.1214, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.3333333333333335, |
|
"grad_norm": 0.2696777582168579, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.0148, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.388888888888889, |
|
"grad_norm": 0.045138537883758545, |
|
"learning_rate": 8.055555555555556e-05, |
|
"loss": 0.0175, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 0.03756405785679817, |
|
"learning_rate": 7.777777777777778e-05, |
|
"loss": 0.0264, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.17634020745754242, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.0101, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.5555555555555554, |
|
"grad_norm": 0.037890926003456116, |
|
"learning_rate": 7.222222222222222e-05, |
|
"loss": 0.0484, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.611111111111111, |
|
"grad_norm": 4.751524448394775, |
|
"learning_rate": 6.944444444444444e-05, |
|
"loss": 0.0525, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 0.15853020548820496, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.0719, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.7222222222222223, |
|
"grad_norm": 0.039081115275621414, |
|
"learning_rate": 6.388888888888888e-05, |
|
"loss": 0.0085, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 0.4480770230293274, |
|
"learning_rate": 6.111111111111112e-05, |
|
"loss": 0.0123, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"eval_accuracy": 0.7461928934010152, |
|
"eval_loss": 1.2449774742126465, |
|
"eval_runtime": 5.9167, |
|
"eval_samples_per_second": 66.591, |
|
"eval_steps_per_second": 8.451, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.8333333333333335, |
|
"grad_norm": 7.5741801261901855, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 0.0278, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 0.1201184019446373, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 0.0091, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.9444444444444446, |
|
"grad_norm": 0.032710809260606766, |
|
"learning_rate": 5.2777777777777784e-05, |
|
"loss": 0.0077, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.03236711025238037, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0557, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.0555555555555554, |
|
"grad_norm": 0.034722838550806046, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.0191, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 4.018179416656494, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0176, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.1666666666666665, |
|
"grad_norm": 0.5732712745666504, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0087, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.2222222222222223, |
|
"grad_norm": 0.027404414489865303, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.0079, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.2777777777777777, |
|
"grad_norm": 0.02965979278087616, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.0068, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.026871565729379654, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0066, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"eval_accuracy": 0.7817258883248731, |
|
"eval_loss": 1.1282514333724976, |
|
"eval_runtime": 6.7045, |
|
"eval_samples_per_second": 58.767, |
|
"eval_steps_per_second": 7.458, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.388888888888889, |
|
"grad_norm": 0.03278065472841263, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.0086, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.4444444444444446, |
|
"grad_norm": 0.07111264020204544, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0295, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 0.028257286176085472, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0065, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 0.02719848044216633, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0418, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.611111111111111, |
|
"grad_norm": 0.026137089356780052, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0073, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6666666666666665, |
|
"grad_norm": 0.030431417748332024, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0101, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.7222222222222223, |
|
"grad_norm": 0.025364473462104797, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0062, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 0.02630157209932804, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0076, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.8333333333333335, |
|
"grad_norm": 0.025917503982782364, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0062, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"grad_norm": 6.676637649536133, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0126, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.888888888888889, |
|
"eval_accuracy": 0.7842639593908629, |
|
"eval_loss": 1.1716859340667725, |
|
"eval_runtime": 6.745, |
|
"eval_samples_per_second": 58.414, |
|
"eval_steps_per_second": 7.413, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.9444444444444446, |
|
"grad_norm": 0.025717712938785553, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.008, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.07326529920101166, |
|
"learning_rate": 0.0, |
|
"loss": 0.0065, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 720, |
|
"total_flos": 3.16768696086528e+18, |
|
"train_loss": 0.16034429804939362, |
|
"train_runtime": 1070.1524, |
|
"train_samples_per_second": 10.727, |
|
"train_steps_per_second": 0.673 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 720, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 3.16768696086528e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|