{
  "best_metric": 1.0025324821472168,
  "best_model_checkpoint": "./vit-base-renovation/checkpoint-75",
  "epoch": 4.0,
  "eval_steps": 25,
  "global_step": 496,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 4.653707504272461,
      "learning_rate": 0.00019596774193548388,
      "loss": 0.4901,
      "step": 10
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.4146682024002075,
      "learning_rate": 0.00019193548387096775,
      "loss": 0.382,
      "step": 20
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.6073059360730594,
      "eval_loss": 1.1102608442306519,
      "eval_runtime": 7.3313,
      "eval_samples_per_second": 29.872,
      "eval_steps_per_second": 3.819,
      "step": 25
    },
    {
      "epoch": 0.24,
      "grad_norm": 4.3104939460754395,
      "learning_rate": 0.00018790322580645164,
      "loss": 0.3537,
      "step": 30
    },
    {
      "epoch": 0.32,
      "grad_norm": 6.281633377075195,
      "learning_rate": 0.00018387096774193548,
      "loss": 0.5919,
      "step": 40
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.1501330137252808,
      "learning_rate": 0.00017983870967741935,
      "loss": 0.5741,
      "step": 50
    },
    {
      "epoch": 0.4,
      "eval_accuracy": 0.6210045662100456,
      "eval_loss": 1.0627731084823608,
      "eval_runtime": 7.7376,
      "eval_samples_per_second": 28.303,
      "eval_steps_per_second": 3.619,
      "step": 50
    },
    {
      "epoch": 0.48,
      "grad_norm": 4.414377689361572,
      "learning_rate": 0.00017580645161290325,
      "loss": 0.4974,
      "step": 60
    },
    {
      "epoch": 0.56,
      "grad_norm": 2.614995241165161,
      "learning_rate": 0.00017177419354838711,
      "loss": 0.5589,
      "step": 70
    },
    {
      "epoch": 0.6,
      "eval_accuracy": 0.6666666666666666,
      "eval_loss": 1.0025324821472168,
      "eval_runtime": 8.1199,
      "eval_samples_per_second": 26.971,
      "eval_steps_per_second": 3.448,
      "step": 75
    },
    {
      "epoch": 0.65,
      "grad_norm": 2.475475549697876,
      "learning_rate": 0.00016774193548387098,
      "loss": 0.3703,
      "step": 80
    },
    {
      "epoch": 0.73,
      "grad_norm": 5.414321422576904,
      "learning_rate": 0.00016370967741935485,
      "loss": 0.4059,
      "step": 90
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.9260778427124023,
      "learning_rate": 0.00015967741935483872,
      "loss": 0.4074,
      "step": 100
    },
    {
      "epoch": 0.81,
      "eval_accuracy": 0.6073059360730594,
      "eval_loss": 1.1324039697647095,
      "eval_runtime": 7.9656,
      "eval_samples_per_second": 27.493,
      "eval_steps_per_second": 3.515,
      "step": 100
    },
    {
      "epoch": 0.89,
      "grad_norm": 1.745337724685669,
      "learning_rate": 0.0001556451612903226,
      "loss": 0.2648,
      "step": 110
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.7676700353622437,
      "learning_rate": 0.00015161290322580646,
      "loss": 0.3581,
      "step": 120
    },
    {
      "epoch": 1.01,
      "eval_accuracy": 0.6438356164383562,
      "eval_loss": 1.1934621334075928,
      "eval_runtime": 7.3834,
      "eval_samples_per_second": 29.661,
      "eval_steps_per_second": 3.792,
      "step": 125
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.40987706184387207,
      "learning_rate": 0.00014758064516129032,
      "loss": 0.1496,
      "step": 130
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.4426233768463135,
      "learning_rate": 0.00014354838709677422,
      "loss": 0.1734,
      "step": 140
    },
    {
      "epoch": 1.21,
      "grad_norm": 4.183903694152832,
      "learning_rate": 0.0001395161290322581,
      "loss": 0.2618,
      "step": 150
    },
    {
      "epoch": 1.21,
      "eval_accuracy": 0.502283105022831,
      "eval_loss": 1.8300055265426636,
      "eval_runtime": 8.5064,
      "eval_samples_per_second": 25.745,
      "eval_steps_per_second": 3.292,
      "step": 150
    },
    {
      "epoch": 1.29,
      "grad_norm": 5.4273681640625,
      "learning_rate": 0.00013548387096774193,
      "loss": 0.4102,
      "step": 160
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.8633623123168945,
      "learning_rate": 0.0001314516129032258,
      "loss": 0.1299,
      "step": 170
    },
    {
      "epoch": 1.41,
      "eval_accuracy": 0.6301369863013698,
      "eval_loss": 1.2576818466186523,
      "eval_runtime": 8.1086,
      "eval_samples_per_second": 27.008,
      "eval_steps_per_second": 3.453,
      "step": 175
    },
    {
      "epoch": 1.45,
      "grad_norm": 4.694212436676025,
      "learning_rate": 0.0001274193548387097,
      "loss": 0.2189,
      "step": 180
    },
    {
      "epoch": 1.53,
      "grad_norm": 3.287609338760376,
      "learning_rate": 0.00012338709677419356,
      "loss": 0.1625,
      "step": 190
    },
    {
      "epoch": 1.61,
      "grad_norm": 3.42903470993042,
      "learning_rate": 0.00011935483870967743,
      "loss": 0.2562,
      "step": 200
    },
    {
      "epoch": 1.61,
      "eval_accuracy": 0.6894977168949772,
      "eval_loss": 1.092441439628601,
      "eval_runtime": 8.405,
      "eval_samples_per_second": 26.056,
      "eval_steps_per_second": 3.331,
      "step": 200
    },
    {
      "epoch": 1.69,
      "grad_norm": 3.9116501808166504,
      "learning_rate": 0.00011532258064516131,
      "loss": 0.3386,
      "step": 210
    },
    {
      "epoch": 1.77,
      "grad_norm": 1.250923752784729,
      "learning_rate": 0.00011129032258064515,
      "loss": 0.2573,
      "step": 220
    },
    {
      "epoch": 1.81,
      "eval_accuracy": 0.684931506849315,
      "eval_loss": 1.1285330057144165,
      "eval_runtime": 8.1457,
      "eval_samples_per_second": 26.885,
      "eval_steps_per_second": 3.437,
      "step": 225
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.8015751242637634,
      "learning_rate": 0.00010725806451612903,
      "loss": 0.1359,
      "step": 230
    },
    {
      "epoch": 1.94,
      "grad_norm": 3.1585347652435303,
      "learning_rate": 0.0001032258064516129,
      "loss": 0.2325,
      "step": 240
    },
    {
      "epoch": 2.02,
      "grad_norm": 1.220850944519043,
      "learning_rate": 9.919354838709678e-05,
      "loss": 0.2471,
      "step": 250
    },
    {
      "epoch": 2.02,
      "eval_accuracy": 0.6255707762557078,
      "eval_loss": 1.3386634588241577,
      "eval_runtime": 8.4514,
      "eval_samples_per_second": 25.913,
      "eval_steps_per_second": 3.313,
      "step": 250
    },
    {
      "epoch": 2.1,
      "grad_norm": 0.14146128296852112,
      "learning_rate": 9.516129032258065e-05,
      "loss": 0.0773,
      "step": 260
    },
    {
      "epoch": 2.18,
      "grad_norm": 0.13016600906848907,
      "learning_rate": 9.112903225806452e-05,
      "loss": 0.0618,
      "step": 270
    },
    {
      "epoch": 2.22,
      "eval_accuracy": 0.6666666666666666,
      "eval_loss": 1.2246029376983643,
      "eval_runtime": 7.483,
      "eval_samples_per_second": 29.267,
      "eval_steps_per_second": 3.742,
      "step": 275
    },
    {
      "epoch": 2.26,
      "grad_norm": 0.1758795827627182,
      "learning_rate": 8.709677419354839e-05,
      "loss": 0.108,
      "step": 280
    },
    {
      "epoch": 2.34,
      "grad_norm": 1.5025098323822021,
      "learning_rate": 8.306451612903227e-05,
      "loss": 0.0662,
      "step": 290
    },
    {
      "epoch": 2.42,
      "grad_norm": 8.820311546325684,
      "learning_rate": 7.903225806451613e-05,
      "loss": 0.0658,
      "step": 300
    },
    {
      "epoch": 2.42,
      "eval_accuracy": 0.634703196347032,
      "eval_loss": 1.4131733179092407,
      "eval_runtime": 8.0515,
      "eval_samples_per_second": 27.2,
      "eval_steps_per_second": 3.478,
      "step": 300
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.2889515161514282,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.1059,
      "step": 310
    },
    {
      "epoch": 2.58,
      "grad_norm": 0.25573813915252686,
      "learning_rate": 7.096774193548388e-05,
      "loss": 0.0592,
      "step": 320
    },
    {
      "epoch": 2.62,
      "eval_accuracy": 0.6529680365296804,
      "eval_loss": 1.4326461553573608,
      "eval_runtime": 8.3973,
      "eval_samples_per_second": 26.08,
      "eval_steps_per_second": 3.334,
      "step": 325
    },
    {
      "epoch": 2.66,
      "grad_norm": 4.445276260375977,
      "learning_rate": 6.693548387096774e-05,
      "loss": 0.0717,
      "step": 330
    },
    {
      "epoch": 2.74,
      "grad_norm": 0.5493516325950623,
      "learning_rate": 6.290322580645161e-05,
      "loss": 0.0981,
      "step": 340
    },
    {
      "epoch": 2.82,
      "grad_norm": 3.7721645832061768,
      "learning_rate": 5.887096774193549e-05,
      "loss": 0.0464,
      "step": 350
    },
    {
      "epoch": 2.82,
      "eval_accuracy": 0.684931506849315,
      "eval_loss": 1.2483667135238647,
      "eval_runtime": 7.3404,
      "eval_samples_per_second": 29.835,
      "eval_steps_per_second": 3.815,
      "step": 350
    },
    {
      "epoch": 2.9,
      "grad_norm": 0.07467525452375412,
      "learning_rate": 5.4838709677419355e-05,
      "loss": 0.0249,
      "step": 360
    },
    {
      "epoch": 2.98,
      "grad_norm": 1.5583500862121582,
      "learning_rate": 5.080645161290323e-05,
      "loss": 0.0567,
      "step": 370
    },
    {
      "epoch": 3.02,
      "eval_accuracy": 0.634703196347032,
      "eval_loss": 1.5350439548492432,
      "eval_runtime": 7.9868,
      "eval_samples_per_second": 27.42,
      "eval_steps_per_second": 3.506,
      "step": 375
    },
    {
      "epoch": 3.06,
      "grad_norm": 0.06753262132406235,
      "learning_rate": 4.67741935483871e-05,
      "loss": 0.0185,
      "step": 380
    },
    {
      "epoch": 3.15,
      "grad_norm": 0.0861305296421051,
      "learning_rate": 4.2741935483870973e-05,
      "loss": 0.0179,
      "step": 390
    },
    {
      "epoch": 3.23,
      "grad_norm": 0.07843153923749924,
      "learning_rate": 3.870967741935484e-05,
      "loss": 0.0269,
      "step": 400
    },
    {
      "epoch": 3.23,
      "eval_accuracy": 0.6666666666666666,
      "eval_loss": 1.479716181755066,
      "eval_runtime": 10.862,
      "eval_samples_per_second": 20.162,
      "eval_steps_per_second": 2.578,
      "step": 400
    },
    {
      "epoch": 3.31,
      "grad_norm": 0.5874373912811279,
      "learning_rate": 3.467741935483872e-05,
      "loss": 0.0181,
      "step": 410
    },
    {
      "epoch": 3.39,
      "grad_norm": 0.15166939795017242,
      "learning_rate": 3.0645161290322585e-05,
      "loss": 0.0239,
      "step": 420
    },
    {
      "epoch": 3.43,
      "eval_accuracy": 0.6529680365296804,
      "eval_loss": 1.444357991218567,
      "eval_runtime": 8.4171,
      "eval_samples_per_second": 26.019,
      "eval_steps_per_second": 3.327,
      "step": 425
    },
    {
      "epoch": 3.47,
      "grad_norm": 0.2967914938926697,
      "learning_rate": 2.661290322580645e-05,
      "loss": 0.0217,
      "step": 430
    },
    {
      "epoch": 3.55,
      "grad_norm": 0.084845632314682,
      "learning_rate": 2.258064516129032e-05,
      "loss": 0.021,
      "step": 440
    },
    {
      "epoch": 3.63,
      "grad_norm": 0.06501659750938416,
      "learning_rate": 1.8548387096774193e-05,
      "loss": 0.0184,
      "step": 450
    },
    {
      "epoch": 3.63,
      "eval_accuracy": 0.6575342465753424,
      "eval_loss": 1.4473841190338135,
      "eval_runtime": 8.0514,
      "eval_samples_per_second": 27.2,
      "eval_steps_per_second": 3.478,
      "step": 450
    },
    {
      "epoch": 3.71,
      "grad_norm": 0.09078585356473923,
      "learning_rate": 1.4516129032258066e-05,
      "loss": 0.0162,
      "step": 460
    },
    {
      "epoch": 3.79,
      "grad_norm": 0.06605294346809387,
      "learning_rate": 1.0483870967741936e-05,
      "loss": 0.0286,
      "step": 470
    },
    {
      "epoch": 3.83,
      "eval_accuracy": 0.6666666666666666,
      "eval_loss": 1.4620742797851562,
      "eval_runtime": 8.3261,
      "eval_samples_per_second": 26.303,
      "eval_steps_per_second": 3.363,
      "step": 475
    },
    {
      "epoch": 3.87,
      "grad_norm": 0.05681619793176651,
      "learning_rate": 6.451612903225806e-06,
      "loss": 0.0162,
      "step": 480
    },
    {
      "epoch": 3.95,
      "grad_norm": 0.07769191265106201,
      "learning_rate": 2.4193548387096776e-06,
      "loss": 0.0261,
      "step": 490
    },
    {
      "epoch": 4.0,
      "step": 496,
      "total_flos": 6.10974224738132e+17,
      "train_loss": 0.18820726821920084,
      "train_runtime": 922.3598,
      "train_samples_per_second": 8.548,
      "train_steps_per_second": 0.538
    }
  ],
  "logging_steps": 10,
  "max_steps": 496,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 25,
  "total_flos": 6.10974224738132e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}