|
{ |
|
"best_metric": 0.9096045197740112, |
|
"best_model_checkpoint": "convnextv2-base-22k-224-finetuned-eurosat-2/checkpoint-694", |
|
"epoch": 9.974811083123425, |
|
"eval_steps": 500, |
|
"global_step": 990, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.050505050505051e-06, |
|
"loss": 1.2172, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 1.0082, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.8736, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 0.6853, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5252525252525256e-05, |
|
"loss": 0.7756, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 0.71, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 0.5979, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 0.6413, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.5227, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7796610169491526, |
|
"eval_loss": 0.5333397388458252, |
|
"eval_runtime": 5.7182, |
|
"eval_samples_per_second": 30.954, |
|
"eval_steps_per_second": 7.87, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.994388327721661e-05, |
|
"loss": 0.5778, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.5749, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.882154882154882e-05, |
|
"loss": 0.4366, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.8260381593714935e-05, |
|
"loss": 0.3629, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.7699214365881036e-05, |
|
"loss": 0.5, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.713804713804714e-05, |
|
"loss": 0.4734, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.6576879910213244e-05, |
|
"loss": 0.4109, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.601571268237935e-05, |
|
"loss": 0.5251, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.3881, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.4893378226711566e-05, |
|
"loss": 0.4248, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.8531073446327684, |
|
"eval_loss": 0.4144964814186096, |
|
"eval_runtime": 5.6437, |
|
"eval_samples_per_second": 31.363, |
|
"eval_steps_per_second": 7.974, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.433221099887767e-05, |
|
"loss": 0.3864, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.3771043771043774e-05, |
|
"loss": 0.2727, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.3986, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.264870931537598e-05, |
|
"loss": 0.2392, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.208754208754209e-05, |
|
"loss": 0.357, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.15263748597082e-05, |
|
"loss": 0.2285, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.0965207631874305e-05, |
|
"loss": 0.3227, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 0.1759, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.984287317620651e-05, |
|
"loss": 0.3358, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.9281705948372613e-05, |
|
"loss": 0.2998, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.8757062146892656, |
|
"eval_loss": 0.33070680499076843, |
|
"eval_runtime": 5.6516, |
|
"eval_samples_per_second": 31.318, |
|
"eval_steps_per_second": 7.962, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.872053872053872e-05, |
|
"loss": 0.4704, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.815937149270483e-05, |
|
"loss": 0.1983, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.7598204264870936e-05, |
|
"loss": 0.188, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2139, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.6475869809203144e-05, |
|
"loss": 0.2216, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.5914702581369245e-05, |
|
"loss": 0.2099, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 0.278, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 3.4792368125701466e-05, |
|
"loss": 0.226, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.423120089786757e-05, |
|
"loss": 0.169, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.3670033670033675e-05, |
|
"loss": 0.1704, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8926553672316384, |
|
"eval_loss": 0.26643165946006775, |
|
"eval_runtime": 5.6592, |
|
"eval_samples_per_second": 31.277, |
|
"eval_steps_per_second": 7.952, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.3108866442199775e-05, |
|
"loss": 0.1063, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.254769921436588e-05, |
|
"loss": 0.2143, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.198653198653199e-05, |
|
"loss": 0.1802, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 3.14253647586981e-05, |
|
"loss": 0.1159, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.1606, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 0.1828, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.9741863075196406e-05, |
|
"loss": 0.1338, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.9180695847362517e-05, |
|
"loss": 0.208, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 2.8619528619528618e-05, |
|
"loss": 0.1392, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 2.8058361391694725e-05, |
|
"loss": 0.0684, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8700564971751412, |
|
"eval_loss": 0.4352767765522003, |
|
"eval_runtime": 5.6244, |
|
"eval_samples_per_second": 31.47, |
|
"eval_steps_per_second": 8.001, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 2.7497194163860833e-05, |
|
"loss": 0.1994, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 2.6936026936026937e-05, |
|
"loss": 0.1901, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 2.6374859708193044e-05, |
|
"loss": 0.1812, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 2.581369248035915e-05, |
|
"loss": 0.1002, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 2.5252525252525256e-05, |
|
"loss": 0.0685, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.1548, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 2.4130190796857467e-05, |
|
"loss": 0.1409, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 2.356902356902357e-05, |
|
"loss": 0.1636, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 2.3007856341189676e-05, |
|
"loss": 0.1475, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 2.2446689113355783e-05, |
|
"loss": 0.1546, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.8870056497175142, |
|
"eval_loss": 0.39201802015304565, |
|
"eval_runtime": 5.5831, |
|
"eval_samples_per_second": 31.703, |
|
"eval_steps_per_second": 8.06, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 2.1885521885521887e-05, |
|
"loss": 0.1232, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 2.132435465768799e-05, |
|
"loss": 0.0651, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 2.07631874298541e-05, |
|
"loss": 0.0332, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 0.0536, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1.9640852974186307e-05, |
|
"loss": 0.2526, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.9079685746352414e-05, |
|
"loss": 0.218, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0199, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1.7957351290684622e-05, |
|
"loss": 0.0679, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 1.7396184062850733e-05, |
|
"loss": 0.1341, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1.6835016835016837e-05, |
|
"loss": 0.0593, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.9096045197740112, |
|
"eval_loss": 0.3801310956478119, |
|
"eval_runtime": 5.6406, |
|
"eval_samples_per_second": 31.379, |
|
"eval_steps_per_second": 7.978, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 1.627384960718294e-05, |
|
"loss": 0.0364, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 1.571268237934905e-05, |
|
"loss": 0.0853, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.1463, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 1.4590347923681259e-05, |
|
"loss": 0.0741, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 1.4029180695847363e-05, |
|
"loss": 0.0982, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 1.3468013468013468e-05, |
|
"loss": 0.0335, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 1.2906846240179574e-05, |
|
"loss": 0.0494, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.0645, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.86, |
|
"learning_rate": 1.1784511784511786e-05, |
|
"loss": 0.0971, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 1.1223344556677892e-05, |
|
"loss": 0.0745, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8983050847457628, |
|
"eval_loss": 0.40303125977516174, |
|
"eval_runtime": 5.6355, |
|
"eval_samples_per_second": 31.408, |
|
"eval_steps_per_second": 7.985, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 1.0662177328843996e-05, |
|
"loss": 0.095, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 0.0438, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 9.539842873176207e-06, |
|
"loss": 0.0136, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 8.978675645342311e-06, |
|
"loss": 0.0702, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 8.417508417508419e-06, |
|
"loss": 0.0943, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.856341189674524e-06, |
|
"loss": 0.0746, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 7.295173961840629e-06, |
|
"loss": 0.0562, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.734006734006734e-06, |
|
"loss": 0.0372, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.0282, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 5.611672278338946e-06, |
|
"loss": 0.0877, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.903954802259887, |
|
"eval_loss": 0.38457363843917847, |
|
"eval_runtime": 5.5773, |
|
"eval_samples_per_second": 31.736, |
|
"eval_steps_per_second": 8.068, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 5.050505050505051e-06, |
|
"loss": 0.0087, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 4.489337822671156e-06, |
|
"loss": 0.1049, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 3.928170594837262e-06, |
|
"loss": 0.0294, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3.367003367003367e-06, |
|
"loss": 0.0892, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 2.805836139169473e-06, |
|
"loss": 0.1025, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.244668911335578e-06, |
|
"loss": 0.1009, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 1.6835016835016836e-06, |
|
"loss": 0.0784, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 1.122334455667789e-06, |
|
"loss": 0.0662, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 5.611672278338944e-07, |
|
"loss": 0.0257, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 0.0, |
|
"loss": 0.09, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_accuracy": 0.903954802259887, |
|
"eval_loss": 0.3815687894821167, |
|
"eval_runtime": 5.5808, |
|
"eval_samples_per_second": 31.716, |
|
"eval_steps_per_second": 8.063, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"step": 990, |
|
"total_flos": 1.252454560310016e+18, |
|
"train_loss": 0.23395052750905354, |
|
"train_runtime": 1693.9771, |
|
"train_samples_per_second": 9.357, |
|
"train_steps_per_second": 0.584 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 990, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.252454560310016e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|