{
  "best_metric": 0.855129650507328,
  "best_model_checkpoint": "vit-large-patch16-224-finetuned-galaxy10-decals/checkpoint-906",
  "epoch": 29.76,
  "eval_steps": 500,
  "global_step": 930,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "learning_rate": 1.0752688172043012e-05,
      "loss": 2.2239,
      "step": 10
    },
    {
      "epoch": 0.64,
      "learning_rate": 2.1505376344086024e-05,
      "loss": 1.609,
      "step": 20
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 1.0358,
      "step": 30
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.7361894024802705,
      "eval_f1": 0.7266397888949025,
      "eval_loss": 0.7598127126693726,
      "eval_precision": 0.7424926838922145,
      "eval_recall": 0.7361894024802705,
      "eval_runtime": 17.3826,
      "eval_samples_per_second": 102.056,
      "eval_steps_per_second": 0.805,
      "step": 31
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.301075268817205e-05,
      "loss": 0.8138,
      "step": 40
    },
    {
      "epoch": 1.6,
      "learning_rate": 5.3763440860215054e-05,
      "loss": 0.7072,
      "step": 50
    },
    {
      "epoch": 1.92,
      "learning_rate": 6.451612903225807e-05,
      "loss": 0.6822,
      "step": 60
    },
    {
      "epoch": 1.98,
      "eval_accuracy": 0.7919954904171365,
      "eval_f1": 0.7898931758849426,
      "eval_loss": 0.6136134266853333,
      "eval_precision": 0.7971665728434488,
      "eval_recall": 0.7919954904171365,
      "eval_runtime": 17.1163,
      "eval_samples_per_second": 103.644,
      "eval_steps_per_second": 0.818,
      "step": 62
    },
    {
      "epoch": 2.24,
      "learning_rate": 7.526881720430108e-05,
      "loss": 0.6682,
      "step": 70
    },
    {
      "epoch": 2.56,
      "learning_rate": 8.60215053763441e-05,
      "loss": 0.6424,
      "step": 80
    },
    {
      "epoch": 2.88,
      "learning_rate": 9.677419354838711e-05,
      "loss": 0.6535,
      "step": 90
    },
    {
      "epoch": 2.98,
      "eval_accuracy": 0.8105975197294251,
      "eval_f1": 0.8061910960708782,
      "eval_loss": 0.5416004061698914,
      "eval_precision": 0.8140360122948396,
      "eval_recall": 0.8105975197294251,
      "eval_runtime": 17.566,
      "eval_samples_per_second": 100.99,
      "eval_steps_per_second": 0.797,
      "step": 93
    },
    {
      "epoch": 3.2,
      "learning_rate": 9.916367980884111e-05,
      "loss": 0.6169,
      "step": 100
    },
    {
      "epoch": 3.52,
      "learning_rate": 9.79689366786141e-05,
      "loss": 0.6022,
      "step": 110
    },
    {
      "epoch": 3.84,
      "learning_rate": 9.677419354838711e-05,
      "loss": 0.5696,
      "step": 120
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8162344983089064,
      "eval_f1": 0.8139861710754493,
      "eval_loss": 0.5304952263832092,
      "eval_precision": 0.8195322178917354,
      "eval_recall": 0.8162344983089064,
      "eval_runtime": 17.3334,
      "eval_samples_per_second": 102.346,
      "eval_steps_per_second": 0.808,
      "step": 125
    },
    {
      "epoch": 4.16,
      "learning_rate": 9.55794504181601e-05,
      "loss": 0.5455,
      "step": 130
    },
    {
      "epoch": 4.48,
      "learning_rate": 9.438470728793309e-05,
      "loss": 0.5421,
      "step": 140
    },
    {
      "epoch": 4.8,
      "learning_rate": 9.31899641577061e-05,
      "loss": 0.5435,
      "step": 150
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.814543404735062,
      "eval_f1": 0.8161156144737257,
      "eval_loss": 0.5555492043495178,
      "eval_precision": 0.8241810477987659,
      "eval_recall": 0.814543404735062,
      "eval_runtime": 17.1642,
      "eval_samples_per_second": 103.354,
      "eval_steps_per_second": 0.816,
      "step": 156
    },
    {
      "epoch": 5.12,
      "learning_rate": 9.199522102747909e-05,
      "loss": 0.5186,
      "step": 160
    },
    {
      "epoch": 5.44,
      "learning_rate": 9.080047789725208e-05,
      "loss": 0.5074,
      "step": 170
    },
    {
      "epoch": 5.76,
      "learning_rate": 8.960573476702509e-05,
      "loss": 0.4621,
      "step": 180
    },
    {
      "epoch": 5.98,
      "eval_accuracy": 0.8297632468996617,
      "eval_f1": 0.8254361806374156,
      "eval_loss": 0.5074881315231323,
      "eval_precision": 0.8343689618318259,
      "eval_recall": 0.8297632468996617,
      "eval_runtime": 17.1936,
      "eval_samples_per_second": 103.178,
      "eval_steps_per_second": 0.814,
      "step": 187
    },
    {
      "epoch": 6.08,
      "learning_rate": 8.84109916367981e-05,
      "loss": 0.4713,
      "step": 190
    },
    {
      "epoch": 6.4,
      "learning_rate": 8.72162485065711e-05,
      "loss": 0.4516,
      "step": 200
    },
    {
      "epoch": 6.72,
      "learning_rate": 8.60215053763441e-05,
      "loss": 0.4479,
      "step": 210
    },
    {
      "epoch": 6.98,
      "eval_accuracy": 0.8280721533258174,
      "eval_f1": 0.8268976469809478,
      "eval_loss": 0.5117892622947693,
      "eval_precision": 0.8290711099056762,
      "eval_recall": 0.8280721533258174,
      "eval_runtime": 17.1887,
      "eval_samples_per_second": 103.207,
      "eval_steps_per_second": 0.814,
      "step": 218
    },
    {
      "epoch": 7.04,
      "learning_rate": 8.482676224611709e-05,
      "loss": 0.4864,
      "step": 220
    },
    {
      "epoch": 7.36,
      "learning_rate": 8.363201911589009e-05,
      "loss": 0.4469,
      "step": 230
    },
    {
      "epoch": 7.68,
      "learning_rate": 8.243727598566309e-05,
      "loss": 0.4427,
      "step": 240
    },
    {
      "epoch": 8.0,
      "learning_rate": 8.124253285543608e-05,
      "loss": 0.4318,
      "step": 250
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8196166854565953,
      "eval_f1": 0.8165810295817906,
      "eval_loss": 0.5163812041282654,
      "eval_precision": 0.8254762320844335,
      "eval_recall": 0.8196166854565953,
      "eval_runtime": 17.2603,
      "eval_samples_per_second": 102.779,
      "eval_steps_per_second": 0.811,
      "step": 250
    },
    {
      "epoch": 8.32,
      "learning_rate": 8.004778972520908e-05,
      "loss": 0.3936,
      "step": 260
    },
    {
      "epoch": 8.64,
      "learning_rate": 7.885304659498209e-05,
      "loss": 0.4184,
      "step": 270
    },
    {
      "epoch": 8.96,
      "learning_rate": 7.765830346475508e-05,
      "loss": 0.4011,
      "step": 280
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.8410372040586246,
      "eval_f1": 0.8361869699508147,
      "eval_loss": 0.5086681246757507,
      "eval_precision": 0.8369303660828149,
      "eval_recall": 0.8410372040586246,
      "eval_runtime": 17.2372,
      "eval_samples_per_second": 102.917,
      "eval_steps_per_second": 0.812,
      "step": 281
    },
    {
      "epoch": 9.28,
      "learning_rate": 7.646356033452809e-05,
      "loss": 0.3623,
      "step": 290
    },
    {
      "epoch": 9.6,
      "learning_rate": 7.526881720430108e-05,
      "loss": 0.3859,
      "step": 300
    },
    {
      "epoch": 9.92,
      "learning_rate": 7.407407407407407e-05,
      "loss": 0.355,
      "step": 310
    },
    {
      "epoch": 9.98,
      "eval_accuracy": 0.8410372040586246,
      "eval_f1": 0.8405397327667177,
      "eval_loss": 0.5063195824623108,
      "eval_precision": 0.8432750192889569,
      "eval_recall": 0.8410372040586246,
      "eval_runtime": 16.9471,
      "eval_samples_per_second": 104.678,
      "eval_steps_per_second": 0.826,
      "step": 312
    },
    {
      "epoch": 10.24,
      "learning_rate": 7.287933094384708e-05,
      "loss": 0.3546,
      "step": 320
    },
    {
      "epoch": 10.56,
      "learning_rate": 7.168458781362007e-05,
      "loss": 0.3651,
      "step": 330
    },
    {
      "epoch": 10.88,
      "learning_rate": 7.048984468339306e-05,
      "loss": 0.3655,
      "step": 340
    },
    {
      "epoch": 10.98,
      "eval_accuracy": 0.8325817361894025,
      "eval_f1": 0.8304645126548728,
      "eval_loss": 0.5418781042098999,
      "eval_precision": 0.8342869154143101,
      "eval_recall": 0.8325817361894025,
      "eval_runtime": 17.1542,
      "eval_samples_per_second": 103.415,
      "eval_steps_per_second": 0.816,
      "step": 343
    },
    {
      "epoch": 11.2,
      "learning_rate": 6.929510155316607e-05,
      "loss": 0.321,
      "step": 350
    },
    {
      "epoch": 11.52,
      "learning_rate": 6.810035842293908e-05,
      "loss": 0.3149,
      "step": 360
    },
    {
      "epoch": 11.84,
      "learning_rate": 6.690561529271207e-05,
      "loss": 0.3292,
      "step": 370
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8438556933483653,
      "eval_f1": 0.8414694905603178,
      "eval_loss": 0.5134132504463196,
      "eval_precision": 0.8441999477281247,
      "eval_recall": 0.8438556933483653,
      "eval_runtime": 17.5078,
      "eval_samples_per_second": 101.326,
      "eval_steps_per_second": 0.8,
      "step": 375
    },
    {
      "epoch": 12.16,
      "learning_rate": 6.571087216248507e-05,
      "loss": 0.307,
      "step": 380
    },
    {
      "epoch": 12.48,
      "learning_rate": 6.451612903225807e-05,
      "loss": 0.2954,
      "step": 390
    },
    {
      "epoch": 12.8,
      "learning_rate": 6.332138590203107e-05,
      "loss": 0.3207,
      "step": 400
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.818489289740699,
      "eval_f1": 0.8178062229123537,
      "eval_loss": 0.6284513473510742,
      "eval_precision": 0.8292555687045076,
      "eval_recall": 0.818489289740699,
      "eval_runtime": 17.3492,
      "eval_samples_per_second": 102.253,
      "eval_steps_per_second": 0.807,
      "step": 406
    },
    {
      "epoch": 13.12,
      "learning_rate": 6.212664277180407e-05,
      "loss": 0.3036,
      "step": 410
    },
    {
      "epoch": 13.44,
      "learning_rate": 6.093189964157706e-05,
      "loss": 0.2996,
      "step": 420
    },
    {
      "epoch": 13.76,
      "learning_rate": 5.9737156511350064e-05,
      "loss": 0.2931,
      "step": 430
    },
    {
      "epoch": 13.98,
      "eval_accuracy": 0.8382187147688839,
      "eval_f1": 0.8370541465865403,
      "eval_loss": 0.5627130270004272,
      "eval_precision": 0.839486824110364,
      "eval_recall": 0.8382187147688839,
      "eval_runtime": 17.3615,
      "eval_samples_per_second": 102.18,
      "eval_steps_per_second": 0.806,
      "step": 437
    },
    {
      "epoch": 14.08,
      "learning_rate": 5.8542413381123063e-05,
      "loss": 0.285,
      "step": 440
    },
    {
      "epoch": 14.4,
      "learning_rate": 5.7347670250896056e-05,
      "loss": 0.2668,
      "step": 450
    },
    {
      "epoch": 14.72,
      "learning_rate": 5.615292712066906e-05,
      "loss": 0.2817,
      "step": 460
    },
    {
      "epoch": 14.98,
      "eval_accuracy": 0.8207440811724915,
      "eval_f1": 0.8215045979425666,
      "eval_loss": 0.6059415936470032,
      "eval_precision": 0.8277800413747494,
      "eval_recall": 0.8207440811724915,
      "eval_runtime": 17.4368,
      "eval_samples_per_second": 101.739,
      "eval_steps_per_second": 0.803,
      "step": 468
    },
    {
      "epoch": 15.04,
      "learning_rate": 5.495818399044206e-05,
      "loss": 0.2788,
      "step": 470
    },
    {
      "epoch": 15.36,
      "learning_rate": 5.3763440860215054e-05,
      "loss": 0.2477,
      "step": 480
    },
    {
      "epoch": 15.68,
      "learning_rate": 5.256869772998806e-05,
      "loss": 0.2632,
      "step": 490
    },
    {
      "epoch": 16.0,
      "learning_rate": 5.137395459976105e-05,
      "loss": 0.2713,
      "step": 500
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8382187147688839,
      "eval_f1": 0.833707206410208,
      "eval_loss": 0.6140456795692444,
      "eval_precision": 0.8367410596472065,
      "eval_recall": 0.8382187147688839,
      "eval_runtime": 16.853,
      "eval_samples_per_second": 105.263,
      "eval_steps_per_second": 0.831,
      "step": 500
    },
    {
      "epoch": 16.32,
      "learning_rate": 5.017921146953405e-05,
      "loss": 0.2436,
      "step": 510
    },
    {
      "epoch": 16.64,
      "learning_rate": 4.898446833930705e-05,
      "loss": 0.2609,
      "step": 520
    },
    {
      "epoch": 16.96,
      "learning_rate": 4.778972520908005e-05,
      "loss": 0.233,
      "step": 530
    },
    {
      "epoch": 16.99,
      "eval_accuracy": 0.8382187147688839,
      "eval_f1": 0.8374417130087097,
      "eval_loss": 0.5992226004600525,
      "eval_precision": 0.8384211210396012,
      "eval_recall": 0.8382187147688839,
      "eval_runtime": 17.2766,
      "eval_samples_per_second": 102.682,
      "eval_steps_per_second": 0.81,
      "step": 531
    },
    {
      "epoch": 17.28,
      "learning_rate": 4.659498207885305e-05,
      "loss": 0.2495,
      "step": 540
    },
    {
      "epoch": 17.6,
      "learning_rate": 4.540023894862604e-05,
      "loss": 0.2439,
      "step": 550
    },
    {
      "epoch": 17.92,
      "learning_rate": 4.420549581839905e-05,
      "loss": 0.2313,
      "step": 560
    },
    {
      "epoch": 17.98,
      "eval_accuracy": 0.8291995490417137,
      "eval_f1": 0.8277700567784572,
      "eval_loss": 0.6679310202598572,
      "eval_precision": 0.8343220081023114,
      "eval_recall": 0.8291995490417137,
      "eval_runtime": 17.1635,
      "eval_samples_per_second": 103.359,
      "eval_steps_per_second": 0.816,
      "step": 562
    },
    {
      "epoch": 18.24,
      "learning_rate": 4.301075268817205e-05,
      "loss": 0.2372,
      "step": 570
    },
    {
      "epoch": 18.56,
      "learning_rate": 4.1816009557945046e-05,
      "loss": 0.2221,
      "step": 580
    },
    {
      "epoch": 18.88,
      "learning_rate": 4.062126642771804e-05,
      "loss": 0.223,
      "step": 590
    },
    {
      "epoch": 18.98,
      "eval_accuracy": 0.8342728297632469,
      "eval_f1": 0.8347284925921231,
      "eval_loss": 0.650068461894989,
      "eval_precision": 0.83863912442667,
      "eval_recall": 0.8342728297632469,
      "eval_runtime": 17.3595,
      "eval_samples_per_second": 102.192,
      "eval_steps_per_second": 0.806,
      "step": 593
    },
    {
      "epoch": 19.2,
      "learning_rate": 3.9426523297491045e-05,
      "loss": 0.2233,
      "step": 600
    },
    {
      "epoch": 19.52,
      "learning_rate": 3.8231780167264044e-05,
      "loss": 0.2252,
      "step": 610
    },
    {
      "epoch": 19.84,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.2126,
      "step": 620
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8342728297632469,
      "eval_f1": 0.8296208996152092,
      "eval_loss": 0.6730501651763916,
      "eval_precision": 0.8304365724870174,
      "eval_recall": 0.8342728297632469,
      "eval_runtime": 17.1049,
      "eval_samples_per_second": 103.713,
      "eval_steps_per_second": 0.818,
      "step": 625
    },
    {
      "epoch": 20.16,
      "learning_rate": 3.5842293906810036e-05,
      "loss": 0.2106,
      "step": 630
    },
    {
      "epoch": 20.48,
      "learning_rate": 3.4647550776583035e-05,
      "loss": 0.2086,
      "step": 640
    },
    {
      "epoch": 20.8,
      "learning_rate": 3.3452807646356034e-05,
      "loss": 0.2078,
      "step": 650
    },
    {
      "epoch": 20.99,
      "eval_accuracy": 0.8387824126268321,
      "eval_f1": 0.8383399204498011,
      "eval_loss": 0.6334655284881592,
      "eval_precision": 0.8409778004650714,
      "eval_recall": 0.8387824126268321,
      "eval_runtime": 16.9486,
      "eval_samples_per_second": 104.669,
      "eval_steps_per_second": 0.826,
      "step": 656
    },
    {
      "epoch": 21.12,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 0.2071,
      "step": 660
    },
    {
      "epoch": 21.44,
      "learning_rate": 3.106332138590203e-05,
      "loss": 0.2069,
      "step": 670
    },
    {
      "epoch": 21.76,
      "learning_rate": 2.9868578255675032e-05,
      "loss": 0.201,
      "step": 680
    },
    {
      "epoch": 21.98,
      "eval_accuracy": 0.850620067643743,
      "eval_f1": 0.8485040018671677,
      "eval_loss": 0.6119940876960754,
      "eval_precision": 0.84779749511018,
      "eval_recall": 0.850620067643743,
      "eval_runtime": 17.2379,
      "eval_samples_per_second": 102.913,
      "eval_steps_per_second": 0.812,
      "step": 687
    },
    {
      "epoch": 22.08,
      "learning_rate": 2.8673835125448028e-05,
      "loss": 0.2058,
      "step": 690
    },
    {
      "epoch": 22.4,
      "learning_rate": 2.747909199522103e-05,
      "loss": 0.1894,
      "step": 700
    },
    {
      "epoch": 22.72,
      "learning_rate": 2.628434886499403e-05,
      "loss": 0.2045,
      "step": 710
    },
    {
      "epoch": 22.98,
      "eval_accuracy": 0.8410372040586246,
      "eval_f1": 0.8370636059789568,
      "eval_loss": 0.6590279340744019,
      "eval_precision": 0.8389140338894852,
      "eval_recall": 0.8410372040586246,
      "eval_runtime": 17.4487,
      "eval_samples_per_second": 101.669,
      "eval_steps_per_second": 0.802,
      "step": 718
    },
    {
      "epoch": 23.04,
      "learning_rate": 2.5089605734767026e-05,
      "loss": 0.2042,
      "step": 720
    },
    {
      "epoch": 23.36,
      "learning_rate": 2.3894862604540025e-05,
      "loss": 0.2018,
      "step": 730
    },
    {
      "epoch": 23.68,
      "learning_rate": 2.270011947431302e-05,
      "loss": 0.201,
      "step": 740
    },
    {
      "epoch": 24.0,
      "learning_rate": 2.1505376344086024e-05,
      "loss": 0.1759,
      "step": 750
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.8489289740698985,
      "eval_f1": 0.8457283475787394,
      "eval_loss": 0.6477726697921753,
      "eval_precision": 0.8463914374344644,
      "eval_recall": 0.8489289740698985,
      "eval_runtime": 17.1928,
      "eval_samples_per_second": 103.183,
      "eval_steps_per_second": 0.814,
      "step": 750
    },
    {
      "epoch": 24.32,
      "learning_rate": 2.031063321385902e-05,
      "loss": 0.2004,
      "step": 760
    },
    {
      "epoch": 24.64,
      "learning_rate": 1.9115890083632022e-05,
      "loss": 0.1828,
      "step": 770
    },
    {
      "epoch": 24.96,
      "learning_rate": 1.7921146953405018e-05,
      "loss": 0.1856,
      "step": 780
    },
    {
      "epoch": 24.99,
      "eval_accuracy": 0.8444193912063134,
      "eval_f1": 0.8420114736469138,
      "eval_loss": 0.6604239344596863,
      "eval_precision": 0.8413499043245124,
      "eval_recall": 0.8444193912063134,
      "eval_runtime": 17.0578,
      "eval_samples_per_second": 103.999,
      "eval_steps_per_second": 0.821,
      "step": 781
    },
    {
      "epoch": 25.28,
      "learning_rate": 1.6726403823178017e-05,
      "loss": 0.1685,
      "step": 790
    },
    {
      "epoch": 25.6,
      "learning_rate": 1.5531660692951016e-05,
      "loss": 0.1717,
      "step": 800
    },
    {
      "epoch": 25.92,
      "learning_rate": 1.4336917562724014e-05,
      "loss": 0.1766,
      "step": 810
    },
    {
      "epoch": 25.98,
      "eval_accuracy": 0.8500563697857948,
      "eval_f1": 0.8484278487289038,
      "eval_loss": 0.6922361850738525,
      "eval_precision": 0.8491261339823137,
      "eval_recall": 0.8500563697857948,
      "eval_runtime": 17.4796,
      "eval_samples_per_second": 101.49,
      "eval_steps_per_second": 0.801,
      "step": 812
    },
    {
      "epoch": 26.24,
      "learning_rate": 1.3142174432497015e-05,
      "loss": 0.1839,
      "step": 820
    },
    {
      "epoch": 26.56,
      "learning_rate": 1.1947431302270013e-05,
      "loss": 0.1758,
      "step": 830
    },
    {
      "epoch": 26.88,
      "learning_rate": 1.0752688172043012e-05,
      "loss": 0.1841,
      "step": 840
    },
    {
      "epoch": 26.98,
      "eval_accuracy": 0.8500563697857948,
      "eval_f1": 0.8485537434372422,
      "eval_loss": 0.6484794616699219,
      "eval_precision": 0.8493151913843657,
      "eval_recall": 0.8500563697857948,
      "eval_runtime": 17.1293,
      "eval_samples_per_second": 103.565,
      "eval_steps_per_second": 0.817,
      "step": 843
    },
    {
      "epoch": 27.2,
      "learning_rate": 9.557945041816011e-06,
      "loss": 0.1818,
      "step": 850
    },
    {
      "epoch": 27.52,
      "learning_rate": 8.363201911589009e-06,
      "loss": 0.1798,
      "step": 860
    },
    {
      "epoch": 27.84,
      "learning_rate": 7.168458781362007e-06,
      "loss": 0.1707,
      "step": 870
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.846674182638106,
      "eval_f1": 0.844633535513836,
      "eval_loss": 0.6392838954925537,
      "eval_precision": 0.8439910314731612,
      "eval_recall": 0.846674182638106,
      "eval_runtime": 17.3476,
      "eval_samples_per_second": 102.262,
      "eval_steps_per_second": 0.807,
      "step": 875
    },
    {
      "epoch": 28.16,
      "learning_rate": 5.973715651135006e-06,
      "loss": 0.1883,
      "step": 880
    },
    {
      "epoch": 28.48,
      "learning_rate": 4.7789725209080055e-06,
      "loss": 0.1712,
      "step": 890
    },
    {
      "epoch": 28.8,
      "learning_rate": 3.5842293906810035e-06,
      "loss": 0.1792,
      "step": 900
    },
    {
      "epoch": 28.99,
      "eval_accuracy": 0.855129650507328,
      "eval_f1": 0.8525596225280389,
      "eval_loss": 0.6403793692588806,
      "eval_precision": 0.8525018926508404,
      "eval_recall": 0.855129650507328,
      "eval_runtime": 17.1962,
      "eval_samples_per_second": 103.162,
      "eval_steps_per_second": 0.814,
      "step": 906
    },
    {
      "epoch": 29.12,
      "learning_rate": 2.3894862604540028e-06,
      "loss": 0.1795,
      "step": 910
    },
    {
      "epoch": 29.44,
      "learning_rate": 1.1947431302270014e-06,
      "loss": 0.1619,
      "step": 920
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.0,
      "loss": 0.1713,
      "step": 930
    },
    {
      "epoch": 29.76,
      "eval_accuracy": 0.8534385569334837,
      "eval_f1": 0.851097208124706,
      "eval_loss": 0.6397678256034851,
      "eval_precision": 0.8513110036968645,
      "eval_recall": 0.8534385569334837,
      "eval_runtime": 17.1103,
      "eval_samples_per_second": 103.68,
      "eval_steps_per_second": 0.818,
      "step": 930
    },
    {
      "epoch": 29.76,
      "step": 930,
      "total_flos": 1.3013814188701234e+20,
      "train_loss": 0.3639387649874533,
      "train_runtime": 9909.908,
      "train_samples_per_second": 48.321,
      "train_steps_per_second": 0.094
    }
  ],
  "logging_steps": 10,
  "max_steps": 930,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 1.3013814188701234e+20,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}