lombardata's picture
Evaluation on the test set completed on 2024_09_18.
55a2a36 verified
{
"best_metric": 0.2951599955558777,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/drone_DinoVdeau-large-2024_09_18-batch-size64_epochs100_freeze/checkpoint-11584",
"epoch": 74.0,
"eval_steps": 500,
"global_step": 13394,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.240894901144641,
"eval_f1_macro": 0.5745037518359608,
"eval_f1_micro": 0.8203088441740758,
"eval_loss": 0.33405476808547974,
"eval_roc_auc": 0.8507572583590943,
"eval_runtime": 70.5588,
"eval_samples_per_second": 54.479,
"eval_steps_per_second": 0.865,
"learning_rate": 0.001,
"step": 181
},
{
"epoch": 2.0,
"eval_accuracy": 0.24011446409989595,
"eval_f1_macro": 0.5964117272843149,
"eval_f1_micro": 0.8326538829332126,
"eval_loss": 0.31857866048812866,
"eval_roc_auc": 0.861811323650532,
"eval_runtime": 79.7549,
"eval_samples_per_second": 48.198,
"eval_steps_per_second": 0.765,
"learning_rate": 0.001,
"step": 362
},
{
"epoch": 2.7624309392265194,
"grad_norm": 0.4326592683792114,
"learning_rate": 0.001,
"loss": 0.41,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.2515608740894901,
"eval_f1_macro": 0.5896466586231784,
"eval_f1_micro": 0.8322937087863127,
"eval_loss": 0.3189840316772461,
"eval_roc_auc": 0.8611107288488111,
"eval_runtime": 73.8912,
"eval_samples_per_second": 52.022,
"eval_steps_per_second": 0.826,
"learning_rate": 0.001,
"step": 543
},
{
"epoch": 4.0,
"eval_accuracy": 0.2572840790842872,
"eval_f1_macro": 0.5800422752857416,
"eval_f1_micro": 0.8322264673453166,
"eval_loss": 0.31490686535835266,
"eval_roc_auc": 0.8608139926082374,
"eval_runtime": 74.4071,
"eval_samples_per_second": 51.662,
"eval_steps_per_second": 0.82,
"learning_rate": 0.001,
"step": 724
},
{
"epoch": 5.0,
"eval_accuracy": 0.25260145681581686,
"eval_f1_macro": 0.5992015270576083,
"eval_f1_micro": 0.831389835721059,
"eval_loss": 0.3153345584869385,
"eval_roc_auc": 0.860148186700414,
"eval_runtime": 75.6696,
"eval_samples_per_second": 50.8,
"eval_steps_per_second": 0.806,
"learning_rate": 0.001,
"step": 905
},
{
"epoch": 5.524861878453039,
"grad_norm": 0.3112102448940277,
"learning_rate": 0.001,
"loss": 0.3412,
"step": 1000
},
{
"epoch": 6.0,
"eval_accuracy": 0.2505202913631634,
"eval_f1_macro": 0.6058921391497434,
"eval_f1_micro": 0.8368403933244926,
"eval_loss": 0.3146817088127136,
"eval_roc_auc": 0.8655139600288801,
"eval_runtime": 71.4939,
"eval_samples_per_second": 53.767,
"eval_steps_per_second": 0.853,
"learning_rate": 0.001,
"step": 1086
},
{
"epoch": 7.0,
"eval_accuracy": 0.2643080124869927,
"eval_f1_macro": 0.5671643866070547,
"eval_f1_micro": 0.8297352130260625,
"eval_loss": 0.31298699975013733,
"eval_roc_auc": 0.858329231354068,
"eval_runtime": 72.5707,
"eval_samples_per_second": 52.969,
"eval_steps_per_second": 0.841,
"learning_rate": 0.001,
"step": 1267
},
{
"epoch": 8.0,
"eval_accuracy": 0.2643080124869927,
"eval_f1_macro": 0.5794085884877922,
"eval_f1_micro": 0.8304935064935066,
"eval_loss": 0.31266748905181885,
"eval_roc_auc": 0.8589842911078356,
"eval_runtime": 68.868,
"eval_samples_per_second": 55.817,
"eval_steps_per_second": 0.886,
"learning_rate": 0.001,
"step": 1448
},
{
"epoch": 8.287292817679559,
"grad_norm": 0.2548049986362457,
"learning_rate": 0.001,
"loss": 0.3338,
"step": 1500
},
{
"epoch": 9.0,
"eval_accuracy": 0.2700312174817898,
"eval_f1_macro": 0.5719440884615888,
"eval_f1_micro": 0.8326600372902424,
"eval_loss": 0.31311556696891785,
"eval_roc_auc": 0.8608317149678372,
"eval_runtime": 68.5911,
"eval_samples_per_second": 56.042,
"eval_steps_per_second": 0.889,
"learning_rate": 0.001,
"step": 1629
},
{
"epoch": 10.0,
"eval_accuracy": 0.2518210197710718,
"eval_f1_macro": 0.5897148649683502,
"eval_f1_micro": 0.8355268182165314,
"eval_loss": 0.3096640110015869,
"eval_roc_auc": 0.8637739072507734,
"eval_runtime": 68.7801,
"eval_samples_per_second": 55.888,
"eval_steps_per_second": 0.887,
"learning_rate": 0.001,
"step": 1810
},
{
"epoch": 11.0,
"eval_accuracy": 0.27419354838709675,
"eval_f1_macro": 0.5735365394193856,
"eval_f1_micro": 0.8332294264339152,
"eval_loss": 0.31233683228492737,
"eval_roc_auc": 0.8611851168597349,
"eval_runtime": 66.8379,
"eval_samples_per_second": 57.512,
"eval_steps_per_second": 0.913,
"learning_rate": 0.001,
"step": 1991
},
{
"epoch": 11.049723756906078,
"grad_norm": 0.24715092778205872,
"learning_rate": 0.001,
"loss": 0.3303,
"step": 2000
},
{
"epoch": 12.0,
"eval_accuracy": 0.27055150884495316,
"eval_f1_macro": 0.5805560421457799,
"eval_f1_micro": 0.8331433309151208,
"eval_loss": 0.3085036873817444,
"eval_roc_auc": 0.8612045319689331,
"eval_runtime": 67.4806,
"eval_samples_per_second": 56.965,
"eval_steps_per_second": 0.904,
"learning_rate": 0.001,
"step": 2172
},
{
"epoch": 13.0,
"eval_accuracy": 0.2645681581685744,
"eval_f1_macro": 0.5951616619649476,
"eval_f1_micro": 0.8347951336196919,
"eval_loss": 0.3079213500022888,
"eval_roc_auc": 0.8627997612661887,
"eval_runtime": 64.7469,
"eval_samples_per_second": 59.37,
"eval_steps_per_second": 0.942,
"learning_rate": 0.001,
"step": 2353
},
{
"epoch": 13.812154696132596,
"grad_norm": 0.2406352013349533,
"learning_rate": 0.001,
"loss": 0.3278,
"step": 2500
},
{
"epoch": 14.0,
"eval_accuracy": 0.2533818938605619,
"eval_f1_macro": 0.5969453701098729,
"eval_f1_micro": 0.8339762460663892,
"eval_loss": 0.31647807359695435,
"eval_roc_auc": 0.8626087016827515,
"eval_runtime": 65.4843,
"eval_samples_per_second": 58.701,
"eval_steps_per_second": 0.932,
"learning_rate": 0.001,
"step": 2534
},
{
"epoch": 15.0,
"eval_accuracy": 0.27601456815816855,
"eval_f1_macro": 0.5790027839889411,
"eval_f1_micro": 0.8351292272084352,
"eval_loss": 0.3074161410331726,
"eval_roc_auc": 0.8630771728562265,
"eval_runtime": 64.7162,
"eval_samples_per_second": 59.398,
"eval_steps_per_second": 0.943,
"learning_rate": 0.001,
"step": 2715
},
{
"epoch": 16.0,
"eval_accuracy": 0.26352757544224764,
"eval_f1_macro": 0.5888561594424128,
"eval_f1_micro": 0.8355780022446689,
"eval_loss": 0.3094619810581207,
"eval_roc_auc": 0.8637418270912655,
"eval_runtime": 70.1431,
"eval_samples_per_second": 54.802,
"eval_steps_per_second": 0.87,
"learning_rate": 0.001,
"step": 2896
},
{
"epoch": 16.574585635359117,
"grad_norm": 0.1948590725660324,
"learning_rate": 0.001,
"loss": 0.3273,
"step": 3000
},
{
"epoch": 17.0,
"eval_accuracy": 0.24739854318418314,
"eval_f1_macro": 0.6137563857640569,
"eval_f1_micro": 0.8395418326693228,
"eval_loss": 0.31031784415245056,
"eval_roc_auc": 0.8679706824480403,
"eval_runtime": 71.0586,
"eval_samples_per_second": 54.096,
"eval_steps_per_second": 0.858,
"learning_rate": 0.001,
"step": 3077
},
{
"epoch": 18.0,
"eval_accuracy": 0.2762747138397503,
"eval_f1_macro": 0.5714548681788686,
"eval_f1_micro": 0.8334075140725227,
"eval_loss": 0.3063325881958008,
"eval_roc_auc": 0.8610597084880822,
"eval_runtime": 70.7944,
"eval_samples_per_second": 54.298,
"eval_steps_per_second": 0.862,
"learning_rate": 0.001,
"step": 3258
},
{
"epoch": 19.0,
"eval_accuracy": 0.26560874089490116,
"eval_f1_macro": 0.5919936165947856,
"eval_f1_micro": 0.8336647470495572,
"eval_loss": 0.3109950125217438,
"eval_roc_auc": 0.8617398269664828,
"eval_runtime": 71.2213,
"eval_samples_per_second": 53.973,
"eval_steps_per_second": 0.856,
"learning_rate": 0.001,
"step": 3439
},
{
"epoch": 19.337016574585636,
"grad_norm": 0.19638165831565857,
"learning_rate": 0.001,
"loss": 0.324,
"step": 3500
},
{
"epoch": 20.0,
"eval_accuracy": 0.2596253902185224,
"eval_f1_macro": 0.5983991279635611,
"eval_f1_micro": 0.8375498995652063,
"eval_loss": 0.3072282373905182,
"eval_roc_auc": 0.8654821738600246,
"eval_runtime": 69.3213,
"eval_samples_per_second": 55.452,
"eval_steps_per_second": 0.88,
"learning_rate": 0.001,
"step": 3620
},
{
"epoch": 21.0,
"eval_accuracy": 0.2559833506763788,
"eval_f1_macro": 0.6089994809825815,
"eval_f1_micro": 0.8388680190333083,
"eval_loss": 0.30741065740585327,
"eval_roc_auc": 0.8671726276510136,
"eval_runtime": 71.9913,
"eval_samples_per_second": 53.395,
"eval_steps_per_second": 0.847,
"learning_rate": 0.001,
"step": 3801
},
{
"epoch": 22.0,
"eval_accuracy": 0.26560874089490116,
"eval_f1_macro": 0.580833002136296,
"eval_f1_micro": 0.8355413783345766,
"eval_loss": 0.3070025146007538,
"eval_roc_auc": 0.8634013163963336,
"eval_runtime": 70.2825,
"eval_samples_per_second": 54.694,
"eval_steps_per_second": 0.868,
"learning_rate": 0.001,
"step": 3982
},
{
"epoch": 22.099447513812155,
"grad_norm": 0.21378174424171448,
"learning_rate": 0.001,
"loss": 0.3263,
"step": 4000
},
{
"epoch": 23.0,
"eval_accuracy": 0.2627471383975026,
"eval_f1_macro": 0.6160376071910248,
"eval_f1_micro": 0.8388706831358709,
"eval_loss": 0.3077291250228882,
"eval_roc_auc": 0.8669128705765787,
"eval_runtime": 71.2675,
"eval_samples_per_second": 53.938,
"eval_steps_per_second": 0.856,
"learning_rate": 0.001,
"step": 4163
},
{
"epoch": 24.0,
"eval_accuracy": 0.26560874089490116,
"eval_f1_macro": 0.5881215193388629,
"eval_f1_micro": 0.8362589632218766,
"eval_loss": 0.30613505840301514,
"eval_roc_auc": 0.8640193969839625,
"eval_runtime": 71.2696,
"eval_samples_per_second": 53.936,
"eval_steps_per_second": 0.856,
"learning_rate": 0.001,
"step": 4344
},
{
"epoch": 24.861878453038674,
"grad_norm": 0.1702233850955963,
"learning_rate": 0.001,
"loss": 0.3244,
"step": 4500
},
{
"epoch": 25.0,
"eval_accuracy": 0.2663891779396462,
"eval_f1_macro": 0.6102297364067529,
"eval_f1_micro": 0.8402073587052726,
"eval_loss": 0.3043115735054016,
"eval_roc_auc": 0.8678931350845757,
"eval_runtime": 75.5305,
"eval_samples_per_second": 50.893,
"eval_steps_per_second": 0.808,
"learning_rate": 0.001,
"step": 4525
},
{
"epoch": 26.0,
"eval_accuracy": 0.26586888657648283,
"eval_f1_macro": 0.5805627478330566,
"eval_f1_micro": 0.8326726046439789,
"eval_loss": 0.31102412939071655,
"eval_roc_auc": 0.8610437919514087,
"eval_runtime": 75.2782,
"eval_samples_per_second": 51.064,
"eval_steps_per_second": 0.81,
"learning_rate": 0.001,
"step": 4706
},
{
"epoch": 27.0,
"eval_accuracy": 0.2713319458896982,
"eval_f1_macro": 0.585029632368333,
"eval_f1_micro": 0.8379808306709265,
"eval_loss": 0.30523133277893066,
"eval_roc_auc": 0.8656351026045385,
"eval_runtime": 73.6801,
"eval_samples_per_second": 52.171,
"eval_steps_per_second": 0.828,
"learning_rate": 0.001,
"step": 4887
},
{
"epoch": 27.624309392265193,
"grad_norm": 0.15147489309310913,
"learning_rate": 0.001,
"loss": 0.3257,
"step": 5000
},
{
"epoch": 28.0,
"eval_accuracy": 0.2663891779396462,
"eval_f1_macro": 0.5973785723470736,
"eval_f1_micro": 0.8397143725311454,
"eval_loss": 0.3030068874359131,
"eval_roc_auc": 0.8674308050491781,
"eval_runtime": 74.1975,
"eval_samples_per_second": 51.808,
"eval_steps_per_second": 0.822,
"learning_rate": 0.001,
"step": 5068
},
{
"epoch": 29.0,
"eval_accuracy": 0.2666493236212279,
"eval_f1_macro": 0.5901889833654033,
"eval_f1_micro": 0.8362408553742262,
"eval_loss": 0.30669936537742615,
"eval_roc_auc": 0.8641808816102908,
"eval_runtime": 71.9481,
"eval_samples_per_second": 53.427,
"eval_steps_per_second": 0.848,
"learning_rate": 0.001,
"step": 5249
},
{
"epoch": 30.0,
"eval_accuracy": 0.26352757544224764,
"eval_f1_macro": 0.5923557708061385,
"eval_f1_micro": 0.83628025477707,
"eval_loss": 0.3061116933822632,
"eval_roc_auc": 0.8643618491898142,
"eval_runtime": 77.3574,
"eval_samples_per_second": 49.691,
"eval_steps_per_second": 0.789,
"learning_rate": 0.001,
"step": 5430
},
{
"epoch": 30.386740331491712,
"grad_norm": 0.1558382362127304,
"learning_rate": 0.001,
"loss": 0.3243,
"step": 5500
},
{
"epoch": 31.0,
"eval_accuracy": 0.2708116545265349,
"eval_f1_macro": 0.5866580729382617,
"eval_f1_micro": 0.8373469177729752,
"eval_loss": 0.30276864767074585,
"eval_roc_auc": 0.8648652901745116,
"eval_runtime": 67.6643,
"eval_samples_per_second": 56.81,
"eval_steps_per_second": 0.902,
"learning_rate": 0.001,
"step": 5611
},
{
"epoch": 32.0,
"eval_accuracy": 0.25676378772112385,
"eval_f1_macro": 0.60937867416881,
"eval_f1_micro": 0.8387781935875391,
"eval_loss": 0.30601420998573303,
"eval_roc_auc": 0.8667133907993204,
"eval_runtime": 71.6477,
"eval_samples_per_second": 53.651,
"eval_steps_per_second": 0.851,
"learning_rate": 0.001,
"step": 5792
},
{
"epoch": 33.0,
"eval_accuracy": 0.26508844953173777,
"eval_f1_macro": 0.586599207562497,
"eval_f1_micro": 0.8341955642063208,
"eval_loss": 0.30689385533332825,
"eval_roc_auc": 0.8624638555872349,
"eval_runtime": 74.1949,
"eval_samples_per_second": 51.809,
"eval_steps_per_second": 0.822,
"learning_rate": 0.001,
"step": 5973
},
{
"epoch": 33.149171270718234,
"grad_norm": 0.1734226495027542,
"learning_rate": 0.001,
"loss": 0.3257,
"step": 6000
},
{
"epoch": 34.0,
"eval_accuracy": 0.2663891779396462,
"eval_f1_macro": 0.5901389994189593,
"eval_f1_micro": 0.8362871579163353,
"eval_loss": 0.3069196939468384,
"eval_roc_auc": 0.8641186401875288,
"eval_runtime": 74.4957,
"eval_samples_per_second": 51.6,
"eval_steps_per_second": 0.819,
"learning_rate": 0.001,
"step": 6154
},
{
"epoch": 35.0,
"eval_accuracy": 0.2627471383975026,
"eval_f1_macro": 0.6009387049254514,
"eval_f1_micro": 0.8379563532531105,
"eval_loss": 0.30412742495536804,
"eval_roc_auc": 0.8657129213439902,
"eval_runtime": 67.827,
"eval_samples_per_second": 56.674,
"eval_steps_per_second": 0.899,
"learning_rate": 0.001,
"step": 6335
},
{
"epoch": 35.91160220994475,
"grad_norm": 0.14143767952919006,
"learning_rate": 0.001,
"loss": 0.324,
"step": 6500
},
{
"epoch": 36.0,
"eval_accuracy": 0.2661290322580645,
"eval_f1_macro": 0.5947414485755244,
"eval_f1_micro": 0.8362663373469179,
"eval_loss": 0.30450889468193054,
"eval_roc_auc": 0.8639863234543288,
"eval_runtime": 72.0072,
"eval_samples_per_second": 53.384,
"eval_steps_per_second": 0.847,
"learning_rate": 0.001,
"step": 6516
},
{
"epoch": 37.0,
"eval_accuracy": 0.27601456815816855,
"eval_f1_macro": 0.599485078190572,
"eval_f1_micro": 0.8396123222507695,
"eval_loss": 0.30367332696914673,
"eval_roc_auc": 0.8671612725761473,
"eval_runtime": 69.1194,
"eval_samples_per_second": 55.614,
"eval_steps_per_second": 0.883,
"learning_rate": 0.001,
"step": 6697
},
{
"epoch": 38.0,
"eval_accuracy": 0.2736732570239334,
"eval_f1_macro": 0.5859770918782524,
"eval_f1_micro": 0.838785705136416,
"eval_loss": 0.3015189468860626,
"eval_roc_auc": 0.8662036068197154,
"eval_runtime": 67.6585,
"eval_samples_per_second": 56.815,
"eval_steps_per_second": 0.902,
"learning_rate": 0.0001,
"step": 6878
},
{
"epoch": 38.67403314917127,
"grad_norm": 0.17121317982673645,
"learning_rate": 0.0001,
"loss": 0.3203,
"step": 7000
},
{
"epoch": 39.0,
"eval_accuracy": 0.2736732570239334,
"eval_f1_macro": 0.5995341133824836,
"eval_f1_micro": 0.838145545925432,
"eval_loss": 0.30049240589141846,
"eval_roc_auc": 0.8656439298623401,
"eval_runtime": 68.079,
"eval_samples_per_second": 56.464,
"eval_steps_per_second": 0.896,
"learning_rate": 0.0001,
"step": 7059
},
{
"epoch": 40.0,
"eval_accuracy": 0.2695109261186264,
"eval_f1_macro": 0.6125564265199016,
"eval_f1_micro": 0.841743177276981,
"eval_loss": 0.30099743604660034,
"eval_roc_auc": 0.8691556197305788,
"eval_runtime": 66.33,
"eval_samples_per_second": 57.953,
"eval_steps_per_second": 0.92,
"learning_rate": 0.0001,
"step": 7240
},
{
"epoch": 41.0,
"eval_accuracy": 0.27419354838709675,
"eval_f1_macro": 0.6073049013268815,
"eval_f1_micro": 0.8403053435114504,
"eval_loss": 0.29900264739990234,
"eval_roc_auc": 0.8677200013927284,
"eval_runtime": 70.4045,
"eval_samples_per_second": 54.599,
"eval_steps_per_second": 0.866,
"learning_rate": 0.0001,
"step": 7421
},
{
"epoch": 41.43646408839779,
"grad_norm": 0.16666004061698914,
"learning_rate": 0.0001,
"loss": 0.3165,
"step": 7500
},
{
"epoch": 42.0,
"eval_accuracy": 0.2713319458896982,
"eval_f1_macro": 0.5991905306617578,
"eval_f1_micro": 0.8408500229322733,
"eval_loss": 0.2996482849121094,
"eval_roc_auc": 0.8681113305915592,
"eval_runtime": 64.5424,
"eval_samples_per_second": 59.558,
"eval_steps_per_second": 0.945,
"learning_rate": 0.0001,
"step": 7602
},
{
"epoch": 43.0,
"eval_accuracy": 0.2695109261186264,
"eval_f1_macro": 0.6091761352198065,
"eval_f1_micro": 0.8414383822001469,
"eval_loss": 0.29860275983810425,
"eval_roc_auc": 0.8688227620059982,
"eval_runtime": 62.4452,
"eval_samples_per_second": 61.558,
"eval_steps_per_second": 0.977,
"learning_rate": 0.0001,
"step": 7783
},
{
"epoch": 44.0,
"eval_accuracy": 0.2749739854318418,
"eval_f1_macro": 0.595393170760367,
"eval_f1_micro": 0.83963159650068,
"eval_loss": 0.2981945872306824,
"eval_roc_auc": 0.8668373099533652,
"eval_runtime": 63.2599,
"eval_samples_per_second": 60.765,
"eval_steps_per_second": 0.964,
"learning_rate": 0.0001,
"step": 7964
},
{
"epoch": 44.19889502762431,
"grad_norm": 0.15173059701919556,
"learning_rate": 0.0001,
"loss": 0.3138,
"step": 8000
},
{
"epoch": 45.0,
"eval_accuracy": 0.2757544224765869,
"eval_f1_macro": 0.6028068266895004,
"eval_f1_micro": 0.8400674260611943,
"eval_loss": 0.2977070212364197,
"eval_roc_auc": 0.8673735530917092,
"eval_runtime": 63.0465,
"eval_samples_per_second": 60.971,
"eval_steps_per_second": 0.968,
"learning_rate": 0.0001,
"step": 8145
},
{
"epoch": 46.0,
"eval_accuracy": 0.2754942767950052,
"eval_f1_macro": 0.5966152252417688,
"eval_f1_micro": 0.8406257200645179,
"eval_loss": 0.29817572236061096,
"eval_roc_auc": 0.8677318087609064,
"eval_runtime": 63.3542,
"eval_samples_per_second": 60.675,
"eval_steps_per_second": 0.963,
"learning_rate": 0.0001,
"step": 8326
},
{
"epoch": 46.96132596685083,
"grad_norm": 0.19454629719257355,
"learning_rate": 0.0001,
"loss": 0.3125,
"step": 8500
},
{
"epoch": 47.0,
"eval_accuracy": 0.2767950052029136,
"eval_f1_macro": 0.5893483505785619,
"eval_f1_micro": 0.8377610088898078,
"eval_loss": 0.29967373609542847,
"eval_roc_auc": 0.8650328028655897,
"eval_runtime": 66.3004,
"eval_samples_per_second": 57.979,
"eval_steps_per_second": 0.92,
"learning_rate": 0.0001,
"step": 8507
},
{
"epoch": 48.0,
"eval_accuracy": 0.27471383975026015,
"eval_f1_macro": 0.6135295162351735,
"eval_f1_micro": 0.8420492820421911,
"eval_loss": 0.2978098392486572,
"eval_roc_auc": 0.8693551899664994,
"eval_runtime": 64.6705,
"eval_samples_per_second": 59.44,
"eval_steps_per_second": 0.943,
"learning_rate": 0.0001,
"step": 8688
},
{
"epoch": 49.0,
"eval_accuracy": 0.27471383975026015,
"eval_f1_macro": 0.6017406274418097,
"eval_f1_micro": 0.8398618395804016,
"eval_loss": 0.29812732338905334,
"eval_roc_auc": 0.8671341366526011,
"eval_runtime": 62.9848,
"eval_samples_per_second": 61.031,
"eval_steps_per_second": 0.968,
"learning_rate": 0.0001,
"step": 8869
},
{
"epoch": 49.72375690607735,
"grad_norm": 0.1885526031255722,
"learning_rate": 0.0001,
"loss": 0.312,
"step": 9000
},
{
"epoch": 50.0,
"eval_accuracy": 0.2702913631633715,
"eval_f1_macro": 0.6112978412836683,
"eval_f1_micro": 0.8410134158403285,
"eval_loss": 0.29766079783439636,
"eval_roc_auc": 0.8684402374503033,
"eval_runtime": 65.3588,
"eval_samples_per_second": 58.814,
"eval_steps_per_second": 0.933,
"learning_rate": 0.0001,
"step": 9050
},
{
"epoch": 51.0,
"eval_accuracy": 0.2715920915712799,
"eval_f1_macro": 0.6110237777757073,
"eval_f1_micro": 0.8419122264928279,
"eval_loss": 0.30011385679244995,
"eval_roc_auc": 0.8695847405483492,
"eval_runtime": 69.6934,
"eval_samples_per_second": 55.156,
"eval_steps_per_second": 0.875,
"learning_rate": 0.0001,
"step": 9231
},
{
"epoch": 52.0,
"eval_accuracy": 0.27887617065556713,
"eval_f1_macro": 0.6011341201375136,
"eval_f1_micro": 0.8379989163807117,
"eval_loss": 0.29774174094200134,
"eval_roc_auc": 0.8652877966213873,
"eval_runtime": 70.0846,
"eval_samples_per_second": 54.848,
"eval_steps_per_second": 0.87,
"learning_rate": 0.0001,
"step": 9412
},
{
"epoch": 52.48618784530387,
"grad_norm": 0.20562438666820526,
"learning_rate": 0.0001,
"loss": 0.3115,
"step": 9500
},
{
"epoch": 53.0,
"eval_accuracy": 0.27289281997918835,
"eval_f1_macro": 0.6151233978242734,
"eval_f1_micro": 0.8425461059111437,
"eval_loss": 0.2966245412826538,
"eval_roc_auc": 0.8699284348856478,
"eval_runtime": 67.2883,
"eval_samples_per_second": 57.127,
"eval_steps_per_second": 0.907,
"learning_rate": 0.0001,
"step": 9593
},
{
"epoch": 54.0,
"eval_accuracy": 0.2809573361082206,
"eval_f1_macro": 0.5974038753077745,
"eval_f1_micro": 0.839878532091204,
"eval_loss": 0.2976503372192383,
"eval_roc_auc": 0.8669142710106849,
"eval_runtime": 68.333,
"eval_samples_per_second": 56.254,
"eval_steps_per_second": 0.893,
"learning_rate": 0.0001,
"step": 9774
},
{
"epoch": 55.0,
"eval_accuracy": 0.27237252861602496,
"eval_f1_macro": 0.6118791964065535,
"eval_f1_micro": 0.8407133757961783,
"eval_loss": 0.2964698374271393,
"eval_roc_auc": 0.8680032827448569,
"eval_runtime": 66.9844,
"eval_samples_per_second": 57.387,
"eval_steps_per_second": 0.911,
"learning_rate": 0.0001,
"step": 9955
},
{
"epoch": 55.248618784530386,
"grad_norm": 0.16818420588970184,
"learning_rate": 0.0001,
"loss": 0.3105,
"step": 10000
},
{
"epoch": 56.0,
"eval_accuracy": 0.2786160249739854,
"eval_f1_macro": 0.6058471375996606,
"eval_f1_micro": 0.8408194622279129,
"eval_loss": 0.29658928513526917,
"eval_roc_auc": 0.8678807414102527,
"eval_runtime": 69.2914,
"eval_samples_per_second": 55.476,
"eval_steps_per_second": 0.88,
"learning_rate": 0.0001,
"step": 10136
},
{
"epoch": 57.0,
"eval_accuracy": 0.27471383975026015,
"eval_f1_macro": 0.606792526005266,
"eval_f1_micro": 0.8398854864270742,
"eval_loss": 0.29776841402053833,
"eval_roc_auc": 0.8671915469127291,
"eval_runtime": 71.354,
"eval_samples_per_second": 53.872,
"eval_steps_per_second": 0.855,
"learning_rate": 0.0001,
"step": 10317
},
{
"epoch": 58.0,
"eval_accuracy": 0.2721123829344433,
"eval_f1_macro": 0.614564922504406,
"eval_f1_micro": 0.8427313221299859,
"eval_loss": 0.29647430777549744,
"eval_roc_auc": 0.8699460215572541,
"eval_runtime": 68.4,
"eval_samples_per_second": 56.199,
"eval_steps_per_second": 0.892,
"learning_rate": 0.0001,
"step": 10498
},
{
"epoch": 58.011049723756905,
"grad_norm": 0.2239256352186203,
"learning_rate": 0.0001,
"loss": 0.3105,
"step": 10500
},
{
"epoch": 59.0,
"eval_accuracy": 0.2793964620187305,
"eval_f1_macro": 0.6005863231905055,
"eval_f1_micro": 0.8405752518649542,
"eval_loss": 0.2961284816265106,
"eval_roc_auc": 0.8676383900900995,
"eval_runtime": 74.2032,
"eval_samples_per_second": 51.804,
"eval_steps_per_second": 0.822,
"learning_rate": 0.0001,
"step": 10679
},
{
"epoch": 60.0,
"eval_accuracy": 0.2796566077003122,
"eval_f1_macro": 0.6113253981378993,
"eval_f1_micro": 0.8428984624752626,
"eval_loss": 0.29608383774757385,
"eval_roc_auc": 0.8699772214199647,
"eval_runtime": 67.9781,
"eval_samples_per_second": 56.548,
"eval_steps_per_second": 0.897,
"learning_rate": 0.0001,
"step": 10860
},
{
"epoch": 60.773480662983424,
"grad_norm": 0.20308265089988708,
"learning_rate": 0.0001,
"loss": 0.308,
"step": 11000
},
{
"epoch": 61.0,
"eval_accuracy": 0.2796566077003122,
"eval_f1_macro": 0.5998781229497869,
"eval_f1_micro": 0.8415255756768527,
"eval_loss": 0.2963137924671173,
"eval_roc_auc": 0.8684482296406385,
"eval_runtime": 65.3138,
"eval_samples_per_second": 58.854,
"eval_steps_per_second": 0.934,
"learning_rate": 0.0001,
"step": 11041
},
{
"epoch": 62.0,
"eval_accuracy": 0.27887617065556713,
"eval_f1_macro": 0.6017143414363826,
"eval_f1_micro": 0.8405433111225014,
"eval_loss": 0.29606395959854126,
"eval_roc_auc": 0.8676237835288696,
"eval_runtime": 66.6671,
"eval_samples_per_second": 57.66,
"eval_steps_per_second": 0.915,
"learning_rate": 0.0001,
"step": 11222
},
{
"epoch": 63.0,
"eval_accuracy": 0.2736732570239334,
"eval_f1_macro": 0.6107600812503934,
"eval_f1_micro": 0.8421159560149325,
"eval_loss": 0.29548379778862,
"eval_roc_auc": 0.8692919777882774,
"eval_runtime": 65.9628,
"eval_samples_per_second": 58.275,
"eval_steps_per_second": 0.925,
"learning_rate": 0.0001,
"step": 11403
},
{
"epoch": 63.53591160220994,
"grad_norm": 0.22083976864814758,
"learning_rate": 0.0001,
"loss": 0.3083,
"step": 11500
},
{
"epoch": 64.0,
"eval_accuracy": 0.2791363163371488,
"eval_f1_macro": 0.6127112192164306,
"eval_f1_micro": 0.8407360081778686,
"eval_loss": 0.2951599955558777,
"eval_roc_auc": 0.8678962734976112,
"eval_runtime": 67.2265,
"eval_samples_per_second": 57.18,
"eval_steps_per_second": 0.907,
"learning_rate": 0.0001,
"step": 11584
},
{
"epoch": 65.0,
"eval_accuracy": 0.27809573361082207,
"eval_f1_macro": 0.6022403129456154,
"eval_f1_micro": 0.839130323176836,
"eval_loss": 0.29839980602264404,
"eval_roc_auc": 0.8664061119366814,
"eval_runtime": 69.4982,
"eval_samples_per_second": 55.311,
"eval_steps_per_second": 0.878,
"learning_rate": 0.0001,
"step": 11765
},
{
"epoch": 66.0,
"eval_accuracy": 0.2739334027055151,
"eval_f1_macro": 0.6104238923347367,
"eval_f1_micro": 0.841473646741756,
"eval_loss": 0.2956618666648865,
"eval_roc_auc": 0.8687138112478329,
"eval_runtime": 65.4466,
"eval_samples_per_second": 58.735,
"eval_steps_per_second": 0.932,
"learning_rate": 0.0001,
"step": 11946
},
{
"epoch": 66.29834254143647,
"grad_norm": 0.245449036359787,
"learning_rate": 0.0001,
"loss": 0.3051,
"step": 12000
},
{
"epoch": 67.0,
"eval_accuracy": 0.27601456815816855,
"eval_f1_macro": 0.6142232564356239,
"eval_f1_micro": 0.841559361481444,
"eval_loss": 0.29616212844848633,
"eval_roc_auc": 0.868995015401048,
"eval_runtime": 63.2029,
"eval_samples_per_second": 60.82,
"eval_steps_per_second": 0.965,
"learning_rate": 0.0001,
"step": 12127
},
{
"epoch": 68.0,
"eval_accuracy": 0.277315296566077,
"eval_f1_macro": 0.6083667891934388,
"eval_f1_micro": 0.8412997050747482,
"eval_loss": 0.29670244455337524,
"eval_roc_auc": 0.8685716965013714,
"eval_runtime": 62.807,
"eval_samples_per_second": 61.203,
"eval_steps_per_second": 0.971,
"learning_rate": 0.0001,
"step": 12308
},
{
"epoch": 69.0,
"eval_accuracy": 0.27289281997918835,
"eval_f1_macro": 0.6160532530170337,
"eval_f1_micro": 0.8406188667703209,
"eval_loss": 0.29603201150894165,
"eval_roc_auc": 0.8679078999484645,
"eval_runtime": 66.3361,
"eval_samples_per_second": 57.947,
"eval_steps_per_second": 0.92,
"learning_rate": 0.0001,
"step": 12489
},
{
"epoch": 69.06077348066299,
"grad_norm": 0.240888312458992,
"learning_rate": 0.0001,
"loss": 0.3066,
"step": 12500
},
{
"epoch": 70.0,
"eval_accuracy": 0.268210197710718,
"eval_f1_macro": 0.6247633367937411,
"eval_f1_micro": 0.8433517310587054,
"eval_loss": 0.2971898317337036,
"eval_roc_auc": 0.8708423261877216,
"eval_runtime": 65.1371,
"eval_samples_per_second": 59.014,
"eval_steps_per_second": 0.936,
"learning_rate": 0.0001,
"step": 12670
},
{
"epoch": 71.0,
"eval_accuracy": 0.28017689906347554,
"eval_f1_macro": 0.6081264630991828,
"eval_f1_micro": 0.8396197327852004,
"eval_loss": 0.2964514493942261,
"eval_roc_auc": 0.8667672572575927,
"eval_runtime": 66.6789,
"eval_samples_per_second": 57.649,
"eval_steps_per_second": 0.915,
"learning_rate": 1e-05,
"step": 12851
},
{
"epoch": 71.8232044198895,
"grad_norm": 0.2170819193124771,
"learning_rate": 1e-05,
"loss": 0.3061,
"step": 13000
},
{
"epoch": 72.0,
"eval_accuracy": 0.28069719042663893,
"eval_f1_macro": 0.603429563858726,
"eval_f1_micro": 0.8422399017676703,
"eval_loss": 0.29609331488609314,
"eval_roc_auc": 0.8690857705667967,
"eval_runtime": 67.2956,
"eval_samples_per_second": 57.121,
"eval_steps_per_second": 0.906,
"learning_rate": 1e-05,
"step": 13032
},
{
"epoch": 73.0,
"eval_accuracy": 0.2809573361082206,
"eval_f1_macro": 0.6080269607484758,
"eval_f1_micro": 0.8408677611863842,
"eval_loss": 0.2954128384590149,
"eval_roc_auc": 0.867912866799092,
"eval_runtime": 67.8811,
"eval_samples_per_second": 56.628,
"eval_steps_per_second": 0.899,
"learning_rate": 1e-05,
"step": 13213
},
{
"epoch": 74.0,
"eval_accuracy": 0.2809573361082206,
"eval_f1_macro": 0.6093453328873296,
"eval_f1_micro": 0.8411459000025601,
"eval_loss": 0.29537200927734375,
"eval_roc_auc": 0.8681591237557506,
"eval_runtime": 65.6885,
"eval_samples_per_second": 58.519,
"eval_steps_per_second": 0.929,
"learning_rate": 1e-05,
"step": 13394
},
{
"epoch": 74.0,
"learning_rate": 1e-05,
"step": 13394,
"total_flos": 1.9211796047151268e+19,
"train_loss": 0.3220266872187844,
"train_runtime": 23053.7141,
"train_samples_per_second": 49.979,
"train_steps_per_second": 0.785
}
],
"logging_steps": 500,
"max_steps": 18100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.9211796047151268e+19,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}