|
{ |
|
"best_metric": 0.2951599955558777, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/fine_scale/drone_DinoVdeau-large-2024_09_18-batch-size64_epochs100_freeze/checkpoint-11584", |
|
"epoch": 74.0, |
|
"eval_steps": 500, |
|
"global_step": 13394, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.240894901144641, |
|
"eval_f1_macro": 0.5745037518359608, |
|
"eval_f1_micro": 0.8203088441740758, |
|
"eval_loss": 0.33405476808547974, |
|
"eval_roc_auc": 0.8507572583590943, |
|
"eval_runtime": 70.5588, |
|
"eval_samples_per_second": 54.479, |
|
"eval_steps_per_second": 0.865, |
|
"learning_rate": 0.001, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.24011446409989595, |
|
"eval_f1_macro": 0.5964117272843149, |
|
"eval_f1_micro": 0.8326538829332126, |
|
"eval_loss": 0.31857866048812866, |
|
"eval_roc_auc": 0.861811323650532, |
|
"eval_runtime": 79.7549, |
|
"eval_samples_per_second": 48.198, |
|
"eval_steps_per_second": 0.765, |
|
"learning_rate": 0.001, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.7624309392265194, |
|
"grad_norm": 0.4326592683792114, |
|
"learning_rate": 0.001, |
|
"loss": 0.41, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.2515608740894901, |
|
"eval_f1_macro": 0.5896466586231784, |
|
"eval_f1_micro": 0.8322937087863127, |
|
"eval_loss": 0.3189840316772461, |
|
"eval_roc_auc": 0.8611107288488111, |
|
"eval_runtime": 73.8912, |
|
"eval_samples_per_second": 52.022, |
|
"eval_steps_per_second": 0.826, |
|
"learning_rate": 0.001, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2572840790842872, |
|
"eval_f1_macro": 0.5800422752857416, |
|
"eval_f1_micro": 0.8322264673453166, |
|
"eval_loss": 0.31490686535835266, |
|
"eval_roc_auc": 0.8608139926082374, |
|
"eval_runtime": 74.4071, |
|
"eval_samples_per_second": 51.662, |
|
"eval_steps_per_second": 0.82, |
|
"learning_rate": 0.001, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.25260145681581686, |
|
"eval_f1_macro": 0.5992015270576083, |
|
"eval_f1_micro": 0.831389835721059, |
|
"eval_loss": 0.3153345584869385, |
|
"eval_roc_auc": 0.860148186700414, |
|
"eval_runtime": 75.6696, |
|
"eval_samples_per_second": 50.8, |
|
"eval_steps_per_second": 0.806, |
|
"learning_rate": 0.001, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 5.524861878453039, |
|
"grad_norm": 0.3112102448940277, |
|
"learning_rate": 0.001, |
|
"loss": 0.3412, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.2505202913631634, |
|
"eval_f1_macro": 0.6058921391497434, |
|
"eval_f1_micro": 0.8368403933244926, |
|
"eval_loss": 0.3146817088127136, |
|
"eval_roc_auc": 0.8655139600288801, |
|
"eval_runtime": 71.4939, |
|
"eval_samples_per_second": 53.767, |
|
"eval_steps_per_second": 0.853, |
|
"learning_rate": 0.001, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.2643080124869927, |
|
"eval_f1_macro": 0.5671643866070547, |
|
"eval_f1_micro": 0.8297352130260625, |
|
"eval_loss": 0.31298699975013733, |
|
"eval_roc_auc": 0.858329231354068, |
|
"eval_runtime": 72.5707, |
|
"eval_samples_per_second": 52.969, |
|
"eval_steps_per_second": 0.841, |
|
"learning_rate": 0.001, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.2643080124869927, |
|
"eval_f1_macro": 0.5794085884877922, |
|
"eval_f1_micro": 0.8304935064935066, |
|
"eval_loss": 0.31266748905181885, |
|
"eval_roc_auc": 0.8589842911078356, |
|
"eval_runtime": 68.868, |
|
"eval_samples_per_second": 55.817, |
|
"eval_steps_per_second": 0.886, |
|
"learning_rate": 0.001, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 8.287292817679559, |
|
"grad_norm": 0.2548049986362457, |
|
"learning_rate": 0.001, |
|
"loss": 0.3338, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.2700312174817898, |
|
"eval_f1_macro": 0.5719440884615888, |
|
"eval_f1_micro": 0.8326600372902424, |
|
"eval_loss": 0.31311556696891785, |
|
"eval_roc_auc": 0.8608317149678372, |
|
"eval_runtime": 68.5911, |
|
"eval_samples_per_second": 56.042, |
|
"eval_steps_per_second": 0.889, |
|
"learning_rate": 0.001, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2518210197710718, |
|
"eval_f1_macro": 0.5897148649683502, |
|
"eval_f1_micro": 0.8355268182165314, |
|
"eval_loss": 0.3096640110015869, |
|
"eval_roc_auc": 0.8637739072507734, |
|
"eval_runtime": 68.7801, |
|
"eval_samples_per_second": 55.888, |
|
"eval_steps_per_second": 0.887, |
|
"learning_rate": 0.001, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.27419354838709675, |
|
"eval_f1_macro": 0.5735365394193856, |
|
"eval_f1_micro": 0.8332294264339152, |
|
"eval_loss": 0.31233683228492737, |
|
"eval_roc_auc": 0.8611851168597349, |
|
"eval_runtime": 66.8379, |
|
"eval_samples_per_second": 57.512, |
|
"eval_steps_per_second": 0.913, |
|
"learning_rate": 0.001, |
|
"step": 1991 |
|
}, |
|
{ |
|
"epoch": 11.049723756906078, |
|
"grad_norm": 0.24715092778205872, |
|
"learning_rate": 0.001, |
|
"loss": 0.3303, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.27055150884495316, |
|
"eval_f1_macro": 0.5805560421457799, |
|
"eval_f1_micro": 0.8331433309151208, |
|
"eval_loss": 0.3085036873817444, |
|
"eval_roc_auc": 0.8612045319689331, |
|
"eval_runtime": 67.4806, |
|
"eval_samples_per_second": 56.965, |
|
"eval_steps_per_second": 0.904, |
|
"learning_rate": 0.001, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2645681581685744, |
|
"eval_f1_macro": 0.5951616619649476, |
|
"eval_f1_micro": 0.8347951336196919, |
|
"eval_loss": 0.3079213500022888, |
|
"eval_roc_auc": 0.8627997612661887, |
|
"eval_runtime": 64.7469, |
|
"eval_samples_per_second": 59.37, |
|
"eval_steps_per_second": 0.942, |
|
"learning_rate": 0.001, |
|
"step": 2353 |
|
}, |
|
{ |
|
"epoch": 13.812154696132596, |
|
"grad_norm": 0.2406352013349533, |
|
"learning_rate": 0.001, |
|
"loss": 0.3278, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.2533818938605619, |
|
"eval_f1_macro": 0.5969453701098729, |
|
"eval_f1_micro": 0.8339762460663892, |
|
"eval_loss": 0.31647807359695435, |
|
"eval_roc_auc": 0.8626087016827515, |
|
"eval_runtime": 65.4843, |
|
"eval_samples_per_second": 58.701, |
|
"eval_steps_per_second": 0.932, |
|
"learning_rate": 0.001, |
|
"step": 2534 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.27601456815816855, |
|
"eval_f1_macro": 0.5790027839889411, |
|
"eval_f1_micro": 0.8351292272084352, |
|
"eval_loss": 0.3074161410331726, |
|
"eval_roc_auc": 0.8630771728562265, |
|
"eval_runtime": 64.7162, |
|
"eval_samples_per_second": 59.398, |
|
"eval_steps_per_second": 0.943, |
|
"learning_rate": 0.001, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.26352757544224764, |
|
"eval_f1_macro": 0.5888561594424128, |
|
"eval_f1_micro": 0.8355780022446689, |
|
"eval_loss": 0.3094619810581207, |
|
"eval_roc_auc": 0.8637418270912655, |
|
"eval_runtime": 70.1431, |
|
"eval_samples_per_second": 54.802, |
|
"eval_steps_per_second": 0.87, |
|
"learning_rate": 0.001, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 16.574585635359117, |
|
"grad_norm": 0.1948590725660324, |
|
"learning_rate": 0.001, |
|
"loss": 0.3273, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.24739854318418314, |
|
"eval_f1_macro": 0.6137563857640569, |
|
"eval_f1_micro": 0.8395418326693228, |
|
"eval_loss": 0.31031784415245056, |
|
"eval_roc_auc": 0.8679706824480403, |
|
"eval_runtime": 71.0586, |
|
"eval_samples_per_second": 54.096, |
|
"eval_steps_per_second": 0.858, |
|
"learning_rate": 0.001, |
|
"step": 3077 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.2762747138397503, |
|
"eval_f1_macro": 0.5714548681788686, |
|
"eval_f1_micro": 0.8334075140725227, |
|
"eval_loss": 0.3063325881958008, |
|
"eval_roc_auc": 0.8610597084880822, |
|
"eval_runtime": 70.7944, |
|
"eval_samples_per_second": 54.298, |
|
"eval_steps_per_second": 0.862, |
|
"learning_rate": 0.001, |
|
"step": 3258 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.26560874089490116, |
|
"eval_f1_macro": 0.5919936165947856, |
|
"eval_f1_micro": 0.8336647470495572, |
|
"eval_loss": 0.3109950125217438, |
|
"eval_roc_auc": 0.8617398269664828, |
|
"eval_runtime": 71.2213, |
|
"eval_samples_per_second": 53.973, |
|
"eval_steps_per_second": 0.856, |
|
"learning_rate": 0.001, |
|
"step": 3439 |
|
}, |
|
{ |
|
"epoch": 19.337016574585636, |
|
"grad_norm": 0.19638165831565857, |
|
"learning_rate": 0.001, |
|
"loss": 0.324, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.2596253902185224, |
|
"eval_f1_macro": 0.5983991279635611, |
|
"eval_f1_micro": 0.8375498995652063, |
|
"eval_loss": 0.3072282373905182, |
|
"eval_roc_auc": 0.8654821738600246, |
|
"eval_runtime": 69.3213, |
|
"eval_samples_per_second": 55.452, |
|
"eval_steps_per_second": 0.88, |
|
"learning_rate": 0.001, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.2559833506763788, |
|
"eval_f1_macro": 0.6089994809825815, |
|
"eval_f1_micro": 0.8388680190333083, |
|
"eval_loss": 0.30741065740585327, |
|
"eval_roc_auc": 0.8671726276510136, |
|
"eval_runtime": 71.9913, |
|
"eval_samples_per_second": 53.395, |
|
"eval_steps_per_second": 0.847, |
|
"learning_rate": 0.001, |
|
"step": 3801 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.26560874089490116, |
|
"eval_f1_macro": 0.580833002136296, |
|
"eval_f1_micro": 0.8355413783345766, |
|
"eval_loss": 0.3070025146007538, |
|
"eval_roc_auc": 0.8634013163963336, |
|
"eval_runtime": 70.2825, |
|
"eval_samples_per_second": 54.694, |
|
"eval_steps_per_second": 0.868, |
|
"learning_rate": 0.001, |
|
"step": 3982 |
|
}, |
|
{ |
|
"epoch": 22.099447513812155, |
|
"grad_norm": 0.21378174424171448, |
|
"learning_rate": 0.001, |
|
"loss": 0.3263, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2627471383975026, |
|
"eval_f1_macro": 0.6160376071910248, |
|
"eval_f1_micro": 0.8388706831358709, |
|
"eval_loss": 0.3077291250228882, |
|
"eval_roc_auc": 0.8669128705765787, |
|
"eval_runtime": 71.2675, |
|
"eval_samples_per_second": 53.938, |
|
"eval_steps_per_second": 0.856, |
|
"learning_rate": 0.001, |
|
"step": 4163 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.26560874089490116, |
|
"eval_f1_macro": 0.5881215193388629, |
|
"eval_f1_micro": 0.8362589632218766, |
|
"eval_loss": 0.30613505840301514, |
|
"eval_roc_auc": 0.8640193969839625, |
|
"eval_runtime": 71.2696, |
|
"eval_samples_per_second": 53.936, |
|
"eval_steps_per_second": 0.856, |
|
"learning_rate": 0.001, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 24.861878453038674, |
|
"grad_norm": 0.1702233850955963, |
|
"learning_rate": 0.001, |
|
"loss": 0.3244, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.2663891779396462, |
|
"eval_f1_macro": 0.6102297364067529, |
|
"eval_f1_micro": 0.8402073587052726, |
|
"eval_loss": 0.3043115735054016, |
|
"eval_roc_auc": 0.8678931350845757, |
|
"eval_runtime": 75.5305, |
|
"eval_samples_per_second": 50.893, |
|
"eval_steps_per_second": 0.808, |
|
"learning_rate": 0.001, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.26586888657648283, |
|
"eval_f1_macro": 0.5805627478330566, |
|
"eval_f1_micro": 0.8326726046439789, |
|
"eval_loss": 0.31102412939071655, |
|
"eval_roc_auc": 0.8610437919514087, |
|
"eval_runtime": 75.2782, |
|
"eval_samples_per_second": 51.064, |
|
"eval_steps_per_second": 0.81, |
|
"learning_rate": 0.001, |
|
"step": 4706 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2713319458896982, |
|
"eval_f1_macro": 0.585029632368333, |
|
"eval_f1_micro": 0.8379808306709265, |
|
"eval_loss": 0.30523133277893066, |
|
"eval_roc_auc": 0.8656351026045385, |
|
"eval_runtime": 73.6801, |
|
"eval_samples_per_second": 52.171, |
|
"eval_steps_per_second": 0.828, |
|
"learning_rate": 0.001, |
|
"step": 4887 |
|
}, |
|
{ |
|
"epoch": 27.624309392265193, |
|
"grad_norm": 0.15147489309310913, |
|
"learning_rate": 0.001, |
|
"loss": 0.3257, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.2663891779396462, |
|
"eval_f1_macro": 0.5973785723470736, |
|
"eval_f1_micro": 0.8397143725311454, |
|
"eval_loss": 0.3030068874359131, |
|
"eval_roc_auc": 0.8674308050491781, |
|
"eval_runtime": 74.1975, |
|
"eval_samples_per_second": 51.808, |
|
"eval_steps_per_second": 0.822, |
|
"learning_rate": 0.001, |
|
"step": 5068 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2666493236212279, |
|
"eval_f1_macro": 0.5901889833654033, |
|
"eval_f1_micro": 0.8362408553742262, |
|
"eval_loss": 0.30669936537742615, |
|
"eval_roc_auc": 0.8641808816102908, |
|
"eval_runtime": 71.9481, |
|
"eval_samples_per_second": 53.427, |
|
"eval_steps_per_second": 0.848, |
|
"learning_rate": 0.001, |
|
"step": 5249 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.26352757544224764, |
|
"eval_f1_macro": 0.5923557708061385, |
|
"eval_f1_micro": 0.83628025477707, |
|
"eval_loss": 0.3061116933822632, |
|
"eval_roc_auc": 0.8643618491898142, |
|
"eval_runtime": 77.3574, |
|
"eval_samples_per_second": 49.691, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 0.001, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 30.386740331491712, |
|
"grad_norm": 0.1558382362127304, |
|
"learning_rate": 0.001, |
|
"loss": 0.3243, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.2708116545265349, |
|
"eval_f1_macro": 0.5866580729382617, |
|
"eval_f1_micro": 0.8373469177729752, |
|
"eval_loss": 0.30276864767074585, |
|
"eval_roc_auc": 0.8648652901745116, |
|
"eval_runtime": 67.6643, |
|
"eval_samples_per_second": 56.81, |
|
"eval_steps_per_second": 0.902, |
|
"learning_rate": 0.001, |
|
"step": 5611 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.25676378772112385, |
|
"eval_f1_macro": 0.60937867416881, |
|
"eval_f1_micro": 0.8387781935875391, |
|
"eval_loss": 0.30601420998573303, |
|
"eval_roc_auc": 0.8667133907993204, |
|
"eval_runtime": 71.6477, |
|
"eval_samples_per_second": 53.651, |
|
"eval_steps_per_second": 0.851, |
|
"learning_rate": 0.001, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.26508844953173777, |
|
"eval_f1_macro": 0.586599207562497, |
|
"eval_f1_micro": 0.8341955642063208, |
|
"eval_loss": 0.30689385533332825, |
|
"eval_roc_auc": 0.8624638555872349, |
|
"eval_runtime": 74.1949, |
|
"eval_samples_per_second": 51.809, |
|
"eval_steps_per_second": 0.822, |
|
"learning_rate": 0.001, |
|
"step": 5973 |
|
}, |
|
{ |
|
"epoch": 33.149171270718234, |
|
"grad_norm": 0.1734226495027542, |
|
"learning_rate": 0.001, |
|
"loss": 0.3257, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.2663891779396462, |
|
"eval_f1_macro": 0.5901389994189593, |
|
"eval_f1_micro": 0.8362871579163353, |
|
"eval_loss": 0.3069196939468384, |
|
"eval_roc_auc": 0.8641186401875288, |
|
"eval_runtime": 74.4957, |
|
"eval_samples_per_second": 51.6, |
|
"eval_steps_per_second": 0.819, |
|
"learning_rate": 0.001, |
|
"step": 6154 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.2627471383975026, |
|
"eval_f1_macro": 0.6009387049254514, |
|
"eval_f1_micro": 0.8379563532531105, |
|
"eval_loss": 0.30412742495536804, |
|
"eval_roc_auc": 0.8657129213439902, |
|
"eval_runtime": 67.827, |
|
"eval_samples_per_second": 56.674, |
|
"eval_steps_per_second": 0.899, |
|
"learning_rate": 0.001, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 35.91160220994475, |
|
"grad_norm": 0.14143767952919006, |
|
"learning_rate": 0.001, |
|
"loss": 0.324, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.2661290322580645, |
|
"eval_f1_macro": 0.5947414485755244, |
|
"eval_f1_micro": 0.8362663373469179, |
|
"eval_loss": 0.30450889468193054, |
|
"eval_roc_auc": 0.8639863234543288, |
|
"eval_runtime": 72.0072, |
|
"eval_samples_per_second": 53.384, |
|
"eval_steps_per_second": 0.847, |
|
"learning_rate": 0.001, |
|
"step": 6516 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.27601456815816855, |
|
"eval_f1_macro": 0.599485078190572, |
|
"eval_f1_micro": 0.8396123222507695, |
|
"eval_loss": 0.30367332696914673, |
|
"eval_roc_auc": 0.8671612725761473, |
|
"eval_runtime": 69.1194, |
|
"eval_samples_per_second": 55.614, |
|
"eval_steps_per_second": 0.883, |
|
"learning_rate": 0.001, |
|
"step": 6697 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.2736732570239334, |
|
"eval_f1_macro": 0.5859770918782524, |
|
"eval_f1_micro": 0.838785705136416, |
|
"eval_loss": 0.3015189468860626, |
|
"eval_roc_auc": 0.8662036068197154, |
|
"eval_runtime": 67.6585, |
|
"eval_samples_per_second": 56.815, |
|
"eval_steps_per_second": 0.902, |
|
"learning_rate": 0.0001, |
|
"step": 6878 |
|
}, |
|
{ |
|
"epoch": 38.67403314917127, |
|
"grad_norm": 0.17121317982673645, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3203, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.2736732570239334, |
|
"eval_f1_macro": 0.5995341133824836, |
|
"eval_f1_micro": 0.838145545925432, |
|
"eval_loss": 0.30049240589141846, |
|
"eval_roc_auc": 0.8656439298623401, |
|
"eval_runtime": 68.079, |
|
"eval_samples_per_second": 56.464, |
|
"eval_steps_per_second": 0.896, |
|
"learning_rate": 0.0001, |
|
"step": 7059 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.2695109261186264, |
|
"eval_f1_macro": 0.6125564265199016, |
|
"eval_f1_micro": 0.841743177276981, |
|
"eval_loss": 0.30099743604660034, |
|
"eval_roc_auc": 0.8691556197305788, |
|
"eval_runtime": 66.33, |
|
"eval_samples_per_second": 57.953, |
|
"eval_steps_per_second": 0.92, |
|
"learning_rate": 0.0001, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.27419354838709675, |
|
"eval_f1_macro": 0.6073049013268815, |
|
"eval_f1_micro": 0.8403053435114504, |
|
"eval_loss": 0.29900264739990234, |
|
"eval_roc_auc": 0.8677200013927284, |
|
"eval_runtime": 70.4045, |
|
"eval_samples_per_second": 54.599, |
|
"eval_steps_per_second": 0.866, |
|
"learning_rate": 0.0001, |
|
"step": 7421 |
|
}, |
|
{ |
|
"epoch": 41.43646408839779, |
|
"grad_norm": 0.16666004061698914, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3165, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.2713319458896982, |
|
"eval_f1_macro": 0.5991905306617578, |
|
"eval_f1_micro": 0.8408500229322733, |
|
"eval_loss": 0.2996482849121094, |
|
"eval_roc_auc": 0.8681113305915592, |
|
"eval_runtime": 64.5424, |
|
"eval_samples_per_second": 59.558, |
|
"eval_steps_per_second": 0.945, |
|
"learning_rate": 0.0001, |
|
"step": 7602 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.2695109261186264, |
|
"eval_f1_macro": 0.6091761352198065, |
|
"eval_f1_micro": 0.8414383822001469, |
|
"eval_loss": 0.29860275983810425, |
|
"eval_roc_auc": 0.8688227620059982, |
|
"eval_runtime": 62.4452, |
|
"eval_samples_per_second": 61.558, |
|
"eval_steps_per_second": 0.977, |
|
"learning_rate": 0.0001, |
|
"step": 7783 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.2749739854318418, |
|
"eval_f1_macro": 0.595393170760367, |
|
"eval_f1_micro": 0.83963159650068, |
|
"eval_loss": 0.2981945872306824, |
|
"eval_roc_auc": 0.8668373099533652, |
|
"eval_runtime": 63.2599, |
|
"eval_samples_per_second": 60.765, |
|
"eval_steps_per_second": 0.964, |
|
"learning_rate": 0.0001, |
|
"step": 7964 |
|
}, |
|
{ |
|
"epoch": 44.19889502762431, |
|
"grad_norm": 0.15173059701919556, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3138, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.2757544224765869, |
|
"eval_f1_macro": 0.6028068266895004, |
|
"eval_f1_micro": 0.8400674260611943, |
|
"eval_loss": 0.2977070212364197, |
|
"eval_roc_auc": 0.8673735530917092, |
|
"eval_runtime": 63.0465, |
|
"eval_samples_per_second": 60.971, |
|
"eval_steps_per_second": 0.968, |
|
"learning_rate": 0.0001, |
|
"step": 8145 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.2754942767950052, |
|
"eval_f1_macro": 0.5966152252417688, |
|
"eval_f1_micro": 0.8406257200645179, |
|
"eval_loss": 0.29817572236061096, |
|
"eval_roc_auc": 0.8677318087609064, |
|
"eval_runtime": 63.3542, |
|
"eval_samples_per_second": 60.675, |
|
"eval_steps_per_second": 0.963, |
|
"learning_rate": 0.0001, |
|
"step": 8326 |
|
}, |
|
{ |
|
"epoch": 46.96132596685083, |
|
"grad_norm": 0.19454629719257355, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3125, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2767950052029136, |
|
"eval_f1_macro": 0.5893483505785619, |
|
"eval_f1_micro": 0.8377610088898078, |
|
"eval_loss": 0.29967373609542847, |
|
"eval_roc_auc": 0.8650328028655897, |
|
"eval_runtime": 66.3004, |
|
"eval_samples_per_second": 57.979, |
|
"eval_steps_per_second": 0.92, |
|
"learning_rate": 0.0001, |
|
"step": 8507 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.27471383975026015, |
|
"eval_f1_macro": 0.6135295162351735, |
|
"eval_f1_micro": 0.8420492820421911, |
|
"eval_loss": 0.2978098392486572, |
|
"eval_roc_auc": 0.8693551899664994, |
|
"eval_runtime": 64.6705, |
|
"eval_samples_per_second": 59.44, |
|
"eval_steps_per_second": 0.943, |
|
"learning_rate": 0.0001, |
|
"step": 8688 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.27471383975026015, |
|
"eval_f1_macro": 0.6017406274418097, |
|
"eval_f1_micro": 0.8398618395804016, |
|
"eval_loss": 0.29812732338905334, |
|
"eval_roc_auc": 0.8671341366526011, |
|
"eval_runtime": 62.9848, |
|
"eval_samples_per_second": 61.031, |
|
"eval_steps_per_second": 0.968, |
|
"learning_rate": 0.0001, |
|
"step": 8869 |
|
}, |
|
{ |
|
"epoch": 49.72375690607735, |
|
"grad_norm": 0.1885526031255722, |
|
"learning_rate": 0.0001, |
|
"loss": 0.312, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2702913631633715, |
|
"eval_f1_macro": 0.6112978412836683, |
|
"eval_f1_micro": 0.8410134158403285, |
|
"eval_loss": 0.29766079783439636, |
|
"eval_roc_auc": 0.8684402374503033, |
|
"eval_runtime": 65.3588, |
|
"eval_samples_per_second": 58.814, |
|
"eval_steps_per_second": 0.933, |
|
"learning_rate": 0.0001, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.2715920915712799, |
|
"eval_f1_macro": 0.6110237777757073, |
|
"eval_f1_micro": 0.8419122264928279, |
|
"eval_loss": 0.30011385679244995, |
|
"eval_roc_auc": 0.8695847405483492, |
|
"eval_runtime": 69.6934, |
|
"eval_samples_per_second": 55.156, |
|
"eval_steps_per_second": 0.875, |
|
"learning_rate": 0.0001, |
|
"step": 9231 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.27887617065556713, |
|
"eval_f1_macro": 0.6011341201375136, |
|
"eval_f1_micro": 0.8379989163807117, |
|
"eval_loss": 0.29774174094200134, |
|
"eval_roc_auc": 0.8652877966213873, |
|
"eval_runtime": 70.0846, |
|
"eval_samples_per_second": 54.848, |
|
"eval_steps_per_second": 0.87, |
|
"learning_rate": 0.0001, |
|
"step": 9412 |
|
}, |
|
{ |
|
"epoch": 52.48618784530387, |
|
"grad_norm": 0.20562438666820526, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3115, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.27289281997918835, |
|
"eval_f1_macro": 0.6151233978242734, |
|
"eval_f1_micro": 0.8425461059111437, |
|
"eval_loss": 0.2966245412826538, |
|
"eval_roc_auc": 0.8699284348856478, |
|
"eval_runtime": 67.2883, |
|
"eval_samples_per_second": 57.127, |
|
"eval_steps_per_second": 0.907, |
|
"learning_rate": 0.0001, |
|
"step": 9593 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.2809573361082206, |
|
"eval_f1_macro": 0.5974038753077745, |
|
"eval_f1_micro": 0.839878532091204, |
|
"eval_loss": 0.2976503372192383, |
|
"eval_roc_auc": 0.8669142710106849, |
|
"eval_runtime": 68.333, |
|
"eval_samples_per_second": 56.254, |
|
"eval_steps_per_second": 0.893, |
|
"learning_rate": 0.0001, |
|
"step": 9774 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.27237252861602496, |
|
"eval_f1_macro": 0.6118791964065535, |
|
"eval_f1_micro": 0.8407133757961783, |
|
"eval_loss": 0.2964698374271393, |
|
"eval_roc_auc": 0.8680032827448569, |
|
"eval_runtime": 66.9844, |
|
"eval_samples_per_second": 57.387, |
|
"eval_steps_per_second": 0.911, |
|
"learning_rate": 0.0001, |
|
"step": 9955 |
|
}, |
|
{ |
|
"epoch": 55.248618784530386, |
|
"grad_norm": 0.16818420588970184, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3105, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.2786160249739854, |
|
"eval_f1_macro": 0.6058471375996606, |
|
"eval_f1_micro": 0.8408194622279129, |
|
"eval_loss": 0.29658928513526917, |
|
"eval_roc_auc": 0.8678807414102527, |
|
"eval_runtime": 69.2914, |
|
"eval_samples_per_second": 55.476, |
|
"eval_steps_per_second": 0.88, |
|
"learning_rate": 0.0001, |
|
"step": 10136 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.27471383975026015, |
|
"eval_f1_macro": 0.606792526005266, |
|
"eval_f1_micro": 0.8398854864270742, |
|
"eval_loss": 0.29776841402053833, |
|
"eval_roc_auc": 0.8671915469127291, |
|
"eval_runtime": 71.354, |
|
"eval_samples_per_second": 53.872, |
|
"eval_steps_per_second": 0.855, |
|
"learning_rate": 0.0001, |
|
"step": 10317 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.2721123829344433, |
|
"eval_f1_macro": 0.614564922504406, |
|
"eval_f1_micro": 0.8427313221299859, |
|
"eval_loss": 0.29647430777549744, |
|
"eval_roc_auc": 0.8699460215572541, |
|
"eval_runtime": 68.4, |
|
"eval_samples_per_second": 56.199, |
|
"eval_steps_per_second": 0.892, |
|
"learning_rate": 0.0001, |
|
"step": 10498 |
|
}, |
|
{ |
|
"epoch": 58.011049723756905, |
|
"grad_norm": 0.2239256352186203, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3105, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.2793964620187305, |
|
"eval_f1_macro": 0.6005863231905055, |
|
"eval_f1_micro": 0.8405752518649542, |
|
"eval_loss": 0.2961284816265106, |
|
"eval_roc_auc": 0.8676383900900995, |
|
"eval_runtime": 74.2032, |
|
"eval_samples_per_second": 51.804, |
|
"eval_steps_per_second": 0.822, |
|
"learning_rate": 0.0001, |
|
"step": 10679 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.2796566077003122, |
|
"eval_f1_macro": 0.6113253981378993, |
|
"eval_f1_micro": 0.8428984624752626, |
|
"eval_loss": 0.29608383774757385, |
|
"eval_roc_auc": 0.8699772214199647, |
|
"eval_runtime": 67.9781, |
|
"eval_samples_per_second": 56.548, |
|
"eval_steps_per_second": 0.897, |
|
"learning_rate": 0.0001, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 60.773480662983424, |
|
"grad_norm": 0.20308265089988708, |
|
"learning_rate": 0.0001, |
|
"loss": 0.308, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.2796566077003122, |
|
"eval_f1_macro": 0.5998781229497869, |
|
"eval_f1_micro": 0.8415255756768527, |
|
"eval_loss": 0.2963137924671173, |
|
"eval_roc_auc": 0.8684482296406385, |
|
"eval_runtime": 65.3138, |
|
"eval_samples_per_second": 58.854, |
|
"eval_steps_per_second": 0.934, |
|
"learning_rate": 0.0001, |
|
"step": 11041 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.27887617065556713, |
|
"eval_f1_macro": 0.6017143414363826, |
|
"eval_f1_micro": 0.8405433111225014, |
|
"eval_loss": 0.29606395959854126, |
|
"eval_roc_auc": 0.8676237835288696, |
|
"eval_runtime": 66.6671, |
|
"eval_samples_per_second": 57.66, |
|
"eval_steps_per_second": 0.915, |
|
"learning_rate": 0.0001, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.2736732570239334, |
|
"eval_f1_macro": 0.6107600812503934, |
|
"eval_f1_micro": 0.8421159560149325, |
|
"eval_loss": 0.29548379778862, |
|
"eval_roc_auc": 0.8692919777882774, |
|
"eval_runtime": 65.9628, |
|
"eval_samples_per_second": 58.275, |
|
"eval_steps_per_second": 0.925, |
|
"learning_rate": 0.0001, |
|
"step": 11403 |
|
}, |
|
{ |
|
"epoch": 63.53591160220994, |
|
"grad_norm": 0.22083976864814758, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3083, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.2791363163371488, |
|
"eval_f1_macro": 0.6127112192164306, |
|
"eval_f1_micro": 0.8407360081778686, |
|
"eval_loss": 0.2951599955558777, |
|
"eval_roc_auc": 0.8678962734976112, |
|
"eval_runtime": 67.2265, |
|
"eval_samples_per_second": 57.18, |
|
"eval_steps_per_second": 0.907, |
|
"learning_rate": 0.0001, |
|
"step": 11584 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.27809573361082207, |
|
"eval_f1_macro": 0.6022403129456154, |
|
"eval_f1_micro": 0.839130323176836, |
|
"eval_loss": 0.29839980602264404, |
|
"eval_roc_auc": 0.8664061119366814, |
|
"eval_runtime": 69.4982, |
|
"eval_samples_per_second": 55.311, |
|
"eval_steps_per_second": 0.878, |
|
"learning_rate": 0.0001, |
|
"step": 11765 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.2739334027055151, |
|
"eval_f1_macro": 0.6104238923347367, |
|
"eval_f1_micro": 0.841473646741756, |
|
"eval_loss": 0.2956618666648865, |
|
"eval_roc_auc": 0.8687138112478329, |
|
"eval_runtime": 65.4466, |
|
"eval_samples_per_second": 58.735, |
|
"eval_steps_per_second": 0.932, |
|
"learning_rate": 0.0001, |
|
"step": 11946 |
|
}, |
|
{ |
|
"epoch": 66.29834254143647, |
|
"grad_norm": 0.245449036359787, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3051, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.27601456815816855, |
|
"eval_f1_macro": 0.6142232564356239, |
|
"eval_f1_micro": 0.841559361481444, |
|
"eval_loss": 0.29616212844848633, |
|
"eval_roc_auc": 0.868995015401048, |
|
"eval_runtime": 63.2029, |
|
"eval_samples_per_second": 60.82, |
|
"eval_steps_per_second": 0.965, |
|
"learning_rate": 0.0001, |
|
"step": 12127 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.277315296566077, |
|
"eval_f1_macro": 0.6083667891934388, |
|
"eval_f1_micro": 0.8412997050747482, |
|
"eval_loss": 0.29670244455337524, |
|
"eval_roc_auc": 0.8685716965013714, |
|
"eval_runtime": 62.807, |
|
"eval_samples_per_second": 61.203, |
|
"eval_steps_per_second": 0.971, |
|
"learning_rate": 0.0001, |
|
"step": 12308 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.27289281997918835, |
|
"eval_f1_macro": 0.6160532530170337, |
|
"eval_f1_micro": 0.8406188667703209, |
|
"eval_loss": 0.29603201150894165, |
|
"eval_roc_auc": 0.8679078999484645, |
|
"eval_runtime": 66.3361, |
|
"eval_samples_per_second": 57.947, |
|
"eval_steps_per_second": 0.92, |
|
"learning_rate": 0.0001, |
|
"step": 12489 |
|
}, |
|
{ |
|
"epoch": 69.06077348066299, |
|
"grad_norm": 0.240888312458992, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3066, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.268210197710718, |
|
"eval_f1_macro": 0.6247633367937411, |
|
"eval_f1_micro": 0.8433517310587054, |
|
"eval_loss": 0.2971898317337036, |
|
"eval_roc_auc": 0.8708423261877216, |
|
"eval_runtime": 65.1371, |
|
"eval_samples_per_second": 59.014, |
|
"eval_steps_per_second": 0.936, |
|
"learning_rate": 0.0001, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.28017689906347554, |
|
"eval_f1_macro": 0.6081264630991828, |
|
"eval_f1_micro": 0.8396197327852004, |
|
"eval_loss": 0.2964514493942261, |
|
"eval_roc_auc": 0.8667672572575927, |
|
"eval_runtime": 66.6789, |
|
"eval_samples_per_second": 57.649, |
|
"eval_steps_per_second": 0.915, |
|
"learning_rate": 1e-05, |
|
"step": 12851 |
|
}, |
|
{ |
|
"epoch": 71.8232044198895, |
|
"grad_norm": 0.2170819193124771, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3061, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.28069719042663893, |
|
"eval_f1_macro": 0.603429563858726, |
|
"eval_f1_micro": 0.8422399017676703, |
|
"eval_loss": 0.29609331488609314, |
|
"eval_roc_auc": 0.8690857705667967, |
|
"eval_runtime": 67.2956, |
|
"eval_samples_per_second": 57.121, |
|
"eval_steps_per_second": 0.906, |
|
"learning_rate": 1e-05, |
|
"step": 13032 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.2809573361082206, |
|
"eval_f1_macro": 0.6080269607484758, |
|
"eval_f1_micro": 0.8408677611863842, |
|
"eval_loss": 0.2954128384590149, |
|
"eval_roc_auc": 0.867912866799092, |
|
"eval_runtime": 67.8811, |
|
"eval_samples_per_second": 56.628, |
|
"eval_steps_per_second": 0.899, |
|
"learning_rate": 1e-05, |
|
"step": 13213 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.2809573361082206, |
|
"eval_f1_macro": 0.6093453328873296, |
|
"eval_f1_micro": 0.8411459000025601, |
|
"eval_loss": 0.29537200927734375, |
|
"eval_roc_auc": 0.8681591237557506, |
|
"eval_runtime": 65.6885, |
|
"eval_samples_per_second": 58.519, |
|
"eval_steps_per_second": 0.929, |
|
"learning_rate": 1e-05, |
|
"step": 13394 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 1e-05, |
|
"step": 13394, |
|
"total_flos": 1.9211796047151268e+19, |
|
"train_loss": 0.3220266872187844, |
|
"train_runtime": 23053.7141, |
|
"train_samples_per_second": 49.979, |
|
"train_steps_per_second": 0.785 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 18100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9211796047151268e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|