|
{ |
|
"best_metric": 0.28438833355903625, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria_binary-binary-large-2024_11_03-batch-size64_freeze/checkpoint-14118", |
|
"epoch": 88.0, |
|
"eval_steps": 500, |
|
"global_step": 15928, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.2630072840790843, |
|
"eval_f1_macro": 0.5774239185038708, |
|
"eval_f1_micro": 0.8262109753225342, |
|
"eval_loss": 0.3235681354999542, |
|
"eval_runtime": 79.5924, |
|
"eval_samples_per_second": 48.296, |
|
"eval_steps_per_second": 0.766, |
|
"learning_rate": 0.001, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.24115504682622269, |
|
"eval_f1_macro": 0.6199165901601139, |
|
"eval_f1_micro": 0.8378565084377776, |
|
"eval_loss": 0.3146470785140991, |
|
"eval_runtime": 78.3589, |
|
"eval_samples_per_second": 49.056, |
|
"eval_steps_per_second": 0.778, |
|
"learning_rate": 0.001, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.7624309392265194, |
|
"grad_norm": 0.39133042097091675, |
|
"learning_rate": 0.001, |
|
"loss": 0.3995, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.2554630593132154, |
|
"eval_f1_macro": 0.6043570009634397, |
|
"eval_f1_micro": 0.8398465111582348, |
|
"eval_loss": 0.3090434670448303, |
|
"eval_runtime": 77.1271, |
|
"eval_samples_per_second": 49.84, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 0.001, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.25624349635796045, |
|
"eval_f1_macro": 0.600278483167516, |
|
"eval_f1_micro": 0.8348980169243037, |
|
"eval_loss": 0.30735355615615845, |
|
"eval_runtime": 77.9527, |
|
"eval_samples_per_second": 49.312, |
|
"eval_steps_per_second": 0.783, |
|
"learning_rate": 0.001, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.2515608740894901, |
|
"eval_f1_macro": 0.6247746971203368, |
|
"eval_f1_micro": 0.8405948994360434, |
|
"eval_loss": 0.30385810136795044, |
|
"eval_runtime": 79.3747, |
|
"eval_samples_per_second": 48.429, |
|
"eval_steps_per_second": 0.769, |
|
"learning_rate": 0.001, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 5.524861878453039, |
|
"grad_norm": 0.30673518776893616, |
|
"learning_rate": 0.001, |
|
"loss": 0.3299, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.2596253902185224, |
|
"eval_f1_macro": 0.6225111439021958, |
|
"eval_f1_micro": 0.841987466427932, |
|
"eval_loss": 0.3059956729412079, |
|
"eval_runtime": 78.3475, |
|
"eval_samples_per_second": 49.063, |
|
"eval_steps_per_second": 0.779, |
|
"learning_rate": 0.001, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.28199791883454733, |
|
"eval_f1_macro": 0.5954695621655504, |
|
"eval_f1_micro": 0.8387498056289846, |
|
"eval_loss": 0.3013758361339569, |
|
"eval_runtime": 77.8519, |
|
"eval_samples_per_second": 49.376, |
|
"eval_steps_per_second": 0.784, |
|
"learning_rate": 0.001, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.2702913631633715, |
|
"eval_f1_macro": 0.5974832028652961, |
|
"eval_f1_micro": 0.8390550208451284, |
|
"eval_loss": 0.30131709575653076, |
|
"eval_runtime": 76.3356, |
|
"eval_samples_per_second": 50.357, |
|
"eval_steps_per_second": 0.799, |
|
"learning_rate": 0.001, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 8.287292817679559, |
|
"grad_norm": 0.2473623901605606, |
|
"learning_rate": 0.001, |
|
"loss": 0.3216, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.28407908428720086, |
|
"eval_f1_macro": 0.5974259992816957, |
|
"eval_f1_micro": 0.8406665130922214, |
|
"eval_loss": 0.30098479986190796, |
|
"eval_runtime": 76.6263, |
|
"eval_samples_per_second": 50.166, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 0.001, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.27107180020811655, |
|
"eval_f1_macro": 0.5937940362628795, |
|
"eval_f1_micro": 0.8376187886791475, |
|
"eval_loss": 0.30072343349456787, |
|
"eval_runtime": 78.1999, |
|
"eval_samples_per_second": 49.156, |
|
"eval_steps_per_second": 0.78, |
|
"learning_rate": 0.001, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.277315296566077, |
|
"eval_f1_macro": 0.5761905737205768, |
|
"eval_f1_micro": 0.8348592565387339, |
|
"eval_loss": 0.3035621643066406, |
|
"eval_runtime": 77.9385, |
|
"eval_samples_per_second": 49.321, |
|
"eval_steps_per_second": 0.783, |
|
"learning_rate": 0.001, |
|
"step": 1991 |
|
}, |
|
{ |
|
"epoch": 11.049723756906078, |
|
"grad_norm": 0.24565815925598145, |
|
"learning_rate": 0.001, |
|
"loss": 0.3167, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.26742976066597296, |
|
"eval_f1_macro": 0.6114755503631268, |
|
"eval_f1_micro": 0.838466245156027, |
|
"eval_loss": 0.3012838363647461, |
|
"eval_runtime": 77.6902, |
|
"eval_samples_per_second": 49.479, |
|
"eval_steps_per_second": 0.785, |
|
"learning_rate": 0.001, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2648283038501561, |
|
"eval_f1_macro": 0.6145726431106396, |
|
"eval_f1_micro": 0.8421213122252433, |
|
"eval_loss": 0.29778778553009033, |
|
"eval_runtime": 77.1253, |
|
"eval_samples_per_second": 49.841, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 0.001, |
|
"step": 2353 |
|
}, |
|
{ |
|
"epoch": 13.812154696132596, |
|
"grad_norm": 0.2421799898147583, |
|
"learning_rate": 0.001, |
|
"loss": 0.315, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.27341311134235174, |
|
"eval_f1_macro": 0.605884177295118, |
|
"eval_f1_micro": 0.8399742101869762, |
|
"eval_loss": 0.29774588346481323, |
|
"eval_runtime": 76.619, |
|
"eval_samples_per_second": 50.17, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 0.001, |
|
"step": 2534 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.2666493236212279, |
|
"eval_f1_macro": 0.6074624445346274, |
|
"eval_f1_micro": 0.8433503513117323, |
|
"eval_loss": 0.29809942841529846, |
|
"eval_runtime": 77.2846, |
|
"eval_samples_per_second": 49.738, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 0.001, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.27471383975026015, |
|
"eval_f1_macro": 0.5932952143692389, |
|
"eval_f1_micro": 0.8394100355835181, |
|
"eval_loss": 0.29744812846183777, |
|
"eval_runtime": 77.1329, |
|
"eval_samples_per_second": 49.836, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 0.001, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 16.574585635359117, |
|
"grad_norm": 0.2012377828359604, |
|
"learning_rate": 0.001, |
|
"loss": 0.3147, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.2663891779396462, |
|
"eval_f1_macro": 0.6146867059353278, |
|
"eval_f1_micro": 0.8437578624264077, |
|
"eval_loss": 0.2983638644218445, |
|
"eval_runtime": 77.7728, |
|
"eval_samples_per_second": 49.426, |
|
"eval_steps_per_second": 0.784, |
|
"learning_rate": 0.001, |
|
"step": 3077 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.2762747138397503, |
|
"eval_f1_macro": 0.5803903225868541, |
|
"eval_f1_micro": 0.8356339535005088, |
|
"eval_loss": 0.3023049235343933, |
|
"eval_runtime": 77.2379, |
|
"eval_samples_per_second": 49.768, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 0.001, |
|
"step": 3258 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.2739334027055151, |
|
"eval_f1_macro": 0.6158875389283108, |
|
"eval_f1_micro": 0.8423529411764706, |
|
"eval_loss": 0.2984697222709656, |
|
"eval_runtime": 76.7734, |
|
"eval_samples_per_second": 50.069, |
|
"eval_steps_per_second": 0.795, |
|
"learning_rate": 0.001, |
|
"step": 3439 |
|
}, |
|
{ |
|
"epoch": 19.337016574585636, |
|
"grad_norm": 0.20086592435836792, |
|
"learning_rate": 0.001, |
|
"loss": 0.3122, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.28069719042663893, |
|
"eval_f1_macro": 0.5984147849283556, |
|
"eval_f1_micro": 0.8411767731317183, |
|
"eval_loss": 0.29680272936820984, |
|
"eval_runtime": 77.2197, |
|
"eval_samples_per_second": 49.78, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 0.001, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.2702913631633715, |
|
"eval_f1_macro": 0.6060492619397649, |
|
"eval_f1_micro": 0.8418969323285377, |
|
"eval_loss": 0.30051520466804504, |
|
"eval_runtime": 76.7663, |
|
"eval_samples_per_second": 50.074, |
|
"eval_steps_per_second": 0.795, |
|
"learning_rate": 0.001, |
|
"step": 3801 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.27471383975026015, |
|
"eval_f1_macro": 0.580353532272699, |
|
"eval_f1_micro": 0.8374817746302854, |
|
"eval_loss": 0.29818177223205566, |
|
"eval_runtime": 77.0688, |
|
"eval_samples_per_second": 49.877, |
|
"eval_steps_per_second": 0.792, |
|
"learning_rate": 0.001, |
|
"step": 3982 |
|
}, |
|
{ |
|
"epoch": 22.099447513812155, |
|
"grad_norm": 0.21317744255065918, |
|
"learning_rate": 0.001, |
|
"loss": 0.3149, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.27809573361082207, |
|
"eval_f1_macro": 0.615237110287355, |
|
"eval_f1_micro": 0.8436262061960386, |
|
"eval_loss": 0.29393449425697327, |
|
"eval_runtime": 76.8217, |
|
"eval_samples_per_second": 50.038, |
|
"eval_steps_per_second": 0.794, |
|
"learning_rate": 0.001, |
|
"step": 4163 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.27601456815816855, |
|
"eval_f1_macro": 0.6228721497006335, |
|
"eval_f1_micro": 0.8453232862164007, |
|
"eval_loss": 0.2948347330093384, |
|
"eval_runtime": 77.3517, |
|
"eval_samples_per_second": 49.695, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 0.001, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 24.861878453038674, |
|
"grad_norm": 0.17206734418869019, |
|
"learning_rate": 0.001, |
|
"loss": 0.3118, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.2736732570239334, |
|
"eval_f1_macro": 0.610255370235793, |
|
"eval_f1_micro": 0.8427456149244652, |
|
"eval_loss": 0.29676035046577454, |
|
"eval_runtime": 77.1567, |
|
"eval_samples_per_second": 49.821, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 0.001, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.2754942767950052, |
|
"eval_f1_macro": 0.6045462014226007, |
|
"eval_f1_micro": 0.8420542140997499, |
|
"eval_loss": 0.2955995500087738, |
|
"eval_runtime": 77.6213, |
|
"eval_samples_per_second": 49.522, |
|
"eval_steps_per_second": 0.786, |
|
"learning_rate": 0.001, |
|
"step": 4706 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.27653485952133194, |
|
"eval_f1_macro": 0.6115221375683754, |
|
"eval_f1_micro": 0.8437684356323902, |
|
"eval_loss": 0.29585039615631104, |
|
"eval_runtime": 76.6184, |
|
"eval_samples_per_second": 50.171, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 0.001, |
|
"step": 4887 |
|
}, |
|
{ |
|
"epoch": 27.624309392265193, |
|
"grad_norm": 0.1697782576084137, |
|
"learning_rate": 0.001, |
|
"loss": 0.3126, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.26925078043704476, |
|
"eval_f1_macro": 0.6191186747828321, |
|
"eval_f1_micro": 0.8446938104986479, |
|
"eval_loss": 0.295540988445282, |
|
"eval_runtime": 76.9629, |
|
"eval_samples_per_second": 49.946, |
|
"eval_steps_per_second": 0.793, |
|
"learning_rate": 0.001, |
|
"step": 5068 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2663891779396462, |
|
"eval_f1_macro": 0.6215750043898619, |
|
"eval_f1_micro": 0.8437664387164651, |
|
"eval_loss": 0.3010655343532562, |
|
"eval_runtime": 77.7511, |
|
"eval_samples_per_second": 49.44, |
|
"eval_steps_per_second": 0.785, |
|
"learning_rate": 0.001, |
|
"step": 5249 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.2809573361082206, |
|
"eval_f1_macro": 0.6025311078598518, |
|
"eval_f1_micro": 0.8437435686355217, |
|
"eval_loss": 0.29214760661125183, |
|
"eval_runtime": 76.285, |
|
"eval_samples_per_second": 50.39, |
|
"eval_steps_per_second": 0.8, |
|
"learning_rate": 0.0001, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 30.386740331491712, |
|
"grad_norm": 0.15570667386054993, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3093, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.28121748178980227, |
|
"eval_f1_macro": 0.6071651131848005, |
|
"eval_f1_micro": 0.8439103638567266, |
|
"eval_loss": 0.29040178656578064, |
|
"eval_runtime": 80.1919, |
|
"eval_samples_per_second": 47.935, |
|
"eval_steps_per_second": 0.761, |
|
"learning_rate": 0.0001, |
|
"step": 5611 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.2809573361082206, |
|
"eval_f1_macro": 0.6111569473926136, |
|
"eval_f1_micro": 0.8437194965322373, |
|
"eval_loss": 0.29034462571144104, |
|
"eval_runtime": 76.5731, |
|
"eval_samples_per_second": 50.2, |
|
"eval_steps_per_second": 0.797, |
|
"learning_rate": 0.0001, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.28537981269510926, |
|
"eval_f1_macro": 0.6202495870793918, |
|
"eval_f1_micro": 0.8461617038663874, |
|
"eval_loss": 0.2888760268688202, |
|
"eval_runtime": 78.8612, |
|
"eval_samples_per_second": 48.744, |
|
"eval_steps_per_second": 0.774, |
|
"learning_rate": 0.0001, |
|
"step": 5973 |
|
}, |
|
{ |
|
"epoch": 33.149171270718234, |
|
"grad_norm": 0.16711881756782532, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3049, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.2861602497398543, |
|
"eval_f1_macro": 0.6150504150317478, |
|
"eval_f1_micro": 0.8446023671361742, |
|
"eval_loss": 0.28964364528656006, |
|
"eval_runtime": 78.2568, |
|
"eval_samples_per_second": 49.12, |
|
"eval_steps_per_second": 0.779, |
|
"learning_rate": 0.0001, |
|
"step": 6154 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.2866805411030177, |
|
"eval_f1_macro": 0.611180048847438, |
|
"eval_f1_micro": 0.8449244728566273, |
|
"eval_loss": 0.28874215483665466, |
|
"eval_runtime": 78.411, |
|
"eval_samples_per_second": 49.024, |
|
"eval_steps_per_second": 0.778, |
|
"learning_rate": 0.0001, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 35.91160220994475, |
|
"grad_norm": 0.1489323228597641, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3012, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.28355879292403746, |
|
"eval_f1_macro": 0.6119874534823754, |
|
"eval_f1_micro": 0.8447173058645225, |
|
"eval_loss": 0.2888963222503662, |
|
"eval_runtime": 80.9886, |
|
"eval_samples_per_second": 47.463, |
|
"eval_steps_per_second": 0.753, |
|
"learning_rate": 0.0001, |
|
"step": 6516 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.2866805411030177, |
|
"eval_f1_macro": 0.6255767175486281, |
|
"eval_f1_micro": 0.8475834540970686, |
|
"eval_loss": 0.288282573223114, |
|
"eval_runtime": 78.5643, |
|
"eval_samples_per_second": 48.928, |
|
"eval_steps_per_second": 0.776, |
|
"learning_rate": 0.0001, |
|
"step": 6697 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.28251821019771073, |
|
"eval_f1_macro": 0.6057239934398935, |
|
"eval_f1_micro": 0.8452536426724028, |
|
"eval_loss": 0.29050976037979126, |
|
"eval_runtime": 79.802, |
|
"eval_samples_per_second": 48.169, |
|
"eval_steps_per_second": 0.764, |
|
"learning_rate": 0.0001, |
|
"step": 6878 |
|
}, |
|
{ |
|
"epoch": 38.67403314917127, |
|
"grad_norm": 0.1844823658466339, |
|
"learning_rate": 0.0001, |
|
"loss": 0.299, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.28537981269510926, |
|
"eval_f1_macro": 0.625366961909805, |
|
"eval_f1_micro": 0.8470600182796791, |
|
"eval_loss": 0.28778275847435, |
|
"eval_runtime": 78.7507, |
|
"eval_samples_per_second": 48.812, |
|
"eval_steps_per_second": 0.775, |
|
"learning_rate": 0.0001, |
|
"step": 7059 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.2809573361082206, |
|
"eval_f1_macro": 0.622337777946806, |
|
"eval_f1_micro": 0.8468000302716884, |
|
"eval_loss": 0.2885717749595642, |
|
"eval_runtime": 79.0959, |
|
"eval_samples_per_second": 48.599, |
|
"eval_steps_per_second": 0.771, |
|
"learning_rate": 0.0001, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.2843392299687825, |
|
"eval_f1_macro": 0.6260539681026288, |
|
"eval_f1_micro": 0.847323400258903, |
|
"eval_loss": 0.28773826360702515, |
|
"eval_runtime": 81.7413, |
|
"eval_samples_per_second": 47.026, |
|
"eval_steps_per_second": 0.746, |
|
"learning_rate": 0.0001, |
|
"step": 7421 |
|
}, |
|
{ |
|
"epoch": 41.43646408839779, |
|
"grad_norm": 0.16540081799030304, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2989, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.28563995837669093, |
|
"eval_f1_macro": 0.6199392946357273, |
|
"eval_f1_micro": 0.8476613005450627, |
|
"eval_loss": 0.28776827454566956, |
|
"eval_runtime": 78.9339, |
|
"eval_samples_per_second": 48.699, |
|
"eval_steps_per_second": 0.773, |
|
"learning_rate": 0.0001, |
|
"step": 7602 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.28303850156087407, |
|
"eval_f1_macro": 0.6287571427217789, |
|
"eval_f1_micro": 0.8479237095716232, |
|
"eval_loss": 0.28717148303985596, |
|
"eval_runtime": 77.7099, |
|
"eval_samples_per_second": 49.466, |
|
"eval_steps_per_second": 0.785, |
|
"learning_rate": 0.0001, |
|
"step": 7783 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.28407908428720086, |
|
"eval_f1_macro": 0.6189979239207937, |
|
"eval_f1_micro": 0.8463665693654939, |
|
"eval_loss": 0.28678667545318604, |
|
"eval_runtime": 78.2343, |
|
"eval_samples_per_second": 49.134, |
|
"eval_steps_per_second": 0.78, |
|
"learning_rate": 0.0001, |
|
"step": 7964 |
|
}, |
|
{ |
|
"epoch": 44.19889502762431, |
|
"grad_norm": 0.17522749304771423, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2983, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.28381893860561913, |
|
"eval_f1_macro": 0.6235508782461164, |
|
"eval_f1_micro": 0.8462928555066304, |
|
"eval_loss": 0.28698909282684326, |
|
"eval_runtime": 78.0653, |
|
"eval_samples_per_second": 49.241, |
|
"eval_steps_per_second": 0.781, |
|
"learning_rate": 0.0001, |
|
"step": 8145 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.28251821019771073, |
|
"eval_f1_macro": 0.6151318511304835, |
|
"eval_f1_micro": 0.8459846547314578, |
|
"eval_loss": 0.2868472635746002, |
|
"eval_runtime": 77.6178, |
|
"eval_samples_per_second": 49.525, |
|
"eval_steps_per_second": 0.786, |
|
"learning_rate": 0.0001, |
|
"step": 8326 |
|
}, |
|
{ |
|
"epoch": 46.96132596685083, |
|
"grad_norm": 0.20419611036777496, |
|
"learning_rate": 0.0001, |
|
"loss": 0.298, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.2845993756503642, |
|
"eval_f1_macro": 0.6211457155619424, |
|
"eval_f1_micro": 0.8462129359348595, |
|
"eval_loss": 0.28715068101882935, |
|
"eval_runtime": 77.3289, |
|
"eval_samples_per_second": 49.71, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 0.0001, |
|
"step": 8507 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.28355879292403746, |
|
"eval_f1_macro": 0.6231150403485404, |
|
"eval_f1_micro": 0.8466852933705867, |
|
"eval_loss": 0.28661593794822693, |
|
"eval_runtime": 76.7641, |
|
"eval_samples_per_second": 50.075, |
|
"eval_steps_per_second": 0.795, |
|
"learning_rate": 0.0001, |
|
"step": 8688 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.28590010405827265, |
|
"eval_f1_macro": 0.616055362439494, |
|
"eval_f1_micro": 0.8460415439387342, |
|
"eval_loss": 0.28633347153663635, |
|
"eval_runtime": 76.5745, |
|
"eval_samples_per_second": 50.199, |
|
"eval_steps_per_second": 0.797, |
|
"learning_rate": 0.0001, |
|
"step": 8869 |
|
}, |
|
{ |
|
"epoch": 49.72375690607735, |
|
"grad_norm": 0.2103131115436554, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2965, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.2845993756503642, |
|
"eval_f1_macro": 0.625458075101288, |
|
"eval_f1_micro": 0.8482882700250868, |
|
"eval_loss": 0.28642749786376953, |
|
"eval_runtime": 76.3371, |
|
"eval_samples_per_second": 50.356, |
|
"eval_steps_per_second": 0.799, |
|
"learning_rate": 0.0001, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.28485952133194586, |
|
"eval_f1_macro": 0.6278100779578839, |
|
"eval_f1_micro": 0.848592785832539, |
|
"eval_loss": 0.2890762686729431, |
|
"eval_runtime": 77.0258, |
|
"eval_samples_per_second": 49.905, |
|
"eval_steps_per_second": 0.792, |
|
"learning_rate": 0.0001, |
|
"step": 9231 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.2851196670135276, |
|
"eval_f1_macro": 0.6255462096645672, |
|
"eval_f1_micro": 0.8464228285561143, |
|
"eval_loss": 0.2855978012084961, |
|
"eval_runtime": 76.6781, |
|
"eval_samples_per_second": 50.132, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 0.0001, |
|
"step": 9412 |
|
}, |
|
{ |
|
"epoch": 52.48618784530387, |
|
"grad_norm": 0.24192312359809875, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2956, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.27887617065556713, |
|
"eval_f1_macro": 0.6457587856102145, |
|
"eval_f1_micro": 0.8489991514001897, |
|
"eval_loss": 0.2872205674648285, |
|
"eval_runtime": 76.6479, |
|
"eval_samples_per_second": 50.151, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 0.0001, |
|
"step": 9593 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.2903225806451613, |
|
"eval_f1_macro": 0.6243869856844756, |
|
"eval_f1_micro": 0.8476844874709444, |
|
"eval_loss": 0.2855803072452545, |
|
"eval_runtime": 77.582, |
|
"eval_samples_per_second": 49.548, |
|
"eval_steps_per_second": 0.786, |
|
"learning_rate": 0.0001, |
|
"step": 9774 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.2845993756503642, |
|
"eval_f1_macro": 0.6339630509281279, |
|
"eval_f1_micro": 0.8475136716266056, |
|
"eval_loss": 0.28568968176841736, |
|
"eval_runtime": 77.102, |
|
"eval_samples_per_second": 49.856, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 0.0001, |
|
"step": 9955 |
|
}, |
|
{ |
|
"epoch": 55.248618784530386, |
|
"grad_norm": 0.21083500981330872, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2958, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.2866805411030177, |
|
"eval_f1_macro": 0.6241465491773776, |
|
"eval_f1_micro": 0.8465597622829039, |
|
"eval_loss": 0.28617897629737854, |
|
"eval_runtime": 76.1445, |
|
"eval_samples_per_second": 50.483, |
|
"eval_steps_per_second": 0.801, |
|
"learning_rate": 0.0001, |
|
"step": 10136 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.2861602497398543, |
|
"eval_f1_macro": 0.6249269702519318, |
|
"eval_f1_micro": 0.845436853426201, |
|
"eval_loss": 0.2870914936065674, |
|
"eval_runtime": 77.4556, |
|
"eval_samples_per_second": 49.628, |
|
"eval_steps_per_second": 0.788, |
|
"learning_rate": 0.0001, |
|
"step": 10317 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.28121748178980227, |
|
"eval_f1_macro": 0.6333866717026029, |
|
"eval_f1_micro": 0.8491941382702348, |
|
"eval_loss": 0.2857914865016937, |
|
"eval_runtime": 77.2551, |
|
"eval_samples_per_second": 49.757, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 0.0001, |
|
"step": 10498 |
|
}, |
|
{ |
|
"epoch": 58.011049723756905, |
|
"grad_norm": 0.22250542044639587, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2954, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.2887617065556712, |
|
"eval_f1_macro": 0.6178461796051926, |
|
"eval_f1_micro": 0.8468232576049287, |
|
"eval_loss": 0.28617140650749207, |
|
"eval_runtime": 76.6548, |
|
"eval_samples_per_second": 50.147, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 1e-05, |
|
"step": 10679 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.28537981269510926, |
|
"eval_f1_macro": 0.6275748058546806, |
|
"eval_f1_micro": 0.8485033598045205, |
|
"eval_loss": 0.2846605181694031, |
|
"eval_runtime": 76.2984, |
|
"eval_samples_per_second": 50.381, |
|
"eval_steps_per_second": 0.799, |
|
"learning_rate": 1e-05, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 60.773480662983424, |
|
"grad_norm": 0.25101110339164734, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2923, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.28303850156087407, |
|
"eval_f1_macro": 0.6223888517425455, |
|
"eval_f1_micro": 0.8479865171982329, |
|
"eval_loss": 0.2848633825778961, |
|
"eval_runtime": 76.6322, |
|
"eval_samples_per_second": 50.162, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 1e-05, |
|
"step": 11041 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.2843392299687825, |
|
"eval_f1_macro": 0.6247632003821695, |
|
"eval_f1_micro": 0.8469200122586577, |
|
"eval_loss": 0.28548601269721985, |
|
"eval_runtime": 77.5636, |
|
"eval_samples_per_second": 49.559, |
|
"eval_steps_per_second": 0.786, |
|
"learning_rate": 1e-05, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.2827783558792924, |
|
"eval_f1_macro": 0.6274806463168713, |
|
"eval_f1_micro": 0.8488979777323336, |
|
"eval_loss": 0.28493326902389526, |
|
"eval_runtime": 77.2321, |
|
"eval_samples_per_second": 49.772, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 1e-05, |
|
"step": 11403 |
|
}, |
|
{ |
|
"epoch": 63.53591160220994, |
|
"grad_norm": 0.23796355724334717, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2918, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.28225806451612906, |
|
"eval_f1_macro": 0.6370787064578803, |
|
"eval_f1_micro": 0.8475187206498287, |
|
"eval_loss": 0.28459736704826355, |
|
"eval_runtime": 77.0797, |
|
"eval_samples_per_second": 49.87, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 1e-05, |
|
"step": 11584 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.2869406867845994, |
|
"eval_f1_macro": 0.6240984315849201, |
|
"eval_f1_micro": 0.8467700785794469, |
|
"eval_loss": 0.2860054671764374, |
|
"eval_runtime": 76.4904, |
|
"eval_samples_per_second": 50.255, |
|
"eval_steps_per_second": 0.797, |
|
"learning_rate": 1e-05, |
|
"step": 11765 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.28407908428720086, |
|
"eval_f1_macro": 0.6346693986906206, |
|
"eval_f1_micro": 0.8481340441736481, |
|
"eval_loss": 0.2847185730934143, |
|
"eval_runtime": 77.2653, |
|
"eval_samples_per_second": 49.751, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 1e-05, |
|
"step": 11946 |
|
}, |
|
{ |
|
"epoch": 66.29834254143647, |
|
"grad_norm": 0.25470152497291565, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2906, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.28537981269510926, |
|
"eval_f1_macro": 0.6287121285420982, |
|
"eval_f1_micro": 0.8487528745798691, |
|
"eval_loss": 0.28529325127601624, |
|
"eval_runtime": 79.9065, |
|
"eval_samples_per_second": 48.106, |
|
"eval_steps_per_second": 0.763, |
|
"learning_rate": 1e-05, |
|
"step": 12127 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.2866805411030177, |
|
"eval_f1_macro": 0.6321379394582358, |
|
"eval_f1_micro": 0.8480251642525557, |
|
"eval_loss": 0.2852926254272461, |
|
"eval_runtime": 78.4728, |
|
"eval_samples_per_second": 48.985, |
|
"eval_steps_per_second": 0.777, |
|
"learning_rate": 1e-05, |
|
"step": 12308 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.28355879292403746, |
|
"eval_f1_macro": 0.6397237492354447, |
|
"eval_f1_micro": 0.847692190707931, |
|
"eval_loss": 0.284834623336792, |
|
"eval_runtime": 77.7721, |
|
"eval_samples_per_second": 49.426, |
|
"eval_steps_per_second": 0.784, |
|
"learning_rate": 1e-05, |
|
"step": 12489 |
|
}, |
|
{ |
|
"epoch": 69.06077348066299, |
|
"grad_norm": 0.19653503596782684, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2918, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.28225806451612906, |
|
"eval_f1_macro": 0.6381143671040704, |
|
"eval_f1_micro": 0.8492167101827677, |
|
"eval_loss": 0.28527727723121643, |
|
"eval_runtime": 76.6607, |
|
"eval_samples_per_second": 50.143, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 1e-05, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.2882414151925078, |
|
"eval_f1_macro": 0.6325489300082728, |
|
"eval_f1_micro": 0.8475971370143149, |
|
"eval_loss": 0.28507113456726074, |
|
"eval_runtime": 76.9731, |
|
"eval_samples_per_second": 49.94, |
|
"eval_steps_per_second": 0.792, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 12851 |
|
}, |
|
{ |
|
"epoch": 71.8232044198895, |
|
"grad_norm": 0.19946995377540588, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.2918, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.28485952133194586, |
|
"eval_f1_macro": 0.6236352127811986, |
|
"eval_f1_micro": 0.8474255781269963, |
|
"eval_loss": 0.28452861309051514, |
|
"eval_runtime": 79.7463, |
|
"eval_samples_per_second": 48.203, |
|
"eval_steps_per_second": 0.765, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13032 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.28121748178980227, |
|
"eval_f1_macro": 0.6333277250193455, |
|
"eval_f1_micro": 0.847641772858811, |
|
"eval_loss": 0.28448227047920227, |
|
"eval_runtime": 76.9059, |
|
"eval_samples_per_second": 49.983, |
|
"eval_steps_per_second": 0.793, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13213 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.2827783558792924, |
|
"eval_f1_macro": 0.6300187593616763, |
|
"eval_f1_micro": 0.8465770953294945, |
|
"eval_loss": 0.28447526693344116, |
|
"eval_runtime": 77.6657, |
|
"eval_samples_per_second": 49.494, |
|
"eval_steps_per_second": 0.785, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13394 |
|
}, |
|
{ |
|
"epoch": 74.58563535911603, |
|
"grad_norm": 0.251558780670166, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.2913, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.28199791883454733, |
|
"eval_f1_macro": 0.6235297745568456, |
|
"eval_f1_micro": 0.8473772748126625, |
|
"eval_loss": 0.2851284146308899, |
|
"eval_runtime": 77.914, |
|
"eval_samples_per_second": 49.336, |
|
"eval_steps_per_second": 0.783, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.2879812695109261, |
|
"eval_f1_macro": 0.6186062513830065, |
|
"eval_f1_micro": 0.847320835674516, |
|
"eval_loss": 0.2859683036804199, |
|
"eval_runtime": 77.7414, |
|
"eval_samples_per_second": 49.446, |
|
"eval_steps_per_second": 0.785, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13756 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.28563995837669093, |
|
"eval_f1_macro": 0.6172786558676017, |
|
"eval_f1_micro": 0.8459046737621472, |
|
"eval_loss": 0.2858298718929291, |
|
"eval_runtime": 79.1015, |
|
"eval_samples_per_second": 48.596, |
|
"eval_steps_per_second": 0.771, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 13937 |
|
}, |
|
{ |
|
"epoch": 77.34806629834254, |
|
"grad_norm": 0.22088366746902466, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.2913, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.2843392299687825, |
|
"eval_f1_macro": 0.6325947858436887, |
|
"eval_f1_micro": 0.8480547459130655, |
|
"eval_loss": 0.28438833355903625, |
|
"eval_runtime": 77.2562, |
|
"eval_samples_per_second": 49.757, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14118 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.2874609781477627, |
|
"eval_f1_macro": 0.617917490234713, |
|
"eval_f1_micro": 0.8472353346431579, |
|
"eval_loss": 0.2870919704437256, |
|
"eval_runtime": 76.5647, |
|
"eval_samples_per_second": 50.206, |
|
"eval_steps_per_second": 0.797, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14299 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.28381893860561913, |
|
"eval_f1_macro": 0.6286567457369128, |
|
"eval_f1_micro": 0.8477330616403465, |
|
"eval_loss": 0.28482332825660706, |
|
"eval_runtime": 76.83, |
|
"eval_samples_per_second": 50.033, |
|
"eval_steps_per_second": 0.794, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 80.11049723756906, |
|
"grad_norm": 0.21530944108963013, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.2915, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.28537981269510926, |
|
"eval_f1_macro": 0.6304525529970205, |
|
"eval_f1_micro": 0.8489678202792957, |
|
"eval_loss": 0.2847617268562317, |
|
"eval_runtime": 77.26, |
|
"eval_samples_per_second": 49.754, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14661 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.28590010405827265, |
|
"eval_f1_macro": 0.6394217270135759, |
|
"eval_f1_micro": 0.8480416961845967, |
|
"eval_loss": 0.28511229157447815, |
|
"eval_runtime": 78.9926, |
|
"eval_samples_per_second": 48.663, |
|
"eval_steps_per_second": 0.772, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 14842 |
|
}, |
|
{ |
|
"epoch": 82.87292817679558, |
|
"grad_norm": 0.2371624857187271, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.2913, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.28563995837669093, |
|
"eval_f1_macro": 0.6255055774993536, |
|
"eval_f1_micro": 0.8488055562622434, |
|
"eval_loss": 0.284644216299057, |
|
"eval_runtime": 76.4754, |
|
"eval_samples_per_second": 50.265, |
|
"eval_steps_per_second": 0.798, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 15023 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.2832986472424558, |
|
"eval_f1_macro": 0.6457553263622914, |
|
"eval_f1_micro": 0.848188643119867, |
|
"eval_loss": 0.2857225835323334, |
|
"eval_runtime": 77.2675, |
|
"eval_samples_per_second": 49.749, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"step": 15204 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.28121748178980227, |
|
"eval_f1_macro": 0.6339586571635658, |
|
"eval_f1_micro": 0.848818698673405, |
|
"eval_loss": 0.28550758957862854, |
|
"eval_runtime": 77.3218, |
|
"eval_samples_per_second": 49.714, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15385 |
|
}, |
|
{ |
|
"epoch": 85.6353591160221, |
|
"grad_norm": 0.22222235798835754, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"loss": 0.2922, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.28590010405827265, |
|
"eval_f1_macro": 0.6362631688004041, |
|
"eval_f1_micro": 0.8479890588592848, |
|
"eval_loss": 0.284895658493042, |
|
"eval_runtime": 76.6317, |
|
"eval_samples_per_second": 50.162, |
|
"eval_steps_per_second": 0.796, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15566 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.2851196670135276, |
|
"eval_f1_macro": 0.6327749126527296, |
|
"eval_f1_micro": 0.8473590201582036, |
|
"eval_loss": 0.2845035493373871, |
|
"eval_runtime": 77.1171, |
|
"eval_samples_per_second": 49.846, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15747 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.28121748178980227, |
|
"eval_f1_macro": 0.6370893160624239, |
|
"eval_f1_micro": 0.8477551536613127, |
|
"eval_loss": 0.28541097044944763, |
|
"eval_runtime": 76.6873, |
|
"eval_samples_per_second": 50.126, |
|
"eval_steps_per_second": 0.795, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15928 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 1.0000000000000002e-07, |
|
"step": 15928, |
|
"total_flos": 1.500719176717825e+20, |
|
"train_loss": 0.3047179739897961, |
|
"train_runtime": 30841.8451, |
|
"train_samples_per_second": 56.038, |
|
"train_steps_per_second": 0.88 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 27150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.500719176717825e+20, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|