{ "best_metric": 0.28438833355903625, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria_binary-binary-large-2024_11_03-batch-size64_freeze/checkpoint-14118", "epoch": 88.0, "eval_steps": 500, "global_step": 15928, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.2630072840790843, "eval_f1_macro": 0.5774239185038708, "eval_f1_micro": 0.8262109753225342, "eval_loss": 0.3235681354999542, "eval_runtime": 79.5924, "eval_samples_per_second": 48.296, "eval_steps_per_second": 0.766, "learning_rate": 0.001, "step": 181 }, { "epoch": 2.0, "eval_accuracy": 0.24115504682622269, "eval_f1_macro": 0.6199165901601139, "eval_f1_micro": 0.8378565084377776, "eval_loss": 0.3146470785140991, "eval_runtime": 78.3589, "eval_samples_per_second": 49.056, "eval_steps_per_second": 0.778, "learning_rate": 0.001, "step": 362 }, { "epoch": 2.7624309392265194, "grad_norm": 0.39133042097091675, "learning_rate": 0.001, "loss": 0.3995, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.2554630593132154, "eval_f1_macro": 0.6043570009634397, "eval_f1_micro": 0.8398465111582348, "eval_loss": 0.3090434670448303, "eval_runtime": 77.1271, "eval_samples_per_second": 49.84, "eval_steps_per_second": 0.791, "learning_rate": 0.001, "step": 543 }, { "epoch": 4.0, "eval_accuracy": 0.25624349635796045, "eval_f1_macro": 0.600278483167516, "eval_f1_micro": 0.8348980169243037, "eval_loss": 0.30735355615615845, "eval_runtime": 77.9527, "eval_samples_per_second": 49.312, "eval_steps_per_second": 0.783, "learning_rate": 0.001, "step": 724 }, { "epoch": 5.0, "eval_accuracy": 0.2515608740894901, "eval_f1_macro": 0.6247746971203368, "eval_f1_micro": 0.8405948994360434, "eval_loss": 0.30385810136795044, "eval_runtime": 79.3747, "eval_samples_per_second": 48.429, "eval_steps_per_second": 0.769, "learning_rate": 0.001, "step": 905 }, { "epoch": 5.524861878453039, "grad_norm": 0.30673518776893616, "learning_rate": 0.001, "loss": 0.3299, "step": 1000 }, { "epoch": 6.0, "eval_accuracy": 0.2596253902185224, "eval_f1_macro": 0.6225111439021958, "eval_f1_micro": 0.841987466427932, "eval_loss": 0.3059956729412079, "eval_runtime": 78.3475, "eval_samples_per_second": 49.063, "eval_steps_per_second": 0.779, "learning_rate": 0.001, "step": 1086 }, { "epoch": 7.0, "eval_accuracy": 0.28199791883454733, "eval_f1_macro": 0.5954695621655504, "eval_f1_micro": 0.8387498056289846, "eval_loss": 0.3013758361339569, "eval_runtime": 77.8519, "eval_samples_per_second": 49.376, "eval_steps_per_second": 0.784, "learning_rate": 0.001, "step": 1267 }, { "epoch": 8.0, "eval_accuracy": 0.2702913631633715, "eval_f1_macro": 0.5974832028652961, "eval_f1_micro": 0.8390550208451284, "eval_loss": 0.30131709575653076, "eval_runtime": 76.3356, "eval_samples_per_second": 50.357, "eval_steps_per_second": 0.799, "learning_rate": 0.001, "step": 1448 }, { "epoch": 8.287292817679559, "grad_norm": 0.2473623901605606, "learning_rate": 0.001, "loss": 0.3216, "step": 1500 }, { "epoch": 9.0, "eval_accuracy": 0.28407908428720086, "eval_f1_macro": 0.5974259992816957, "eval_f1_micro": 0.8406665130922214, "eval_loss": 0.30098479986190796, "eval_runtime": 76.6263, "eval_samples_per_second": 50.166, "eval_steps_per_second": 0.796, "learning_rate": 0.001, "step": 1629 }, { "epoch": 10.0, "eval_accuracy": 0.27107180020811655, "eval_f1_macro": 0.5937940362628795, "eval_f1_micro": 0.8376187886791475, "eval_loss": 0.30072343349456787, "eval_runtime": 78.1999, "eval_samples_per_second": 49.156, "eval_steps_per_second": 0.78, "learning_rate": 0.001, "step": 1810 }, { "epoch": 11.0, "eval_accuracy": 0.277315296566077, "eval_f1_macro": 0.5761905737205768, "eval_f1_micro": 0.8348592565387339, "eval_loss": 0.3035621643066406, "eval_runtime": 77.9385, "eval_samples_per_second": 49.321, "eval_steps_per_second": 0.783, "learning_rate": 0.001, "step": 1991 }, { "epoch": 11.049723756906078, "grad_norm": 0.24565815925598145, "learning_rate": 0.001, "loss": 0.3167, "step": 2000 }, { "epoch": 12.0, "eval_accuracy": 0.26742976066597296, "eval_f1_macro": 0.6114755503631268, "eval_f1_micro": 0.838466245156027, "eval_loss": 0.3012838363647461, "eval_runtime": 77.6902, "eval_samples_per_second": 49.479, "eval_steps_per_second": 0.785, "learning_rate": 0.001, "step": 2172 }, { "epoch": 13.0, "eval_accuracy": 0.2648283038501561, "eval_f1_macro": 0.6145726431106396, "eval_f1_micro": 0.8421213122252433, "eval_loss": 0.29778778553009033, "eval_runtime": 77.1253, "eval_samples_per_second": 49.841, "eval_steps_per_second": 0.791, "learning_rate": 0.001, "step": 2353 }, { "epoch": 13.812154696132596, "grad_norm": 0.2421799898147583, "learning_rate": 0.001, "loss": 0.315, "step": 2500 }, { "epoch": 14.0, "eval_accuracy": 0.27341311134235174, "eval_f1_macro": 0.605884177295118, "eval_f1_micro": 0.8399742101869762, "eval_loss": 0.29774588346481323, "eval_runtime": 76.619, "eval_samples_per_second": 50.17, "eval_steps_per_second": 0.796, "learning_rate": 0.001, "step": 2534 }, { "epoch": 15.0, "eval_accuracy": 0.2666493236212279, "eval_f1_macro": 0.6074624445346274, "eval_f1_micro": 0.8433503513117323, "eval_loss": 0.29809942841529846, "eval_runtime": 77.2846, "eval_samples_per_second": 49.738, "eval_steps_per_second": 0.789, "learning_rate": 0.001, "step": 2715 }, { "epoch": 16.0, "eval_accuracy": 0.27471383975026015, "eval_f1_macro": 0.5932952143692389, "eval_f1_micro": 0.8394100355835181, "eval_loss": 0.29744812846183777, "eval_runtime": 77.1329, "eval_samples_per_second": 49.836, "eval_steps_per_second": 0.791, "learning_rate": 0.001, "step": 2896 }, { "epoch": 16.574585635359117, "grad_norm": 0.2012377828359604, "learning_rate": 0.001, "loss": 0.3147, "step": 3000 }, { "epoch": 17.0, "eval_accuracy": 0.2663891779396462, "eval_f1_macro": 0.6146867059353278, "eval_f1_micro": 0.8437578624264077, "eval_loss": 0.2983638644218445, "eval_runtime": 77.7728, "eval_samples_per_second": 49.426, "eval_steps_per_second": 0.784, "learning_rate": 0.001, "step": 3077 }, { "epoch": 18.0, "eval_accuracy": 0.2762747138397503, "eval_f1_macro": 0.5803903225868541, "eval_f1_micro": 0.8356339535005088, "eval_loss": 0.3023049235343933, "eval_runtime": 77.2379, "eval_samples_per_second": 49.768, "eval_steps_per_second": 0.79, "learning_rate": 0.001, "step": 3258 }, { "epoch": 19.0, "eval_accuracy": 0.2739334027055151, "eval_f1_macro": 0.6158875389283108, "eval_f1_micro": 0.8423529411764706, "eval_loss": 0.2984697222709656, "eval_runtime": 76.7734, "eval_samples_per_second": 50.069, "eval_steps_per_second": 0.795, "learning_rate": 0.001, "step": 3439 }, { "epoch": 19.337016574585636, "grad_norm": 0.20086592435836792, "learning_rate": 0.001, "loss": 0.3122, "step": 3500 }, { "epoch": 20.0, "eval_accuracy": 0.28069719042663893, "eval_f1_macro": 0.5984147849283556, "eval_f1_micro": 0.8411767731317183, "eval_loss": 0.29680272936820984, "eval_runtime": 77.2197, "eval_samples_per_second": 49.78, "eval_steps_per_second": 0.79, "learning_rate": 0.001, "step": 3620 }, { "epoch": 21.0, "eval_accuracy": 0.2702913631633715, "eval_f1_macro": 0.6060492619397649, "eval_f1_micro": 0.8418969323285377, "eval_loss": 0.30051520466804504, "eval_runtime": 76.7663, "eval_samples_per_second": 50.074, "eval_steps_per_second": 0.795, "learning_rate": 0.001, "step": 3801 }, { "epoch": 22.0, "eval_accuracy": 0.27471383975026015, "eval_f1_macro": 0.580353532272699, "eval_f1_micro": 0.8374817746302854, "eval_loss": 0.29818177223205566, "eval_runtime": 77.0688, "eval_samples_per_second": 49.877, "eval_steps_per_second": 0.792, "learning_rate": 0.001, "step": 3982 }, { "epoch": 22.099447513812155, "grad_norm": 0.21317744255065918, "learning_rate": 0.001, "loss": 0.3149, "step": 4000 }, { "epoch": 23.0, "eval_accuracy": 0.27809573361082207, "eval_f1_macro": 0.615237110287355, "eval_f1_micro": 0.8436262061960386, "eval_loss": 0.29393449425697327, "eval_runtime": 76.8217, "eval_samples_per_second": 50.038, "eval_steps_per_second": 0.794, "learning_rate": 0.001, "step": 4163 }, { "epoch": 24.0, "eval_accuracy": 0.27601456815816855, "eval_f1_macro": 0.6228721497006335, "eval_f1_micro": 0.8453232862164007, "eval_loss": 0.2948347330093384, "eval_runtime": 77.3517, "eval_samples_per_second": 49.695, "eval_steps_per_second": 0.789, "learning_rate": 0.001, "step": 4344 }, { "epoch": 24.861878453038674, "grad_norm": 0.17206734418869019, "learning_rate": 0.001, "loss": 0.3118, "step": 4500 }, { "epoch": 25.0, "eval_accuracy": 0.2736732570239334, "eval_f1_macro": 0.610255370235793, "eval_f1_micro": 0.8427456149244652, "eval_loss": 0.29676035046577454, "eval_runtime": 77.1567, "eval_samples_per_second": 49.821, "eval_steps_per_second": 0.791, "learning_rate": 0.001, "step": 4525 }, { "epoch": 26.0, "eval_accuracy": 0.2754942767950052, "eval_f1_macro": 0.6045462014226007, "eval_f1_micro": 0.8420542140997499, "eval_loss": 0.2955995500087738, "eval_runtime": 77.6213, "eval_samples_per_second": 49.522, "eval_steps_per_second": 0.786, "learning_rate": 0.001, "step": 4706 }, { "epoch": 27.0, "eval_accuracy": 0.27653485952133194, "eval_f1_macro": 0.6115221375683754, "eval_f1_micro": 0.8437684356323902, "eval_loss": 0.29585039615631104, "eval_runtime": 76.6184, "eval_samples_per_second": 50.171, "eval_steps_per_second": 0.796, "learning_rate": 0.001, "step": 4887 }, { "epoch": 27.624309392265193, "grad_norm": 0.1697782576084137, "learning_rate": 0.001, "loss": 0.3126, "step": 5000 }, { "epoch": 28.0, "eval_accuracy": 0.26925078043704476, "eval_f1_macro": 0.6191186747828321, "eval_f1_micro": 0.8446938104986479, "eval_loss": 0.295540988445282, "eval_runtime": 76.9629, "eval_samples_per_second": 49.946, "eval_steps_per_second": 0.793, "learning_rate": 0.001, "step": 5068 }, { "epoch": 29.0, "eval_accuracy": 0.2663891779396462, "eval_f1_macro": 0.6215750043898619, "eval_f1_micro": 0.8437664387164651, "eval_loss": 0.3010655343532562, "eval_runtime": 77.7511, "eval_samples_per_second": 49.44, "eval_steps_per_second": 0.785, "learning_rate": 0.001, "step": 5249 }, { "epoch": 30.0, "eval_accuracy": 0.2809573361082206, "eval_f1_macro": 0.6025311078598518, "eval_f1_micro": 0.8437435686355217, "eval_loss": 0.29214760661125183, "eval_runtime": 76.285, "eval_samples_per_second": 50.39, "eval_steps_per_second": 0.8, "learning_rate": 0.0001, "step": 5430 }, { "epoch": 30.386740331491712, "grad_norm": 0.15570667386054993, "learning_rate": 0.0001, "loss": 0.3093, "step": 5500 }, { "epoch": 31.0, "eval_accuracy": 0.28121748178980227, "eval_f1_macro": 0.6071651131848005, "eval_f1_micro": 0.8439103638567266, "eval_loss": 0.29040178656578064, "eval_runtime": 80.1919, "eval_samples_per_second": 47.935, "eval_steps_per_second": 0.761, "learning_rate": 0.0001, "step": 5611 }, { "epoch": 32.0, "eval_accuracy": 0.2809573361082206, "eval_f1_macro": 0.6111569473926136, "eval_f1_micro": 0.8437194965322373, "eval_loss": 0.29034462571144104, "eval_runtime": 76.5731, "eval_samples_per_second": 50.2, "eval_steps_per_second": 0.797, "learning_rate": 0.0001, "step": 5792 }, { "epoch": 33.0, "eval_accuracy": 0.28537981269510926, "eval_f1_macro": 0.6202495870793918, "eval_f1_micro": 0.8461617038663874, "eval_loss": 0.2888760268688202, "eval_runtime": 78.8612, "eval_samples_per_second": 48.744, "eval_steps_per_second": 0.774, "learning_rate": 0.0001, "step": 5973 }, { "epoch": 33.149171270718234, "grad_norm": 0.16711881756782532, "learning_rate": 0.0001, "loss": 0.3049, "step": 6000 }, { "epoch": 34.0, "eval_accuracy": 0.2861602497398543, "eval_f1_macro": 0.6150504150317478, "eval_f1_micro": 0.8446023671361742, "eval_loss": 0.28964364528656006, "eval_runtime": 78.2568, "eval_samples_per_second": 49.12, "eval_steps_per_second": 0.779, "learning_rate": 0.0001, "step": 6154 }, { "epoch": 35.0, "eval_accuracy": 0.2866805411030177, "eval_f1_macro": 0.611180048847438, "eval_f1_micro": 0.8449244728566273, "eval_loss": 0.28874215483665466, "eval_runtime": 78.411, "eval_samples_per_second": 49.024, "eval_steps_per_second": 0.778, "learning_rate": 0.0001, "step": 6335 }, { "epoch": 35.91160220994475, "grad_norm": 0.1489323228597641, "learning_rate": 0.0001, "loss": 0.3012, "step": 6500 }, { "epoch": 36.0, "eval_accuracy": 0.28355879292403746, "eval_f1_macro": 0.6119874534823754, "eval_f1_micro": 0.8447173058645225, "eval_loss": 0.2888963222503662, "eval_runtime": 80.9886, "eval_samples_per_second": 47.463, "eval_steps_per_second": 0.753, "learning_rate": 0.0001, "step": 6516 }, { "epoch": 37.0, "eval_accuracy": 0.2866805411030177, "eval_f1_macro": 0.6255767175486281, "eval_f1_micro": 0.8475834540970686, "eval_loss": 0.288282573223114, "eval_runtime": 78.5643, "eval_samples_per_second": 48.928, "eval_steps_per_second": 0.776, "learning_rate": 0.0001, "step": 6697 }, { "epoch": 38.0, "eval_accuracy": 0.28251821019771073, "eval_f1_macro": 0.6057239934398935, "eval_f1_micro": 0.8452536426724028, "eval_loss": 0.29050976037979126, "eval_runtime": 79.802, "eval_samples_per_second": 48.169, "eval_steps_per_second": 0.764, "learning_rate": 0.0001, "step": 6878 }, { "epoch": 38.67403314917127, "grad_norm": 0.1844823658466339, "learning_rate": 0.0001, "loss": 0.299, "step": 7000 }, { "epoch": 39.0, "eval_accuracy": 0.28537981269510926, "eval_f1_macro": 0.625366961909805, "eval_f1_micro": 0.8470600182796791, "eval_loss": 0.28778275847435, "eval_runtime": 78.7507, "eval_samples_per_second": 48.812, "eval_steps_per_second": 0.775, "learning_rate": 0.0001, "step": 7059 }, { "epoch": 40.0, "eval_accuracy": 0.2809573361082206, "eval_f1_macro": 0.622337777946806, "eval_f1_micro": 0.8468000302716884, "eval_loss": 0.2885717749595642, "eval_runtime": 79.0959, "eval_samples_per_second": 48.599, "eval_steps_per_second": 0.771, "learning_rate": 0.0001, "step": 7240 }, { "epoch": 41.0, "eval_accuracy": 0.2843392299687825, "eval_f1_macro": 0.6260539681026288, "eval_f1_micro": 0.847323400258903, "eval_loss": 0.28773826360702515, "eval_runtime": 81.7413, "eval_samples_per_second": 47.026, "eval_steps_per_second": 0.746, "learning_rate": 0.0001, "step": 7421 }, { "epoch": 41.43646408839779, "grad_norm": 0.16540081799030304, "learning_rate": 0.0001, "loss": 0.2989, "step": 7500 }, { "epoch": 42.0, "eval_accuracy": 0.28563995837669093, "eval_f1_macro": 0.6199392946357273, "eval_f1_micro": 0.8476613005450627, "eval_loss": 0.28776827454566956, "eval_runtime": 78.9339, "eval_samples_per_second": 48.699, "eval_steps_per_second": 0.773, "learning_rate": 0.0001, "step": 7602 }, { "epoch": 43.0, "eval_accuracy": 0.28303850156087407, "eval_f1_macro": 0.6287571427217789, "eval_f1_micro": 0.8479237095716232, "eval_loss": 0.28717148303985596, "eval_runtime": 77.7099, "eval_samples_per_second": 49.466, "eval_steps_per_second": 0.785, "learning_rate": 0.0001, "step": 7783 }, { "epoch": 44.0, "eval_accuracy": 0.28407908428720086, "eval_f1_macro": 0.6189979239207937, "eval_f1_micro": 0.8463665693654939, "eval_loss": 0.28678667545318604, "eval_runtime": 78.2343, "eval_samples_per_second": 49.134, "eval_steps_per_second": 0.78, "learning_rate": 0.0001, "step": 7964 }, { "epoch": 44.19889502762431, "grad_norm": 0.17522749304771423, "learning_rate": 0.0001, "loss": 0.2983, "step": 8000 }, { "epoch": 45.0, "eval_accuracy": 0.28381893860561913, "eval_f1_macro": 0.6235508782461164, "eval_f1_micro": 0.8462928555066304, "eval_loss": 0.28698909282684326, "eval_runtime": 78.0653, "eval_samples_per_second": 49.241, "eval_steps_per_second": 0.781, "learning_rate": 0.0001, "step": 8145 }, { "epoch": 46.0, "eval_accuracy": 0.28251821019771073, "eval_f1_macro": 0.6151318511304835, "eval_f1_micro": 0.8459846547314578, "eval_loss": 0.2868472635746002, "eval_runtime": 77.6178, "eval_samples_per_second": 49.525, "eval_steps_per_second": 0.786, "learning_rate": 0.0001, "step": 8326 }, { "epoch": 46.96132596685083, "grad_norm": 0.20419611036777496, "learning_rate": 0.0001, "loss": 0.298, "step": 8500 }, { "epoch": 47.0, "eval_accuracy": 0.2845993756503642, "eval_f1_macro": 0.6211457155619424, "eval_f1_micro": 0.8462129359348595, "eval_loss": 0.28715068101882935, "eval_runtime": 77.3289, "eval_samples_per_second": 49.71, "eval_steps_per_second": 0.789, "learning_rate": 0.0001, "step": 8507 }, { "epoch": 48.0, "eval_accuracy": 0.28355879292403746, "eval_f1_macro": 0.6231150403485404, "eval_f1_micro": 0.8466852933705867, "eval_loss": 0.28661593794822693, "eval_runtime": 76.7641, "eval_samples_per_second": 50.075, "eval_steps_per_second": 0.795, "learning_rate": 0.0001, "step": 8688 }, { "epoch": 49.0, "eval_accuracy": 0.28590010405827265, "eval_f1_macro": 0.616055362439494, "eval_f1_micro": 0.8460415439387342, "eval_loss": 0.28633347153663635, "eval_runtime": 76.5745, "eval_samples_per_second": 50.199, "eval_steps_per_second": 0.797, "learning_rate": 0.0001, "step": 8869 }, { "epoch": 49.72375690607735, "grad_norm": 0.2103131115436554, "learning_rate": 0.0001, "loss": 0.2965, "step": 9000 }, { "epoch": 50.0, "eval_accuracy": 0.2845993756503642, "eval_f1_macro": 0.625458075101288, "eval_f1_micro": 0.8482882700250868, "eval_loss": 0.28642749786376953, "eval_runtime": 76.3371, "eval_samples_per_second": 50.356, "eval_steps_per_second": 0.799, "learning_rate": 0.0001, "step": 9050 }, { "epoch": 51.0, "eval_accuracy": 0.28485952133194586, "eval_f1_macro": 0.6278100779578839, "eval_f1_micro": 0.848592785832539, "eval_loss": 0.2890762686729431, "eval_runtime": 77.0258, "eval_samples_per_second": 49.905, "eval_steps_per_second": 0.792, "learning_rate": 0.0001, "step": 9231 }, { "epoch": 52.0, "eval_accuracy": 0.2851196670135276, "eval_f1_macro": 0.6255462096645672, "eval_f1_micro": 0.8464228285561143, "eval_loss": 0.2855978012084961, "eval_runtime": 76.6781, "eval_samples_per_second": 50.132, "eval_steps_per_second": 0.796, "learning_rate": 0.0001, "step": 9412 }, { "epoch": 52.48618784530387, "grad_norm": 0.24192312359809875, "learning_rate": 0.0001, "loss": 0.2956, "step": 9500 }, { "epoch": 53.0, "eval_accuracy": 0.27887617065556713, "eval_f1_macro": 0.6457587856102145, "eval_f1_micro": 0.8489991514001897, "eval_loss": 0.2872205674648285, "eval_runtime": 76.6479, "eval_samples_per_second": 50.151, "eval_steps_per_second": 0.796, "learning_rate": 0.0001, "step": 9593 }, { "epoch": 54.0, "eval_accuracy": 0.2903225806451613, "eval_f1_macro": 0.6243869856844756, "eval_f1_micro": 0.8476844874709444, "eval_loss": 0.2855803072452545, "eval_runtime": 77.582, "eval_samples_per_second": 49.548, "eval_steps_per_second": 0.786, "learning_rate": 0.0001, "step": 9774 }, { "epoch": 55.0, "eval_accuracy": 0.2845993756503642, "eval_f1_macro": 0.6339630509281279, "eval_f1_micro": 0.8475136716266056, "eval_loss": 0.28568968176841736, "eval_runtime": 77.102, "eval_samples_per_second": 49.856, "eval_steps_per_second": 0.791, "learning_rate": 0.0001, "step": 9955 }, { "epoch": 55.248618784530386, "grad_norm": 0.21083500981330872, "learning_rate": 0.0001, "loss": 0.2958, "step": 10000 }, { "epoch": 56.0, "eval_accuracy": 0.2866805411030177, "eval_f1_macro": 0.6241465491773776, "eval_f1_micro": 0.8465597622829039, "eval_loss": 0.28617897629737854, "eval_runtime": 76.1445, "eval_samples_per_second": 50.483, "eval_steps_per_second": 0.801, "learning_rate": 0.0001, "step": 10136 }, { "epoch": 57.0, "eval_accuracy": 0.2861602497398543, "eval_f1_macro": 0.6249269702519318, "eval_f1_micro": 0.845436853426201, "eval_loss": 0.2870914936065674, "eval_runtime": 77.4556, "eval_samples_per_second": 49.628, "eval_steps_per_second": 0.788, "learning_rate": 0.0001, "step": 10317 }, { "epoch": 58.0, "eval_accuracy": 0.28121748178980227, "eval_f1_macro": 0.6333866717026029, "eval_f1_micro": 0.8491941382702348, "eval_loss": 0.2857914865016937, "eval_runtime": 77.2551, "eval_samples_per_second": 49.757, "eval_steps_per_second": 0.79, "learning_rate": 0.0001, "step": 10498 }, { "epoch": 58.011049723756905, "grad_norm": 0.22250542044639587, "learning_rate": 1e-05, "loss": 0.2954, "step": 10500 }, { "epoch": 59.0, "eval_accuracy": 0.2887617065556712, "eval_f1_macro": 0.6178461796051926, "eval_f1_micro": 0.8468232576049287, "eval_loss": 0.28617140650749207, "eval_runtime": 76.6548, "eval_samples_per_second": 50.147, "eval_steps_per_second": 0.796, "learning_rate": 1e-05, "step": 10679 }, { "epoch": 60.0, "eval_accuracy": 0.28537981269510926, "eval_f1_macro": 0.6275748058546806, "eval_f1_micro": 0.8485033598045205, "eval_loss": 0.2846605181694031, "eval_runtime": 76.2984, "eval_samples_per_second": 50.381, "eval_steps_per_second": 0.799, "learning_rate": 1e-05, "step": 10860 }, { "epoch": 60.773480662983424, "grad_norm": 0.25101110339164734, "learning_rate": 1e-05, "loss": 0.2923, "step": 11000 }, { "epoch": 61.0, "eval_accuracy": 0.28303850156087407, "eval_f1_macro": 0.6223888517425455, "eval_f1_micro": 0.8479865171982329, "eval_loss": 0.2848633825778961, "eval_runtime": 76.6322, "eval_samples_per_second": 50.162, "eval_steps_per_second": 0.796, "learning_rate": 1e-05, "step": 11041 }, { "epoch": 62.0, "eval_accuracy": 0.2843392299687825, "eval_f1_macro": 0.6247632003821695, "eval_f1_micro": 0.8469200122586577, "eval_loss": 0.28548601269721985, "eval_runtime": 77.5636, "eval_samples_per_second": 49.559, "eval_steps_per_second": 0.786, "learning_rate": 1e-05, "step": 11222 }, { "epoch": 63.0, "eval_accuracy": 0.2827783558792924, "eval_f1_macro": 0.6274806463168713, "eval_f1_micro": 0.8488979777323336, "eval_loss": 0.28493326902389526, "eval_runtime": 77.2321, "eval_samples_per_second": 49.772, "eval_steps_per_second": 0.79, "learning_rate": 1e-05, "step": 11403 }, { "epoch": 63.53591160220994, "grad_norm": 0.23796355724334717, "learning_rate": 1e-05, "loss": 0.2918, "step": 11500 }, { "epoch": 64.0, "eval_accuracy": 0.28225806451612906, "eval_f1_macro": 0.6370787064578803, "eval_f1_micro": 0.8475187206498287, "eval_loss": 0.28459736704826355, "eval_runtime": 77.0797, "eval_samples_per_second": 49.87, "eval_steps_per_second": 0.791, "learning_rate": 1e-05, "step": 11584 }, { "epoch": 65.0, "eval_accuracy": 0.2869406867845994, "eval_f1_macro": 0.6240984315849201, "eval_f1_micro": 0.8467700785794469, "eval_loss": 0.2860054671764374, "eval_runtime": 76.4904, "eval_samples_per_second": 50.255, "eval_steps_per_second": 0.797, "learning_rate": 1e-05, "step": 11765 }, { "epoch": 66.0, "eval_accuracy": 0.28407908428720086, "eval_f1_macro": 0.6346693986906206, "eval_f1_micro": 0.8481340441736481, "eval_loss": 0.2847185730934143, "eval_runtime": 77.2653, "eval_samples_per_second": 49.751, "eval_steps_per_second": 0.789, "learning_rate": 1e-05, "step": 11946 }, { "epoch": 66.29834254143647, "grad_norm": 0.25470152497291565, "learning_rate": 1e-05, "loss": 0.2906, "step": 12000 }, { "epoch": 67.0, "eval_accuracy": 0.28537981269510926, "eval_f1_macro": 0.6287121285420982, "eval_f1_micro": 0.8487528745798691, "eval_loss": 0.28529325127601624, "eval_runtime": 79.9065, "eval_samples_per_second": 48.106, "eval_steps_per_second": 0.763, "learning_rate": 1e-05, "step": 12127 }, { "epoch": 68.0, "eval_accuracy": 0.2866805411030177, "eval_f1_macro": 0.6321379394582358, "eval_f1_micro": 0.8480251642525557, "eval_loss": 0.2852926254272461, "eval_runtime": 78.4728, "eval_samples_per_second": 48.985, "eval_steps_per_second": 0.777, "learning_rate": 1e-05, "step": 12308 }, { "epoch": 69.0, "eval_accuracy": 0.28355879292403746, "eval_f1_macro": 0.6397237492354447, "eval_f1_micro": 0.847692190707931, "eval_loss": 0.284834623336792, "eval_runtime": 77.7721, "eval_samples_per_second": 49.426, "eval_steps_per_second": 0.784, "learning_rate": 1e-05, "step": 12489 }, { "epoch": 69.06077348066299, "grad_norm": 0.19653503596782684, "learning_rate": 1e-05, "loss": 0.2918, "step": 12500 }, { "epoch": 70.0, "eval_accuracy": 0.28225806451612906, "eval_f1_macro": 0.6381143671040704, "eval_f1_micro": 0.8492167101827677, "eval_loss": 0.28527727723121643, "eval_runtime": 76.6607, "eval_samples_per_second": 50.143, "eval_steps_per_second": 0.796, "learning_rate": 1e-05, "step": 12670 }, { "epoch": 71.0, "eval_accuracy": 0.2882414151925078, "eval_f1_macro": 0.6325489300082728, "eval_f1_micro": 0.8475971370143149, "eval_loss": 0.28507113456726074, "eval_runtime": 76.9731, "eval_samples_per_second": 49.94, "eval_steps_per_second": 0.792, "learning_rate": 1.0000000000000002e-06, "step": 12851 }, { "epoch": 71.8232044198895, "grad_norm": 0.19946995377540588, "learning_rate": 1.0000000000000002e-06, "loss": 0.2918, "step": 13000 }, { "epoch": 72.0, "eval_accuracy": 0.28485952133194586, "eval_f1_macro": 0.6236352127811986, "eval_f1_micro": 0.8474255781269963, "eval_loss": 0.28452861309051514, "eval_runtime": 79.7463, "eval_samples_per_second": 48.203, "eval_steps_per_second": 0.765, "learning_rate": 1.0000000000000002e-06, "step": 13032 }, { "epoch": 73.0, "eval_accuracy": 0.28121748178980227, "eval_f1_macro": 0.6333277250193455, "eval_f1_micro": 0.847641772858811, "eval_loss": 0.28448227047920227, "eval_runtime": 76.9059, "eval_samples_per_second": 49.983, "eval_steps_per_second": 0.793, "learning_rate": 1.0000000000000002e-06, "step": 13213 }, { "epoch": 74.0, "eval_accuracy": 0.2827783558792924, "eval_f1_macro": 0.6300187593616763, "eval_f1_micro": 0.8465770953294945, "eval_loss": 0.28447526693344116, "eval_runtime": 77.6657, "eval_samples_per_second": 49.494, "eval_steps_per_second": 0.785, "learning_rate": 1.0000000000000002e-06, "step": 13394 }, { "epoch": 74.58563535911603, "grad_norm": 0.251558780670166, "learning_rate": 1.0000000000000002e-06, "loss": 0.2913, "step": 13500 }, { "epoch": 75.0, "eval_accuracy": 0.28199791883454733, "eval_f1_macro": 0.6235297745568456, "eval_f1_micro": 0.8473772748126625, "eval_loss": 0.2851284146308899, "eval_runtime": 77.914, "eval_samples_per_second": 49.336, "eval_steps_per_second": 0.783, "learning_rate": 1.0000000000000002e-06, "step": 13575 }, { "epoch": 76.0, "eval_accuracy": 0.2879812695109261, "eval_f1_macro": 0.6186062513830065, "eval_f1_micro": 0.847320835674516, "eval_loss": 0.2859683036804199, "eval_runtime": 77.7414, "eval_samples_per_second": 49.446, "eval_steps_per_second": 0.785, "learning_rate": 1.0000000000000002e-06, "step": 13756 }, { "epoch": 77.0, "eval_accuracy": 0.28563995837669093, "eval_f1_macro": 0.6172786558676017, "eval_f1_micro": 0.8459046737621472, "eval_loss": 0.2858298718929291, "eval_runtime": 79.1015, "eval_samples_per_second": 48.596, "eval_steps_per_second": 0.771, "learning_rate": 1.0000000000000002e-06, "step": 13937 }, { "epoch": 77.34806629834254, "grad_norm": 0.22088366746902466, "learning_rate": 1.0000000000000002e-06, "loss": 0.2913, "step": 14000 }, { "epoch": 78.0, "eval_accuracy": 0.2843392299687825, "eval_f1_macro": 0.6325947858436887, "eval_f1_micro": 0.8480547459130655, "eval_loss": 0.28438833355903625, "eval_runtime": 77.2562, "eval_samples_per_second": 49.757, "eval_steps_per_second": 0.79, "learning_rate": 1.0000000000000002e-06, "step": 14118 }, { "epoch": 79.0, "eval_accuracy": 0.2874609781477627, "eval_f1_macro": 0.617917490234713, "eval_f1_micro": 0.8472353346431579, "eval_loss": 0.2870919704437256, "eval_runtime": 76.5647, "eval_samples_per_second": 50.206, "eval_steps_per_second": 0.797, "learning_rate": 1.0000000000000002e-06, "step": 14299 }, { "epoch": 80.0, "eval_accuracy": 0.28381893860561913, "eval_f1_macro": 0.6286567457369128, "eval_f1_micro": 0.8477330616403465, "eval_loss": 0.28482332825660706, "eval_runtime": 76.83, "eval_samples_per_second": 50.033, "eval_steps_per_second": 0.794, "learning_rate": 1.0000000000000002e-06, "step": 14480 }, { "epoch": 80.11049723756906, "grad_norm": 0.21530944108963013, "learning_rate": 1.0000000000000002e-06, "loss": 0.2915, "step": 14500 }, { "epoch": 81.0, "eval_accuracy": 0.28537981269510926, "eval_f1_macro": 0.6304525529970205, "eval_f1_micro": 0.8489678202792957, "eval_loss": 0.2847617268562317, "eval_runtime": 77.26, "eval_samples_per_second": 49.754, "eval_steps_per_second": 0.79, "learning_rate": 1.0000000000000002e-06, "step": 14661 }, { "epoch": 82.0, "eval_accuracy": 0.28590010405827265, "eval_f1_macro": 0.6394217270135759, "eval_f1_micro": 0.8480416961845967, "eval_loss": 0.28511229157447815, "eval_runtime": 78.9926, "eval_samples_per_second": 48.663, "eval_steps_per_second": 0.772, "learning_rate": 1.0000000000000002e-06, "step": 14842 }, { "epoch": 82.87292817679558, "grad_norm": 0.2371624857187271, "learning_rate": 1.0000000000000002e-06, "loss": 0.2913, "step": 15000 }, { "epoch": 83.0, "eval_accuracy": 0.28563995837669093, "eval_f1_macro": 0.6255055774993536, "eval_f1_micro": 0.8488055562622434, "eval_loss": 0.284644216299057, "eval_runtime": 76.4754, "eval_samples_per_second": 50.265, "eval_steps_per_second": 0.798, "learning_rate": 1.0000000000000002e-06, "step": 15023 }, { "epoch": 84.0, "eval_accuracy": 0.2832986472424558, "eval_f1_macro": 0.6457553263622914, "eval_f1_micro": 0.848188643119867, "eval_loss": 0.2857225835323334, "eval_runtime": 77.2675, "eval_samples_per_second": 49.749, "eval_steps_per_second": 0.789, "learning_rate": 1.0000000000000002e-06, "step": 15204 }, { "epoch": 85.0, "eval_accuracy": 0.28121748178980227, "eval_f1_macro": 0.6339586571635658, "eval_f1_micro": 0.848818698673405, "eval_loss": 0.28550758957862854, "eval_runtime": 77.3218, "eval_samples_per_second": 49.714, "eval_steps_per_second": 0.789, "learning_rate": 1.0000000000000002e-07, "step": 15385 }, { "epoch": 85.6353591160221, "grad_norm": 0.22222235798835754, "learning_rate": 1.0000000000000002e-07, "loss": 0.2922, "step": 15500 }, { "epoch": 86.0, "eval_accuracy": 0.28590010405827265, "eval_f1_macro": 0.6362631688004041, "eval_f1_micro": 0.8479890588592848, "eval_loss": 0.284895658493042, "eval_runtime": 76.6317, "eval_samples_per_second": 50.162, "eval_steps_per_second": 0.796, "learning_rate": 1.0000000000000002e-07, "step": 15566 }, { "epoch": 87.0, "eval_accuracy": 0.2851196670135276, "eval_f1_macro": 0.6327749126527296, "eval_f1_micro": 0.8473590201582036, "eval_loss": 0.2845035493373871, "eval_runtime": 77.1171, "eval_samples_per_second": 49.846, "eval_steps_per_second": 0.791, "learning_rate": 1.0000000000000002e-07, "step": 15747 }, { "epoch": 88.0, "eval_accuracy": 0.28121748178980227, "eval_f1_macro": 0.6370893160624239, "eval_f1_micro": 0.8477551536613127, "eval_loss": 0.28541097044944763, "eval_runtime": 76.6873, "eval_samples_per_second": 50.126, "eval_steps_per_second": 0.795, "learning_rate": 1.0000000000000002e-07, "step": 15928 }, { "epoch": 88.0, "learning_rate": 1.0000000000000002e-07, "step": 15928, "total_flos": 1.500719176717825e+20, "train_loss": 0.3047179739897961, "train_runtime": 30841.8451, "train_samples_per_second": 56.038, "train_steps_per_second": 0.88 } ], "logging_steps": 500, "max_steps": 27150, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.500719176717825e+20, "train_batch_size": 64, "trial_name": null, "trial_params": null }