{ "best_metric": 0.12240613251924515, "best_model_checkpoint": "/home1/datahome/villien/project_hub/DinoVdeau/models/DinoVdo-large-2025_01_27_45863-bs32_freeze/checkpoint-22113", "epoch": 91.0, "eval_steps": 500, "global_step": 24843, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.2293193717277487, "eval_f1_macro": 0.5138028081483725, "eval_f1_micro": 0.7433001107419712, "eval_loss": 0.16775080561637878, "eval_runtime": 464.7451, "eval_samples_per_second": 6.165, "eval_steps_per_second": 0.194, "learning_rate": 0.001, "step": 273 }, { "epoch": 1.8315018315018317, "grad_norm": 0.32014167308807373, "learning_rate": 0.001, "loss": 0.2722, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.243630017452007, "eval_f1_macro": 0.5770554152365124, "eval_f1_micro": 0.7613556338028169, "eval_loss": 0.15366077423095703, "eval_runtime": 460.9126, "eval_samples_per_second": 6.216, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 546 }, { "epoch": 3.0, "eval_accuracy": 0.24153577661431064, "eval_f1_macro": 0.6194117241441944, "eval_f1_micro": 0.7763416930582898, "eval_loss": 0.14831368625164032, "eval_runtime": 459.029, "eval_samples_per_second": 6.241, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 819 }, { "epoch": 3.663003663003663, "grad_norm": 0.2205018699169159, "learning_rate": 0.001, "loss": 0.169, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.2554973821989529, "eval_f1_macro": 0.6275864194841865, "eval_f1_micro": 0.7808419243986254, "eval_loss": 0.14640773832798004, "eval_runtime": 453.7375, "eval_samples_per_second": 6.314, "eval_steps_per_second": 0.198, "learning_rate": 0.001, "step": 1092 }, { "epoch": 5.0, "eval_accuracy": 0.2520069808027923, "eval_f1_macro": 0.6420978846376312, "eval_f1_micro": 0.7787831782144088, "eval_loss": 0.14515382051467896, "eval_runtime": 459.4943, "eval_samples_per_second": 6.235, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 1365 }, { "epoch": 5.4945054945054945, "grad_norm": 0.19979850947856903, "learning_rate": 0.001, "loss": 0.1612, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.2579406631762653, "eval_f1_macro": 0.614677201484495, "eval_f1_micro": 0.7802330599913682, "eval_loss": 0.14404040575027466, "eval_runtime": 455.8188, "eval_samples_per_second": 6.285, "eval_steps_per_second": 0.197, "learning_rate": 0.001, "step": 1638 }, { "epoch": 7.0, "eval_accuracy": 0.25026178010471206, "eval_f1_macro": 0.6141169392172392, "eval_f1_micro": 0.7787126995142263, "eval_loss": 0.14518576860427856, "eval_runtime": 463.2529, "eval_samples_per_second": 6.185, "eval_steps_per_second": 0.194, "learning_rate": 0.001, "step": 1911 }, { "epoch": 7.326007326007326, "grad_norm": 0.21145381033420563, "learning_rate": 0.001, "loss": 0.1594, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.2534031413612565, "eval_f1_macro": 0.6193231969902891, "eval_f1_micro": 0.77764832793959, "eval_loss": 0.14463570713996887, "eval_runtime": 461.4649, "eval_samples_per_second": 6.208, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 2184 }, { "epoch": 9.0, "eval_accuracy": 0.24537521815008725, "eval_f1_macro": 0.6363157565562336, "eval_f1_micro": 0.7813486742506164, "eval_loss": 0.14786836504936218, "eval_runtime": 457.2698, "eval_samples_per_second": 6.265, "eval_steps_per_second": 0.197, "learning_rate": 0.001, "step": 2457 }, { "epoch": 9.157509157509157, "grad_norm": 0.1889927238225937, "learning_rate": 0.001, "loss": 0.1567, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.2607329842931937, "eval_f1_macro": 0.6366285266954459, "eval_f1_micro": 0.7865724075118186, "eval_loss": 0.14251072704792023, "eval_runtime": 460.3923, "eval_samples_per_second": 6.223, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 2730 }, { "epoch": 10.989010989010989, "grad_norm": 0.1742856353521347, "learning_rate": 0.001, "loss": 0.1559, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.2617801047120419, "eval_f1_macro": 0.6565839087323015, "eval_f1_micro": 0.7907930529399456, "eval_loss": 0.14535894989967346, "eval_runtime": 453.3601, "eval_samples_per_second": 6.319, "eval_steps_per_second": 0.199, "learning_rate": 0.001, "step": 3003 }, { "epoch": 12.0, "eval_accuracy": 0.2600349040139616, "eval_f1_macro": 0.6438532757657272, "eval_f1_micro": 0.7895364689843217, "eval_loss": 0.14042578637599945, "eval_runtime": 461.5251, "eval_samples_per_second": 6.208, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 3276 }, { "epoch": 12.820512820512821, "grad_norm": 0.16230766475200653, "learning_rate": 0.001, "loss": 0.1549, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.2530541012216405, "eval_f1_macro": 0.6490185335679685, "eval_f1_micro": 0.7883035906256719, "eval_loss": 0.1413952261209488, "eval_runtime": 459.1407, "eval_samples_per_second": 6.24, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 3549 }, { "epoch": 14.0, "eval_accuracy": 0.2649214659685864, "eval_f1_macro": 0.6347742331888712, "eval_f1_micro": 0.7893909125685383, "eval_loss": 0.14056158065795898, "eval_runtime": 459.115, "eval_samples_per_second": 6.24, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 3822 }, { "epoch": 14.652014652014651, "grad_norm": 0.5134446024894714, "learning_rate": 0.001, "loss": 0.155, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.2631762652705061, "eval_f1_macro": 0.6469635503488436, "eval_f1_micro": 0.7906051299796704, "eval_loss": 0.13873133063316345, "eval_runtime": 461.6498, "eval_samples_per_second": 6.206, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 4095 }, { "epoch": 16.0, "eval_accuracy": 0.2603839441535777, "eval_f1_macro": 0.6331356980695925, "eval_f1_micro": 0.7857704059362723, "eval_loss": 0.14008578658103943, "eval_runtime": 459.7885, "eval_samples_per_second": 6.231, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 4368 }, { "epoch": 16.483516483516482, "grad_norm": 0.159025177359581, "learning_rate": 0.001, "loss": 0.1531, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.2520069808027923, "eval_f1_macro": 0.665107882553296, "eval_f1_micro": 0.7954926273458445, "eval_loss": 0.13811993598937988, "eval_runtime": 460.3054, "eval_samples_per_second": 6.224, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 4641 }, { "epoch": 18.0, "eval_accuracy": 0.27015706806282724, "eval_f1_macro": 0.6498362138588303, "eval_f1_micro": 0.791355160264673, "eval_loss": 0.13870170712471008, "eval_runtime": 463.8864, "eval_samples_per_second": 6.176, "eval_steps_per_second": 0.194, "learning_rate": 0.001, "step": 4914 }, { "epoch": 18.315018315018314, "grad_norm": 0.14721287786960602, "learning_rate": 0.001, "loss": 0.1549, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.2631762652705061, "eval_f1_macro": 0.6356042954083428, "eval_f1_micro": 0.794037710208935, "eval_loss": 0.1373777985572815, "eval_runtime": 458.2597, "eval_samples_per_second": 6.252, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 5187 }, { "epoch": 20.0, "eval_accuracy": 0.25514834205933684, "eval_f1_macro": 0.6393109338369551, "eval_f1_micro": 0.7850155722244155, "eval_loss": 0.13864819705486298, "eval_runtime": 469.7679, "eval_samples_per_second": 6.099, "eval_steps_per_second": 0.192, "learning_rate": 0.001, "step": 5460 }, { "epoch": 20.146520146520146, "grad_norm": 0.29727715253829956, "learning_rate": 0.001, "loss": 0.1524, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.26457242582897034, "eval_f1_macro": 0.6496408648134395, "eval_f1_micro": 0.7943311144513635, "eval_loss": 0.13566707074642181, "eval_runtime": 461.0026, "eval_samples_per_second": 6.215, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 5733 }, { "epoch": 21.978021978021978, "grad_norm": 0.16196180880069733, "learning_rate": 0.001, "loss": 0.1519, "step": 6000 }, { "epoch": 22.0, "eval_accuracy": 0.27225130890052357, "eval_f1_macro": 0.6400060646722534, "eval_f1_micro": 0.7972140143520473, "eval_loss": 0.1371580958366394, "eval_runtime": 454.7092, "eval_samples_per_second": 6.301, "eval_steps_per_second": 0.198, "learning_rate": 0.001, "step": 6006 }, { "epoch": 23.0, "eval_accuracy": 0.2603839441535777, "eval_f1_macro": 0.6595092645083339, "eval_f1_micro": 0.7938437661665805, "eval_loss": 0.13614478707313538, "eval_runtime": 452.3843, "eval_samples_per_second": 6.333, "eval_steps_per_second": 0.199, "learning_rate": 0.001, "step": 6279 }, { "epoch": 23.80952380952381, "grad_norm": 0.19081830978393555, "learning_rate": 0.001, "loss": 0.1528, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.2649214659685864, "eval_f1_macro": 0.6417599876035757, "eval_f1_micro": 0.7953625165167725, "eval_loss": 0.1362675279378891, "eval_runtime": 458.9922, "eval_samples_per_second": 6.242, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 6552 }, { "epoch": 25.0, "eval_accuracy": 0.2732984293193717, "eval_f1_macro": 0.6499701126111167, "eval_f1_micro": 0.7963302752293578, "eval_loss": 0.1358671337366104, "eval_runtime": 456.4797, "eval_samples_per_second": 6.276, "eval_steps_per_second": 0.197, "learning_rate": 0.001, "step": 6825 }, { "epoch": 25.641025641025642, "grad_norm": 0.1495918333530426, "learning_rate": 0.001, "loss": 0.1524, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.26910994764397905, "eval_f1_macro": 0.6554940295679257, "eval_f1_micro": 0.7983850403739906, "eval_loss": 0.13484793901443481, "eval_runtime": 468.4399, "eval_samples_per_second": 6.116, "eval_steps_per_second": 0.192, "learning_rate": 0.001, "step": 7098 }, { "epoch": 27.0, "eval_accuracy": 0.268760907504363, "eval_f1_macro": 0.6535438446852289, "eval_f1_micro": 0.7944040481353914, "eval_loss": 0.13669784367084503, "eval_runtime": 466.0343, "eval_samples_per_second": 6.148, "eval_steps_per_second": 0.193, "learning_rate": 0.001, "step": 7371 }, { "epoch": 27.47252747252747, "grad_norm": 0.16602398455142975, "learning_rate": 0.001, "loss": 0.1521, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.26771378708551485, "eval_f1_macro": 0.6398313564596081, "eval_f1_micro": 0.7922701137002292, "eval_loss": 0.13569706678390503, "eval_runtime": 460.9998, "eval_samples_per_second": 6.215, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 7644 }, { "epoch": 29.0, "eval_accuracy": 0.2638743455497382, "eval_f1_macro": 0.6636571549498853, "eval_f1_micro": 0.7924252834077636, "eval_loss": 0.14052562415599823, "eval_runtime": 467.3084, "eval_samples_per_second": 6.131, "eval_steps_per_second": 0.193, "learning_rate": 0.001, "step": 7917 }, { "epoch": 29.304029304029303, "grad_norm": 0.1555439680814743, "learning_rate": 0.001, "loss": 0.1525, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.27225130890052357, "eval_f1_macro": 0.6405329741259155, "eval_f1_micro": 0.7875164835164835, "eval_loss": 0.13725879788398743, "eval_runtime": 459.9825, "eval_samples_per_second": 6.228, "eval_steps_per_second": 0.196, "learning_rate": 0.001, "step": 8190 }, { "epoch": 31.0, "eval_accuracy": 0.2719022687609075, "eval_f1_macro": 0.6562409583519914, "eval_f1_micro": 0.7986043969902056, "eval_loss": 0.13544805347919464, "eval_runtime": 461.8222, "eval_samples_per_second": 6.204, "eval_steps_per_second": 0.195, "learning_rate": 0.001, "step": 8463 }, { "epoch": 31.135531135531135, "grad_norm": 0.20161172747612, "learning_rate": 0.001, "loss": 0.1522, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.2649214659685864, "eval_f1_macro": 0.6463119642593244, "eval_f1_micro": 0.793030759303076, "eval_loss": 0.13693773746490479, "eval_runtime": 462.8659, "eval_samples_per_second": 6.19, "eval_steps_per_second": 0.194, "learning_rate": 0.001, "step": 8736 }, { "epoch": 32.967032967032964, "grad_norm": 0.5906699299812317, "learning_rate": 0.0001, "loss": 0.1486, "step": 9000 }, { "epoch": 33.0, "eval_accuracy": 0.27469458987783596, "eval_f1_macro": 0.6660418237263234, "eval_f1_micro": 0.8014840718154292, "eval_loss": 0.13195939362049103, "eval_runtime": 465.1048, "eval_samples_per_second": 6.16, "eval_steps_per_second": 0.194, "learning_rate": 0.0001, "step": 9009 }, { "epoch": 34.0, "eval_accuracy": 0.28342059336823733, "eval_f1_macro": 0.6755989146580753, "eval_f1_micro": 0.8042847388187094, "eval_loss": 0.1299898624420166, "eval_runtime": 462.0703, "eval_samples_per_second": 6.2, "eval_steps_per_second": 0.195, "learning_rate": 0.0001, "step": 9282 }, { "epoch": 34.798534798534796, "grad_norm": 0.1343473345041275, "learning_rate": 0.0001, "loss": 0.1419, "step": 9500 }, { "epoch": 35.0, "eval_accuracy": 0.28097731239092494, "eval_f1_macro": 0.6772201041047545, "eval_f1_micro": 0.8066335550130482, "eval_loss": 0.12946264445781708, "eval_runtime": 463.7568, "eval_samples_per_second": 6.178, "eval_steps_per_second": 0.194, "learning_rate": 0.0001, "step": 9555 }, { "epoch": 36.0, "eval_accuracy": 0.28167539267015707, "eval_f1_macro": 0.6794992142877868, "eval_f1_micro": 0.8046182685753238, "eval_loss": 0.13086125254631042, "eval_runtime": 457.8267, "eval_samples_per_second": 6.258, "eval_steps_per_second": 0.197, "learning_rate": 0.0001, "step": 9828 }, { "epoch": 36.63003663003663, "grad_norm": 0.16144825518131256, "learning_rate": 0.0001, "loss": 0.1396, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.2823734729493892, "eval_f1_macro": 0.6791512030316924, "eval_f1_micro": 0.805367401183229, "eval_loss": 0.1278763711452484, "eval_runtime": 471.6542, "eval_samples_per_second": 6.074, "eval_steps_per_second": 0.191, "learning_rate": 0.0001, "step": 10101 }, { "epoch": 38.0, "eval_accuracy": 0.2848167539267016, "eval_f1_macro": 0.6814340581384846, "eval_f1_micro": 0.8077836187892149, "eval_loss": 0.12904110550880432, "eval_runtime": 470.311, "eval_samples_per_second": 6.092, "eval_steps_per_second": 0.191, "learning_rate": 0.0001, "step": 10374 }, { "epoch": 38.46153846153846, "grad_norm": 0.16166280210018158, "learning_rate": 0.0001, "loss": 0.1366, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.293891797556719, "eval_f1_macro": 0.6832531367067615, "eval_f1_micro": 0.8115661433562018, "eval_loss": 0.12716704607009888, "eval_runtime": 462.4995, "eval_samples_per_second": 6.195, "eval_steps_per_second": 0.195, "learning_rate": 0.0001, "step": 10647 }, { "epoch": 40.0, "eval_accuracy": 0.2907504363001745, "eval_f1_macro": 0.6878954476405744, "eval_f1_micro": 0.8115966316142277, "eval_loss": 0.1293308585882187, "eval_runtime": 477.5548, "eval_samples_per_second": 5.999, "eval_steps_per_second": 0.188, "learning_rate": 0.0001, "step": 10920 }, { "epoch": 40.29304029304029, "grad_norm": 0.23697003722190857, "learning_rate": 0.0001, "loss": 0.1361, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.2917975567190227, "eval_f1_macro": 0.6863712244346338, "eval_f1_micro": 0.8088835942818243, "eval_loss": 0.12695887684822083, "eval_runtime": 460.0604, "eval_samples_per_second": 6.227, "eval_steps_per_second": 0.196, "learning_rate": 0.0001, "step": 11193 }, { "epoch": 42.0, "eval_accuracy": 0.2914485165794066, "eval_f1_macro": 0.6837014102587237, "eval_f1_micro": 0.8109225964379168, "eval_loss": 0.12624548375606537, "eval_runtime": 468.1922, "eval_samples_per_second": 6.119, "eval_steps_per_second": 0.192, "learning_rate": 0.0001, "step": 11466 }, { "epoch": 42.124542124542124, "grad_norm": 0.23925267159938812, "learning_rate": 0.0001, "loss": 0.135, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.2949389179755672, "eval_f1_macro": 0.6983715804658985, "eval_f1_micro": 0.8122734552811262, "eval_loss": 0.1261172592639923, "eval_runtime": 464.3762, "eval_samples_per_second": 6.17, "eval_steps_per_second": 0.194, "learning_rate": 0.0001, "step": 11739 }, { "epoch": 43.956043956043956, "grad_norm": 0.7063220143318176, "learning_rate": 0.0001, "loss": 0.134, "step": 12000 }, { "epoch": 44.0, "eval_accuracy": 0.29354275741710295, "eval_f1_macro": 0.6833912042783733, "eval_f1_micro": 0.8106054209237524, "eval_loss": 0.12830273807048798, "eval_runtime": 456.3209, "eval_samples_per_second": 6.278, "eval_steps_per_second": 0.197, "learning_rate": 0.0001, "step": 12012 }, { "epoch": 45.0, "eval_accuracy": 0.2931937172774869, "eval_f1_macro": 0.7009703620992983, "eval_f1_micro": 0.8113048605846179, "eval_loss": 0.12624593079090118, "eval_runtime": 455.5361, "eval_samples_per_second": 6.289, "eval_steps_per_second": 0.198, "learning_rate": 0.0001, "step": 12285 }, { "epoch": 45.78754578754579, "grad_norm": 0.22236338257789612, "learning_rate": 0.0001, "loss": 0.1331, "step": 12500 }, { "epoch": 46.0, "eval_accuracy": 0.29598603839441534, "eval_f1_macro": 0.6963625779503319, "eval_f1_micro": 0.8147029204431017, "eval_loss": 0.12462077289819717, "eval_runtime": 465.158, "eval_samples_per_second": 6.159, "eval_steps_per_second": 0.193, "learning_rate": 0.0001, "step": 12558 }, { "epoch": 47.0, "eval_accuracy": 0.2987783595113438, "eval_f1_macro": 0.6923480831102572, "eval_f1_micro": 0.8126030873334743, "eval_loss": 0.12529432773590088, "eval_runtime": 457.756, "eval_samples_per_second": 6.259, "eval_steps_per_second": 0.197, "learning_rate": 0.0001, "step": 12831 }, { "epoch": 47.61904761904762, "grad_norm": 0.20239435136318207, "learning_rate": 0.0001, "loss": 0.1325, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.29773123909249566, "eval_f1_macro": 0.6954224588663396, "eval_f1_micro": 0.8133141600406023, "eval_loss": 0.12631145119667053, "eval_runtime": 454.7373, "eval_samples_per_second": 6.3, "eval_steps_per_second": 0.198, "learning_rate": 0.0001, "step": 13104 }, { "epoch": 49.0, "eval_accuracy": 0.3036649214659686, "eval_f1_macro": 0.6951680615776381, "eval_f1_micro": 0.8157960928984657, "eval_loss": 0.1252526491880417, "eval_runtime": 455.0418, "eval_samples_per_second": 6.296, "eval_steps_per_second": 0.198, "learning_rate": 0.0001, "step": 13377 }, { "epoch": 49.45054945054945, "grad_norm": 0.2540508806705475, "learning_rate": 0.0001, "loss": 0.1312, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.30052356020942406, "eval_f1_macro": 0.7008159807881353, "eval_f1_micro": 0.8136319704164391, "eval_loss": 0.12632089853286743, "eval_runtime": 450.3202, "eval_samples_per_second": 6.362, "eval_steps_per_second": 0.2, "learning_rate": 0.0001, "step": 13650 }, { "epoch": 51.0, "eval_accuracy": 0.3008726003490401, "eval_f1_macro": 0.7019225754370848, "eval_f1_micro": 0.8157828600150792, "eval_loss": 0.1246422603726387, "eval_runtime": 455.9576, "eval_samples_per_second": 6.283, "eval_steps_per_second": 0.197, "learning_rate": 0.0001, "step": 13923 }, { "epoch": 51.282051282051285, "grad_norm": 0.2041085660457611, "learning_rate": 0.0001, "loss": 0.1301, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.29109947643979056, "eval_f1_macro": 0.6948529573216252, "eval_f1_micro": 0.8091726695366779, "eval_loss": 0.12534211575984955, "eval_runtime": 453.0007, "eval_samples_per_second": 6.324, "eval_steps_per_second": 0.199, "learning_rate": 0.0001, "step": 14196 }, { "epoch": 53.0, "eval_accuracy": 0.3022687609075044, "eval_f1_macro": 0.7018961268425457, "eval_f1_micro": 0.8154071593050032, "eval_loss": 0.12436465919017792, "eval_runtime": 452.0514, "eval_samples_per_second": 6.338, "eval_steps_per_second": 0.199, "learning_rate": 1e-05, "step": 14469 }, { "epoch": 53.11355311355312, "grad_norm": 0.19839347898960114, "learning_rate": 1e-05, "loss": 0.1306, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.3008726003490401, "eval_f1_macro": 0.7040300990277949, "eval_f1_micro": 0.8154115879111705, "eval_loss": 0.12488020956516266, "eval_runtime": 453.4122, "eval_samples_per_second": 6.319, "eval_steps_per_second": 0.198, "learning_rate": 1e-05, "step": 14742 }, { "epoch": 54.94505494505494, "grad_norm": 0.2790488600730896, "learning_rate": 1e-05, "loss": 0.1282, "step": 15000 }, { "epoch": 55.0, "eval_accuracy": 0.30052356020942406, "eval_f1_macro": 0.6998211272872638, "eval_f1_micro": 0.814350990015231, "eval_loss": 0.12366042286157608, "eval_runtime": 451.3538, "eval_samples_per_second": 6.348, "eval_steps_per_second": 0.199, "learning_rate": 1e-05, "step": 15015 }, { "epoch": 56.0, "eval_accuracy": 0.3033158813263525, "eval_f1_macro": 0.7003615661638819, "eval_f1_micro": 0.8167849686847599, "eval_loss": 0.12352865934371948, "eval_runtime": 453.6557, "eval_samples_per_second": 6.315, "eval_steps_per_second": 0.198, "learning_rate": 1e-05, "step": 15288 }, { "epoch": 56.776556776556774, "grad_norm": 0.29984405636787415, "learning_rate": 1e-05, "loss": 0.1281, "step": 15500 }, { "epoch": 57.0, "eval_accuracy": 0.3029668411867365, "eval_f1_macro": 0.7002357451773985, "eval_f1_micro": 0.815746583643278, "eval_loss": 0.1239086389541626, "eval_runtime": 461.4901, "eval_samples_per_second": 6.208, "eval_steps_per_second": 0.195, "learning_rate": 1e-05, "step": 15561 }, { "epoch": 58.0, "eval_accuracy": 0.30261780104712044, "eval_f1_macro": 0.699458636808175, "eval_f1_micro": 0.8157211095281991, "eval_loss": 0.12343526631593704, "eval_runtime": 461.5197, "eval_samples_per_second": 6.208, "eval_steps_per_second": 0.195, "learning_rate": 1e-05, "step": 15834 }, { "epoch": 58.608058608058606, "grad_norm": 0.22550125420093536, "learning_rate": 1e-05, "loss": 0.129, "step": 16000 }, { "epoch": 59.0, "eval_accuracy": 0.30471204188481676, "eval_f1_macro": 0.7012031830256923, "eval_f1_micro": 0.8149766777324873, "eval_loss": 0.12345146387815475, "eval_runtime": 458.3652, "eval_samples_per_second": 6.25, "eval_steps_per_second": 0.196, "learning_rate": 1e-05, "step": 16107 }, { "epoch": 60.0, "eval_accuracy": 0.29808027923211167, "eval_f1_macro": 0.6932117075627285, "eval_f1_micro": 0.8127865511971473, "eval_loss": 0.1239377036690712, "eval_runtime": 461.8366, "eval_samples_per_second": 6.203, "eval_steps_per_second": 0.195, "learning_rate": 1e-05, "step": 16380 }, { "epoch": 60.43956043956044, "grad_norm": 0.21306775510311127, "learning_rate": 1e-05, "loss": 0.1284, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.3008726003490401, "eval_f1_macro": 0.707636320445804, "eval_f1_micro": 0.8174437097179208, "eval_loss": 0.12398885935544968, "eval_runtime": 462.2279, "eval_samples_per_second": 6.198, "eval_steps_per_second": 0.195, "learning_rate": 1e-05, "step": 16653 }, { "epoch": 62.0, "eval_accuracy": 0.3019197207678883, "eval_f1_macro": 0.7031525654438611, "eval_f1_micro": 0.8151862464183381, "eval_loss": 0.12334412336349487, "eval_runtime": 460.3594, "eval_samples_per_second": 6.223, "eval_steps_per_second": 0.195, "learning_rate": 1e-05, "step": 16926 }, { "epoch": 62.27106227106227, "grad_norm": 0.24677294492721558, "learning_rate": 1e-05, "loss": 0.127, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.3022687609075044, "eval_f1_macro": 0.7023118424286922, "eval_f1_micro": 0.8157861555919809, "eval_loss": 0.12325507402420044, "eval_runtime": 461.2281, "eval_samples_per_second": 6.212, "eval_steps_per_second": 0.195, "learning_rate": 1e-05, "step": 17199 }, { "epoch": 64.0, "eval_accuracy": 0.30471204188481676, "eval_f1_macro": 0.6998879503661606, "eval_f1_micro": 0.8152265453932963, "eval_loss": 0.12346883863210678, "eval_runtime": 466.8309, "eval_samples_per_second": 6.137, "eval_steps_per_second": 0.193, "learning_rate": 1e-05, "step": 17472 }, { "epoch": 64.1025641025641, "grad_norm": 0.1753869205713272, "learning_rate": 1e-05, "loss": 0.1279, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.29773123909249566, "eval_f1_macro": 0.7001319611176023, "eval_f1_micro": 0.814535597814578, "eval_loss": 0.12324482202529907, "eval_runtime": 462.9556, "eval_samples_per_second": 6.188, "eval_steps_per_second": 0.194, "learning_rate": 1e-05, "step": 17745 }, { "epoch": 65.93406593406593, "grad_norm": 0.23069432377815247, "learning_rate": 1e-05, "loss": 0.1273, "step": 18000 }, { "epoch": 66.0, "eval_accuracy": 0.3012216404886562, "eval_f1_macro": 0.7004348905275414, "eval_f1_micro": 0.814516812060642, "eval_loss": 0.12292143702507019, "eval_runtime": 464.3912, "eval_samples_per_second": 6.169, "eval_steps_per_second": 0.194, "learning_rate": 1e-05, "step": 18018 }, { "epoch": 67.0, "eval_accuracy": 0.30157068062827225, "eval_f1_macro": 0.699284425146712, "eval_f1_micro": 0.8159166351527375, "eval_loss": 0.12375594675540924, "eval_runtime": 464.0044, "eval_samples_per_second": 6.175, "eval_steps_per_second": 0.194, "learning_rate": 1e-05, "step": 18291 }, { "epoch": 67.76556776556777, "grad_norm": 0.4648727476596832, "learning_rate": 1e-05, "loss": 0.1272, "step": 18500 }, { "epoch": 68.0, "eval_accuracy": 0.299825479930192, "eval_f1_macro": 0.7038988669971363, "eval_f1_micro": 0.8175921663807173, "eval_loss": 0.1228519007563591, "eval_runtime": 462.9465, "eval_samples_per_second": 6.189, "eval_steps_per_second": 0.194, "learning_rate": 1e-05, "step": 18564 }, { "epoch": 69.0, "eval_accuracy": 0.3057591623036649, "eval_f1_macro": 0.7006054696492156, "eval_f1_micro": 0.8157472332516685, "eval_loss": 0.12302352488040924, "eval_runtime": 460.1932, "eval_samples_per_second": 6.226, "eval_steps_per_second": 0.196, "learning_rate": 1e-05, "step": 18837 }, { "epoch": 69.59706959706959, "grad_norm": 0.19116108119487762, "learning_rate": 1e-05, "loss": 0.1274, "step": 19000 }, { "epoch": 70.0, "eval_accuracy": 0.30401396160558464, "eval_f1_macro": 0.7009163688044155, "eval_f1_micro": 0.8169984042999916, "eval_loss": 0.12284138053655624, "eval_runtime": 460.5489, "eval_samples_per_second": 6.221, "eval_steps_per_second": 0.195, "learning_rate": 1e-05, "step": 19110 }, { "epoch": 71.0, "eval_accuracy": 0.3019197207678883, "eval_f1_macro": 0.7043489270867269, "eval_f1_micro": 0.8157594696169925, "eval_loss": 0.12295401096343994, "eval_runtime": 459.6032, "eval_samples_per_second": 6.234, "eval_steps_per_second": 0.196, "learning_rate": 1e-05, "step": 19383 }, { "epoch": 71.42857142857143, "grad_norm": 0.23645249009132385, "learning_rate": 1e-05, "loss": 0.1272, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.30157068062827225, "eval_f1_macro": 0.7024968584049214, "eval_f1_micro": 0.8171375869875074, "eval_loss": 0.123215451836586, "eval_runtime": 456.5626, "eval_samples_per_second": 6.275, "eval_steps_per_second": 0.197, "learning_rate": 1e-05, "step": 19656 }, { "epoch": 73.0, "eval_accuracy": 0.3054101221640489, "eval_f1_macro": 0.7048999311445782, "eval_f1_micro": 0.8173905730984317, "eval_loss": 0.12291014939546585, "eval_runtime": 459.2654, "eval_samples_per_second": 6.238, "eval_steps_per_second": 0.196, "learning_rate": 1e-05, "step": 19929 }, { "epoch": 73.26007326007326, "grad_norm": 0.18364407122135162, "learning_rate": 1e-05, "loss": 0.1267, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.3008726003490401, "eval_f1_macro": 0.6942481843546244, "eval_f1_micro": 0.8141268691688323, "eval_loss": 0.12304174154996872, "eval_runtime": 452.0481, "eval_samples_per_second": 6.338, "eval_steps_per_second": 0.199, "learning_rate": 1e-05, "step": 20202 }, { "epoch": 75.0, "eval_accuracy": 0.3033158813263525, "eval_f1_macro": 0.7001404945828892, "eval_f1_micro": 0.8161419818297063, "eval_loss": 0.1232200339436531, "eval_runtime": 455.6916, "eval_samples_per_second": 6.287, "eval_steps_per_second": 0.198, "learning_rate": 1.0000000000000002e-06, "step": 20475 }, { "epoch": 75.0915750915751, "grad_norm": 0.23693855106830597, "learning_rate": 1.0000000000000002e-06, "loss": 0.1269, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.3057591623036649, "eval_f1_macro": 0.7020186938135398, "eval_f1_micro": 0.8171137406830337, "eval_loss": 0.12267689406871796, "eval_runtime": 450.1022, "eval_samples_per_second": 6.365, "eval_steps_per_second": 0.2, "learning_rate": 1.0000000000000002e-06, "step": 20748 }, { "epoch": 76.92307692307692, "grad_norm": 0.2620922923088074, "learning_rate": 1.0000000000000002e-06, "loss": 0.1261, "step": 21000 }, { "epoch": 77.0, "eval_accuracy": 0.3078534031413613, "eval_f1_macro": 0.7059725105446049, "eval_f1_micro": 0.8190563444800733, "eval_loss": 0.12284990400075912, "eval_runtime": 450.7915, "eval_samples_per_second": 6.355, "eval_steps_per_second": 0.2, "learning_rate": 1.0000000000000002e-06, "step": 21021 }, { "epoch": 78.0, "eval_accuracy": 0.3019197207678883, "eval_f1_macro": 0.70715796895925, "eval_f1_micro": 0.8165993945509586, "eval_loss": 0.123690165579319, "eval_runtime": 449.2258, "eval_samples_per_second": 6.378, "eval_steps_per_second": 0.2, "learning_rate": 1.0000000000000002e-06, "step": 21294 }, { "epoch": 78.75457875457876, "grad_norm": 0.2725893557071686, "learning_rate": 1.0000000000000002e-06, "loss": 0.1268, "step": 21500 }, { "epoch": 79.0, "eval_accuracy": 0.30471204188481676, "eval_f1_macro": 0.6992489783675215, "eval_f1_micro": 0.8156292286874154, "eval_loss": 0.12329532951116562, "eval_runtime": 448.681, "eval_samples_per_second": 6.385, "eval_steps_per_second": 0.201, "learning_rate": 1.0000000000000002e-06, "step": 21567 }, { "epoch": 80.0, "eval_accuracy": 0.30261780104712044, "eval_f1_macro": 0.6994382162058129, "eval_f1_micro": 0.8171630910227036, "eval_loss": 0.12325812131166458, "eval_runtime": 447.9437, "eval_samples_per_second": 6.396, "eval_steps_per_second": 0.201, "learning_rate": 1.0000000000000002e-06, "step": 21840 }, { "epoch": 80.58608058608058, "grad_norm": 0.23704515397548676, "learning_rate": 1.0000000000000002e-06, "loss": 0.1271, "step": 22000 }, { "epoch": 81.0, "eval_accuracy": 0.3054101221640489, "eval_f1_macro": 0.7037260965073638, "eval_f1_micro": 0.8175692748252337, "eval_loss": 0.12240613251924515, "eval_runtime": 448.5942, "eval_samples_per_second": 6.387, "eval_steps_per_second": 0.201, "learning_rate": 1.0000000000000002e-06, "step": 22113 }, { "epoch": 82.0, "eval_accuracy": 0.30017452006980805, "eval_f1_macro": 0.6972115267564822, "eval_f1_micro": 0.8150502097368756, "eval_loss": 0.12270382046699524, "eval_runtime": 454.1003, "eval_samples_per_second": 6.309, "eval_steps_per_second": 0.198, "learning_rate": 1.0000000000000002e-06, "step": 22386 }, { "epoch": 82.41758241758242, "grad_norm": 0.24043193459510803, "learning_rate": 1.0000000000000002e-06, "loss": 0.1263, "step": 22500 }, { "epoch": 83.0, "eval_accuracy": 0.2994764397905759, "eval_f1_macro": 0.6939342317359001, "eval_f1_micro": 0.8146007733820593, "eval_loss": 0.12315402179956436, "eval_runtime": 453.2474, "eval_samples_per_second": 6.321, "eval_steps_per_second": 0.199, "learning_rate": 1.0000000000000002e-06, "step": 22659 }, { "epoch": 84.0, "eval_accuracy": 0.30261780104712044, "eval_f1_macro": 0.7017348072852434, "eval_f1_micro": 0.817694818782958, "eval_loss": 0.1225922629237175, "eval_runtime": 454.9911, "eval_samples_per_second": 6.297, "eval_steps_per_second": 0.198, "learning_rate": 1.0000000000000002e-06, "step": 22932 }, { "epoch": 84.24908424908425, "grad_norm": 0.20054659247398376, "learning_rate": 1.0000000000000002e-06, "loss": 0.1265, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.30715532286212915, "eval_f1_macro": 0.7059249231157801, "eval_f1_micro": 0.8180560794668679, "eval_loss": 0.1230376735329628, "eval_runtime": 454.3422, "eval_samples_per_second": 6.306, "eval_steps_per_second": 0.198, "learning_rate": 1.0000000000000002e-06, "step": 23205 }, { "epoch": 86.0, "eval_accuracy": 0.3033158813263525, "eval_f1_macro": 0.7058661509975368, "eval_f1_micro": 0.8168233713901948, "eval_loss": 0.12335028499364853, "eval_runtime": 455.4692, "eval_samples_per_second": 6.29, "eval_steps_per_second": 0.198, "learning_rate": 1.0000000000000002e-06, "step": 23478 }, { "epoch": 86.08058608058609, "grad_norm": 0.2747795581817627, "learning_rate": 1.0000000000000002e-06, "loss": 0.1265, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.30052356020942406, "eval_f1_macro": 0.6943550775207357, "eval_f1_micro": 0.8143178633603668, "eval_loss": 0.12355341017246246, "eval_runtime": 467.692, "eval_samples_per_second": 6.126, "eval_steps_per_second": 0.192, "learning_rate": 1.0000000000000002e-06, "step": 23751 }, { "epoch": 87.91208791208791, "grad_norm": 0.2173408567905426, "learning_rate": 1.0000000000000002e-07, "loss": 0.1266, "step": 24000 }, { "epoch": 88.0, "eval_accuracy": 0.3078534031413613, "eval_f1_macro": 0.702889046791746, "eval_f1_micro": 0.8186198516780492, "eval_loss": 0.12264065444469452, "eval_runtime": 469.0124, "eval_samples_per_second": 6.109, "eval_steps_per_second": 0.192, "learning_rate": 1.0000000000000002e-07, "step": 24024 }, { "epoch": 89.0, "eval_accuracy": 0.3075043630017452, "eval_f1_macro": 0.7083793564773274, "eval_f1_micro": 0.817912934805249, "eval_loss": 0.1229795441031456, "eval_runtime": 470.229, "eval_samples_per_second": 6.093, "eval_steps_per_second": 0.191, "learning_rate": 1.0000000000000002e-07, "step": 24297 }, { "epoch": 89.74358974358974, "grad_norm": 0.22348648309707642, "learning_rate": 1.0000000000000002e-07, "loss": 0.1263, "step": 24500 }, { "epoch": 90.0, "eval_accuracy": 0.3036649214659686, "eval_f1_macro": 0.710478296923057, "eval_f1_micro": 0.8195029312195524, "eval_loss": 0.12317965924739838, "eval_runtime": 465.171, "eval_samples_per_second": 6.159, "eval_steps_per_second": 0.193, "learning_rate": 1.0000000000000002e-07, "step": 24570 }, { "epoch": 91.0, "eval_accuracy": 0.3019197207678883, "eval_f1_macro": 0.6971981356235558, "eval_f1_micro": 0.8155397390272835, "eval_loss": 0.12267619371414185, "eval_runtime": 458.3455, "eval_samples_per_second": 6.251, "eval_steps_per_second": 0.196, "learning_rate": 1.0000000000000002e-07, "step": 24843 }, { "epoch": 91.0, "learning_rate": 1.0000000000000002e-07, "step": 24843, "total_flos": 1.1760993126572918e+21, "train_loss": 0.1412498749900842, "train_runtime": 170277.6523, "train_samples_per_second": 7.678, "train_steps_per_second": 0.24 } ], "logging_steps": 500, "max_steps": 40950, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 10 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1760993126572918e+21, "train_batch_size": 32, "trial_name": null, "trial_params": null }