{ "best_metric": 0.7926221335992024, "best_model_checkpoint": "swin-large-patch4-window7-224-in22k-finetuned-lora-medmnistv2/checkpoint-985", "epoch": 9.954337899543379, "eval_steps": 500, "global_step": 1090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "grad_norm": 3.442108154296875, "learning_rate": 0.004954128440366973, "loss": 1.1496, "step": 10 }, { "epoch": 0.18, "grad_norm": 1.3745663166046143, "learning_rate": 0.004908256880733945, "loss": 0.9658, "step": 20 }, { "epoch": 0.27, "grad_norm": 1.6516236066818237, "learning_rate": 0.004862385321100918, "loss": 0.9225, "step": 30 }, { "epoch": 0.37, "grad_norm": 1.4029324054718018, "learning_rate": 0.00481651376146789, "loss": 0.8968, "step": 40 }, { "epoch": 0.46, "grad_norm": 1.2768313884735107, "learning_rate": 0.0047706422018348625, "loss": 0.8582, "step": 50 }, { "epoch": 0.55, "grad_norm": 1.250333309173584, "learning_rate": 0.004724770642201835, "loss": 0.8362, "step": 60 }, { "epoch": 0.64, "grad_norm": 1.2696894407272339, "learning_rate": 0.004678899082568808, "loss": 0.8948, "step": 70 }, { "epoch": 0.73, "grad_norm": 1.02017080783844, "learning_rate": 0.00463302752293578, "loss": 0.8448, "step": 80 }, { "epoch": 0.82, "grad_norm": 1.1102752685546875, "learning_rate": 0.0045871559633027525, "loss": 0.8288, "step": 90 }, { "epoch": 0.91, "grad_norm": 0.8990870714187622, "learning_rate": 0.004541284403669725, "loss": 0.8516, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.7547357926221336, "eval_f1": 0.41878801809381894, "eval_loss": 0.7470152974128723, "eval_precision": 0.5455908887866956, "eval_recall": 0.3913939658670338, "eval_runtime": 11.5403, "eval_samples_per_second": 86.913, "eval_steps_per_second": 5.459, "step": 109 }, { "epoch": 1.0, "grad_norm": 1.1006461381912231, "learning_rate": 0.004495412844036698, "loss": 0.8742, "step": 110 }, { "epoch": 1.1, "grad_norm": 1.271594524383545, "learning_rate": 0.0044495412844036695, "loss": 0.8544, "step": 120 }, { "epoch": 1.19, "grad_norm": 1.24767005443573, "learning_rate": 0.004403669724770643, "loss": 0.792, "step": 130 }, { "epoch": 1.28, "grad_norm": 1.5129131078720093, "learning_rate": 0.004357798165137615, "loss": 0.8107, "step": 140 }, { "epoch": 1.37, "grad_norm": 0.728676974773407, "learning_rate": 0.004311926605504587, "loss": 0.7921, "step": 150 }, { "epoch": 1.46, "grad_norm": 1.073712944984436, "learning_rate": 0.0042660550458715595, "loss": 0.7453, "step": 160 }, { "epoch": 1.55, "grad_norm": 1.1818609237670898, "learning_rate": 0.004220183486238533, "loss": 0.8023, "step": 170 }, { "epoch": 1.64, "grad_norm": 0.6278601884841919, "learning_rate": 0.004174311926605505, "loss": 0.8196, "step": 180 }, { "epoch": 1.74, "grad_norm": 0.796804666519165, "learning_rate": 0.004128440366972477, "loss": 0.8063, "step": 190 }, { "epoch": 1.83, "grad_norm": 1.4323725700378418, "learning_rate": 0.00408256880733945, "loss": 0.7128, "step": 200 }, { "epoch": 1.92, "grad_norm": 0.5659187436103821, "learning_rate": 0.004036697247706422, "loss": 0.7738, "step": 210 }, { "epoch": 2.0, "eval_accuracy": 0.7168494516450648, "eval_f1": 0.3577314775338443, "eval_loss": 0.8952543139457703, "eval_precision": 0.42246935817657555, "eval_recall": 0.4458814441250039, "eval_runtime": 11.2901, "eval_samples_per_second": 88.839, "eval_steps_per_second": 5.58, "step": 219 }, { "epoch": 2.01, "grad_norm": 2.2657077312469482, "learning_rate": 0.003990825688073394, "loss": 0.8065, "step": 220 }, { "epoch": 2.1, "grad_norm": 0.6983966827392578, "learning_rate": 0.003944954128440367, "loss": 0.8244, "step": 230 }, { "epoch": 2.19, "grad_norm": 0.9509605765342712, "learning_rate": 0.0038990825688073397, "loss": 0.7429, "step": 240 }, { "epoch": 2.28, "grad_norm": 0.8736411333084106, "learning_rate": 0.0038532110091743124, "loss": 0.7479, "step": 250 }, { "epoch": 2.37, "grad_norm": 1.0205260515213013, "learning_rate": 0.0038073394495412843, "loss": 0.7733, "step": 260 }, { "epoch": 2.47, "grad_norm": 0.9213468432426453, "learning_rate": 0.003761467889908257, "loss": 0.6993, "step": 270 }, { "epoch": 2.56, "grad_norm": 0.7223290801048279, "learning_rate": 0.0037155963302752293, "loss": 0.7582, "step": 280 }, { "epoch": 2.65, "grad_norm": 0.8736669421195984, "learning_rate": 0.003669724770642202, "loss": 0.7483, "step": 290 }, { "epoch": 2.74, "grad_norm": 1.2014636993408203, "learning_rate": 0.0036238532110091743, "loss": 0.7592, "step": 300 }, { "epoch": 2.83, "grad_norm": 1.290935754776001, "learning_rate": 0.003577981651376147, "loss": 0.819, "step": 310 }, { "epoch": 2.92, "grad_norm": 0.751524806022644, "learning_rate": 0.0035321100917431194, "loss": 0.6994, "step": 320 }, { "epoch": 3.0, "eval_accuracy": 0.7607178464606181, "eval_f1": 0.5104809166351773, "eval_loss": 0.6592820286750793, "eval_precision": 0.6256931769842607, "eval_recall": 0.5058522052667251, "eval_runtime": 11.3494, "eval_samples_per_second": 88.375, "eval_steps_per_second": 5.551, "step": 328 }, { "epoch": 3.01, "grad_norm": 1.0127859115600586, "learning_rate": 0.003486238532110092, "loss": 0.723, "step": 330 }, { "epoch": 3.11, "grad_norm": 0.8557056784629822, "learning_rate": 0.0034403669724770644, "loss": 0.762, "step": 340 }, { "epoch": 3.2, "grad_norm": 0.7101117968559265, "learning_rate": 0.003394495412844037, "loss": 0.746, "step": 350 }, { "epoch": 3.29, "grad_norm": 0.732359766960144, "learning_rate": 0.003348623853211009, "loss": 0.7152, "step": 360 }, { "epoch": 3.38, "grad_norm": 0.7687917947769165, "learning_rate": 0.0033027522935779817, "loss": 0.6849, "step": 370 }, { "epoch": 3.47, "grad_norm": 0.9754200577735901, "learning_rate": 0.003256880733944954, "loss": 0.721, "step": 380 }, { "epoch": 3.56, "grad_norm": 0.9926655888557434, "learning_rate": 0.003211009174311927, "loss": 0.7457, "step": 390 }, { "epoch": 3.65, "grad_norm": 1.4650967121124268, "learning_rate": 0.003165137614678899, "loss": 0.7209, "step": 400 }, { "epoch": 3.74, "grad_norm": 0.7933741211891174, "learning_rate": 0.003119266055045872, "loss": 0.7785, "step": 410 }, { "epoch": 3.84, "grad_norm": 0.7204951047897339, "learning_rate": 0.003073394495412844, "loss": 0.7276, "step": 420 }, { "epoch": 3.93, "grad_norm": 1.0182371139526367, "learning_rate": 0.003027522935779817, "loss": 0.6731, "step": 430 }, { "epoch": 4.0, "eval_accuracy": 0.7716849451645065, "eval_f1": 0.5382789492033663, "eval_loss": 0.6144729852676392, "eval_precision": 0.6321625904242083, "eval_recall": 0.500050058598068, "eval_runtime": 11.305, "eval_samples_per_second": 88.722, "eval_steps_per_second": 5.573, "step": 438 }, { "epoch": 4.02, "grad_norm": 0.5864942669868469, "learning_rate": 0.002981651376146789, "loss": 0.685, "step": 440 }, { "epoch": 4.11, "grad_norm": 0.816683292388916, "learning_rate": 0.002935779816513762, "loss": 0.6859, "step": 450 }, { "epoch": 4.2, "grad_norm": 0.6105664968490601, "learning_rate": 0.0028899082568807338, "loss": 0.6785, "step": 460 }, { "epoch": 4.29, "grad_norm": 1.0114610195159912, "learning_rate": 0.0028440366972477065, "loss": 0.6661, "step": 470 }, { "epoch": 4.38, "grad_norm": 1.1204463243484497, "learning_rate": 0.002798165137614679, "loss": 0.664, "step": 480 }, { "epoch": 4.47, "grad_norm": 1.0051954984664917, "learning_rate": 0.0027522935779816515, "loss": 0.7172, "step": 490 }, { "epoch": 4.57, "grad_norm": 0.6869407892227173, "learning_rate": 0.002706422018348624, "loss": 0.7341, "step": 500 }, { "epoch": 4.66, "grad_norm": 0.720870316028595, "learning_rate": 0.0026605504587155966, "loss": 0.7028, "step": 510 }, { "epoch": 4.75, "grad_norm": 0.9448994994163513, "learning_rate": 0.002614678899082569, "loss": 0.7383, "step": 520 }, { "epoch": 4.84, "grad_norm": 0.9630647897720337, "learning_rate": 0.0025688073394495416, "loss": 0.6727, "step": 530 }, { "epoch": 4.93, "grad_norm": 0.8725408315658569, "learning_rate": 0.0025229357798165135, "loss": 0.7266, "step": 540 }, { "epoch": 5.0, "eval_accuracy": 0.7397806580259222, "eval_f1": 0.4934894169090878, "eval_loss": 0.6838864088058472, "eval_precision": 0.5519819307791096, "eval_recall": 0.5344054167239178, "eval_runtime": 11.2776, "eval_samples_per_second": 88.937, "eval_steps_per_second": 5.586, "step": 547 }, { "epoch": 5.02, "grad_norm": 1.1792114973068237, "learning_rate": 0.0024770642201834866, "loss": 0.6676, "step": 550 }, { "epoch": 5.11, "grad_norm": 0.7483982443809509, "learning_rate": 0.002431192660550459, "loss": 0.7229, "step": 560 }, { "epoch": 5.21, "grad_norm": 0.8732199668884277, "learning_rate": 0.0023853211009174312, "loss": 0.6366, "step": 570 }, { "epoch": 5.3, "grad_norm": 1.1174126863479614, "learning_rate": 0.002339449541284404, "loss": 0.6855, "step": 580 }, { "epoch": 5.39, "grad_norm": 0.7935824990272522, "learning_rate": 0.0022935779816513763, "loss": 0.7225, "step": 590 }, { "epoch": 5.48, "grad_norm": 0.9188850522041321, "learning_rate": 0.002247706422018349, "loss": 0.6657, "step": 600 }, { "epoch": 5.57, "grad_norm": 0.7955396771430969, "learning_rate": 0.0022018348623853213, "loss": 0.6797, "step": 610 }, { "epoch": 5.66, "grad_norm": 1.0317906141281128, "learning_rate": 0.0021559633027522936, "loss": 0.6589, "step": 620 }, { "epoch": 5.75, "grad_norm": 0.6561917662620544, "learning_rate": 0.0021100917431192663, "loss": 0.6435, "step": 630 }, { "epoch": 5.84, "grad_norm": 0.8070980906486511, "learning_rate": 0.0020642201834862386, "loss": 0.6303, "step": 640 }, { "epoch": 5.94, "grad_norm": 0.6900261044502258, "learning_rate": 0.002018348623853211, "loss": 0.6388, "step": 650 }, { "epoch": 6.0, "eval_accuracy": 0.7666999002991027, "eval_f1": 0.5338252978902394, "eval_loss": 0.6242878437042236, "eval_precision": 0.6116740127093266, "eval_recall": 0.5062826891866704, "eval_runtime": 11.256, "eval_samples_per_second": 89.108, "eval_steps_per_second": 5.597, "step": 657 }, { "epoch": 6.03, "grad_norm": 0.8859825134277344, "learning_rate": 0.0019724770642201837, "loss": 0.6252, "step": 660 }, { "epoch": 6.12, "grad_norm": 0.7855720520019531, "learning_rate": 0.0019266055045871562, "loss": 0.6026, "step": 670 }, { "epoch": 6.21, "grad_norm": 0.7128086090087891, "learning_rate": 0.0018807339449541285, "loss": 0.5799, "step": 680 }, { "epoch": 6.3, "grad_norm": 0.6269203424453735, "learning_rate": 0.001834862385321101, "loss": 0.6358, "step": 690 }, { "epoch": 6.39, "grad_norm": 1.1196659803390503, "learning_rate": 0.0017889908256880735, "loss": 0.6282, "step": 700 }, { "epoch": 6.48, "grad_norm": 1.253045678138733, "learning_rate": 0.001743119266055046, "loss": 0.6109, "step": 710 }, { "epoch": 6.58, "grad_norm": 0.946592390537262, "learning_rate": 0.0016972477064220186, "loss": 0.58, "step": 720 }, { "epoch": 6.67, "grad_norm": 0.9241579174995422, "learning_rate": 0.0016513761467889909, "loss": 0.6492, "step": 730 }, { "epoch": 6.76, "grad_norm": 0.9794093370437622, "learning_rate": 0.0016055045871559634, "loss": 0.6333, "step": 740 }, { "epoch": 6.85, "grad_norm": 0.9750506281852722, "learning_rate": 0.001559633027522936, "loss": 0.6883, "step": 750 }, { "epoch": 6.94, "grad_norm": 1.1783692836761475, "learning_rate": 0.0015137614678899084, "loss": 0.6495, "step": 760 }, { "epoch": 7.0, "eval_accuracy": 0.7826520438683948, "eval_f1": 0.6162957297860807, "eval_loss": 0.6161015033721924, "eval_precision": 0.6357243111624864, "eval_recall": 0.6152698884549003, "eval_runtime": 11.1928, "eval_samples_per_second": 89.611, "eval_steps_per_second": 5.629, "step": 766 }, { "epoch": 7.03, "grad_norm": 1.0667985677719116, "learning_rate": 0.001467889908256881, "loss": 0.6615, "step": 770 }, { "epoch": 7.12, "grad_norm": 1.2581771612167358, "learning_rate": 0.0014220183486238532, "loss": 0.5515, "step": 780 }, { "epoch": 7.21, "grad_norm": 1.012791395187378, "learning_rate": 0.0013761467889908258, "loss": 0.607, "step": 790 }, { "epoch": 7.31, "grad_norm": 0.9969860911369324, "learning_rate": 0.0013302752293577983, "loss": 0.6396, "step": 800 }, { "epoch": 7.4, "grad_norm": 0.9849869608879089, "learning_rate": 0.0012844036697247708, "loss": 0.6563, "step": 810 }, { "epoch": 7.49, "grad_norm": 1.0733628273010254, "learning_rate": 0.0012385321100917433, "loss": 0.5833, "step": 820 }, { "epoch": 7.58, "grad_norm": 1.044959306716919, "learning_rate": 0.0011926605504587156, "loss": 0.5745, "step": 830 }, { "epoch": 7.67, "grad_norm": 0.8886712193489075, "learning_rate": 0.0011467889908256881, "loss": 0.5437, "step": 840 }, { "epoch": 7.76, "grad_norm": 1.2174370288848877, "learning_rate": 0.0011009174311926607, "loss": 0.5654, "step": 850 }, { "epoch": 7.85, "grad_norm": 0.8696100115776062, "learning_rate": 0.0010550458715596332, "loss": 0.6165, "step": 860 }, { "epoch": 7.95, "grad_norm": 1.2088637351989746, "learning_rate": 0.0010091743119266055, "loss": 0.5639, "step": 870 }, { "epoch": 8.0, "eval_accuracy": 0.7836490528414756, "eval_f1": 0.5930825856311631, "eval_loss": 0.5751714706420898, "eval_precision": 0.601839864113542, "eval_recall": 0.59116309877434, "eval_runtime": 11.2311, "eval_samples_per_second": 89.305, "eval_steps_per_second": 5.609, "step": 876 }, { "epoch": 8.04, "grad_norm": 0.6919325590133667, "learning_rate": 0.0009633027522935781, "loss": 0.6158, "step": 880 }, { "epoch": 8.13, "grad_norm": 1.1032708883285522, "learning_rate": 0.0009174311926605505, "loss": 0.4922, "step": 890 }, { "epoch": 8.22, "grad_norm": 1.5484873056411743, "learning_rate": 0.000871559633027523, "loss": 0.5453, "step": 900 }, { "epoch": 8.31, "grad_norm": 1.1881980895996094, "learning_rate": 0.0008256880733944954, "loss": 0.6307, "step": 910 }, { "epoch": 8.4, "grad_norm": 0.80064457654953, "learning_rate": 0.000779816513761468, "loss": 0.5129, "step": 920 }, { "epoch": 8.49, "grad_norm": 0.7097495794296265, "learning_rate": 0.0007339449541284405, "loss": 0.6111, "step": 930 }, { "epoch": 8.58, "grad_norm": 1.0539182424545288, "learning_rate": 0.0006880733944954129, "loss": 0.5803, "step": 940 }, { "epoch": 8.68, "grad_norm": 1.1059874296188354, "learning_rate": 0.0006422018348623854, "loss": 0.6122, "step": 950 }, { "epoch": 8.77, "grad_norm": 0.969431459903717, "learning_rate": 0.0005963302752293578, "loss": 0.5662, "step": 960 }, { "epoch": 8.86, "grad_norm": 0.9566834568977356, "learning_rate": 0.0005504587155963303, "loss": 0.5317, "step": 970 }, { "epoch": 8.95, "grad_norm": 1.040589690208435, "learning_rate": 0.0005045871559633027, "loss": 0.6012, "step": 980 }, { "epoch": 9.0, "eval_accuracy": 0.7926221335992024, "eval_f1": 0.6175857153722378, "eval_loss": 0.5508496165275574, "eval_precision": 0.6303209720740276, "eval_recall": 0.6194980716058, "eval_runtime": 11.1597, "eval_samples_per_second": 89.877, "eval_steps_per_second": 5.645, "step": 985 }, { "epoch": 9.04, "grad_norm": 0.8962224721908569, "learning_rate": 0.00045871559633027525, "loss": 0.5106, "step": 990 }, { "epoch": 9.13, "grad_norm": 0.9521830677986145, "learning_rate": 0.0004128440366972477, "loss": 0.5768, "step": 1000 }, { "epoch": 9.22, "grad_norm": 1.102037787437439, "learning_rate": 0.00036697247706422024, "loss": 0.5709, "step": 1010 }, { "epoch": 9.32, "grad_norm": 1.3637118339538574, "learning_rate": 0.0003211009174311927, "loss": 0.4719, "step": 1020 }, { "epoch": 9.41, "grad_norm": 0.8370681405067444, "learning_rate": 0.00027522935779816516, "loss": 0.5717, "step": 1030 }, { "epoch": 9.5, "grad_norm": 1.0408118963241577, "learning_rate": 0.00022935779816513763, "loss": 0.5216, "step": 1040 }, { "epoch": 9.59, "grad_norm": 1.2250603437423706, "learning_rate": 0.00018348623853211012, "loss": 0.5648, "step": 1050 }, { "epoch": 9.68, "grad_norm": 0.9561938047409058, "learning_rate": 0.00013761467889908258, "loss": 0.5316, "step": 1060 }, { "epoch": 9.77, "grad_norm": 1.2182472944259644, "learning_rate": 9.174311926605506e-05, "loss": 0.5773, "step": 1070 }, { "epoch": 9.86, "grad_norm": 1.1085007190704346, "learning_rate": 4.587155963302753e-05, "loss": 0.5777, "step": 1080 }, { "epoch": 9.95, "grad_norm": 0.9443553686141968, "learning_rate": 0.0, "loss": 0.5468, "step": 1090 }, { "epoch": 9.95, "eval_accuracy": 0.7856430707876371, "eval_f1": 0.6355311291077588, "eval_loss": 0.566506028175354, "eval_precision": 0.6470430666433826, "eval_recall": 0.6287911299620902, "eval_runtime": 11.1527, "eval_samples_per_second": 89.934, "eval_steps_per_second": 5.649, "step": 1090 }, { "epoch": 9.95, "step": 1090, "total_flos": 1.2358226368562393e+19, "train_loss": 0.6880708248243419, "train_runtime": 1791.2534, "train_samples_per_second": 39.118, "train_steps_per_second": 0.609 } ], "logging_steps": 10, "max_steps": 1090, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.2358226368562393e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }