{ "best_metric": 0.9716738197424892, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-382", "epoch": 9.882352941176471, "eval_steps": 500, "global_step": 630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "grad_norm": 1.3673616647720337, "learning_rate": 0.004920634920634921, "loss": 0.4969, "step": 10 }, { "epoch": 0.31, "grad_norm": 0.564309298992157, "learning_rate": 0.004841269841269842, "loss": 0.3486, "step": 20 }, { "epoch": 0.47, "grad_norm": 0.83164381980896, "learning_rate": 0.0047619047619047615, "loss": 0.2505, "step": 30 }, { "epoch": 0.63, "grad_norm": 0.7283287644386292, "learning_rate": 0.004682539682539683, "loss": 0.2221, "step": 40 }, { "epoch": 0.78, "grad_norm": 0.3768545687198639, "learning_rate": 0.004603174603174603, "loss": 0.2593, "step": 50 }, { "epoch": 0.94, "grad_norm": 0.4381789267063141, "learning_rate": 0.004523809523809524, "loss": 0.211, "step": 60 }, { "epoch": 0.99, "eval_accuracy": 0.9605150214592275, "eval_f1": 0.9500622474690998, "eval_loss": 0.11396056413650513, "eval_precision": 0.9400752542471662, "eval_recall": 0.9615826914848096, "eval_runtime": 11.8657, "eval_samples_per_second": 98.182, "eval_steps_per_second": 6.152, "step": 63 }, { "epoch": 1.1, "grad_norm": 0.8330244421958923, "learning_rate": 0.0044444444444444444, "loss": 0.2013, "step": 70 }, { "epoch": 1.25, "grad_norm": 1.0441814661026, "learning_rate": 0.004365079365079365, "loss": 0.1882, "step": 80 }, { "epoch": 1.41, "grad_norm": 0.8081804513931274, "learning_rate": 0.004285714285714286, "loss": 0.2045, "step": 90 }, { "epoch": 1.57, "grad_norm": 0.49920302629470825, "learning_rate": 0.004206349206349207, "loss": 0.194, "step": 100 }, { "epoch": 1.73, "grad_norm": 0.39656010270118713, "learning_rate": 0.0041269841269841265, "loss": 0.1918, "step": 110 }, { "epoch": 1.88, "grad_norm": 0.4606592655181885, "learning_rate": 0.004047619047619048, "loss": 0.1911, "step": 120 }, { "epoch": 1.99, "eval_accuracy": 0.9330472103004291, "eval_f1": 0.9185857738786393, "eval_loss": 0.151698499917984, "eval_precision": 0.8989291177970423, "eval_recall": 0.948319732257473, "eval_runtime": 11.9483, "eval_samples_per_second": 97.504, "eval_steps_per_second": 6.11, "step": 127 }, { "epoch": 2.04, "grad_norm": 0.6359087824821472, "learning_rate": 0.003976190476190476, "loss": 0.1652, "step": 130 }, { "epoch": 2.2, "grad_norm": 0.439413845539093, "learning_rate": 0.003896825396825397, "loss": 0.1795, "step": 140 }, { "epoch": 2.35, "grad_norm": 0.9134456515312195, "learning_rate": 0.003817460317460317, "loss": 0.2155, "step": 150 }, { "epoch": 2.51, "grad_norm": 1.381035566329956, "learning_rate": 0.0037380952380952383, "loss": 0.218, "step": 160 }, { "epoch": 2.67, "grad_norm": 0.7177550196647644, "learning_rate": 0.0036587301587301586, "loss": 0.1839, "step": 170 }, { "epoch": 2.82, "grad_norm": 0.7977726459503174, "learning_rate": 0.0035793650793650793, "loss": 0.1595, "step": 180 }, { "epoch": 2.98, "grad_norm": 1.0747120380401611, "learning_rate": 0.0034999999999999996, "loss": 0.1695, "step": 190 }, { "epoch": 3.0, "eval_accuracy": 0.9579399141630901, "eval_f1": 0.9470671089099915, "eval_loss": 0.11627380549907684, "eval_precision": 0.9354318260568261, "eval_recall": 0.960904547955254, "eval_runtime": 11.7288, "eval_samples_per_second": 99.328, "eval_steps_per_second": 6.224, "step": 191 }, { "epoch": 3.14, "grad_norm": 0.328319787979126, "learning_rate": 0.003420634920634921, "loss": 0.2, "step": 200 }, { "epoch": 3.29, "grad_norm": 0.33479979634284973, "learning_rate": 0.003341269841269841, "loss": 0.1086, "step": 210 }, { "epoch": 3.45, "grad_norm": 0.2731302082538605, "learning_rate": 0.003261904761904762, "loss": 0.1782, "step": 220 }, { "epoch": 3.61, "grad_norm": 0.507108211517334, "learning_rate": 0.0031825396825396826, "loss": 0.1869, "step": 230 }, { "epoch": 3.76, "grad_norm": 2.0676915645599365, "learning_rate": 0.0031031746031746034, "loss": 0.1438, "step": 240 }, { "epoch": 3.92, "grad_norm": 0.9014037847518921, "learning_rate": 0.0030317460317460317, "loss": 0.1556, "step": 250 }, { "epoch": 4.0, "eval_accuracy": 0.9570815450643777, "eval_f1": 0.9416580530916725, "eval_loss": 0.11594364047050476, "eval_precision": 0.9668957588361103, "eval_recall": 0.9220192248914971, "eval_runtime": 11.9006, "eval_samples_per_second": 97.894, "eval_steps_per_second": 6.134, "step": 255 }, { "epoch": 4.08, "grad_norm": 0.5203756093978882, "learning_rate": 0.0029523809523809524, "loss": 0.1301, "step": 260 }, { "epoch": 4.24, "grad_norm": 0.2871193587779999, "learning_rate": 0.0028730158730158727, "loss": 0.1614, "step": 270 }, { "epoch": 4.39, "grad_norm": 0.5260260701179504, "learning_rate": 0.002793650793650794, "loss": 0.1536, "step": 280 }, { "epoch": 4.55, "grad_norm": 1.3978557586669922, "learning_rate": 0.0027142857142857142, "loss": 0.1558, "step": 290 }, { "epoch": 4.71, "grad_norm": 0.3816111385822296, "learning_rate": 0.002634920634920635, "loss": 0.1393, "step": 300 }, { "epoch": 4.86, "grad_norm": 0.8400008678436279, "learning_rate": 0.0025555555555555553, "loss": 0.173, "step": 310 }, { "epoch": 4.99, "eval_accuracy": 0.9502145922746781, "eval_f1": 0.9381227106227107, "eval_loss": 0.11663959920406342, "eval_precision": 0.9228616598558795, "eval_recall": 0.9578061006173972, "eval_runtime": 11.8429, "eval_samples_per_second": 98.371, "eval_steps_per_second": 6.164, "step": 318 }, { "epoch": 5.02, "grad_norm": 0.4521535336971283, "learning_rate": 0.0024761904761904764, "loss": 0.1657, "step": 320 }, { "epoch": 5.18, "grad_norm": 1.030579924583435, "learning_rate": 0.0024047619047619048, "loss": 0.1658, "step": 330 }, { "epoch": 5.33, "grad_norm": 0.7163300514221191, "learning_rate": 0.0023253968253968255, "loss": 0.142, "step": 340 }, { "epoch": 5.49, "grad_norm": 0.45034098625183105, "learning_rate": 0.0022460317460317463, "loss": 0.1319, "step": 350 }, { "epoch": 5.65, "grad_norm": 0.8648952841758728, "learning_rate": 0.002166666666666667, "loss": 0.1621, "step": 360 }, { "epoch": 5.8, "grad_norm": 1.5358220338821411, "learning_rate": 0.0020873015873015873, "loss": 0.1747, "step": 370 }, { "epoch": 5.96, "grad_norm": 4.473792552947998, "learning_rate": 0.002007936507936508, "loss": 0.1485, "step": 380 }, { "epoch": 5.99, "eval_accuracy": 0.9716738197424892, "eval_f1": 0.9637751828201366, "eval_loss": 0.08252137899398804, "eval_precision": 0.9578261710612463, "eval_recall": 0.9701960694418974, "eval_runtime": 12.3059, "eval_samples_per_second": 94.67, "eval_steps_per_second": 5.932, "step": 382 }, { "epoch": 6.12, "grad_norm": 0.918424665927887, "learning_rate": 0.0019285714285714288, "loss": 0.1477, "step": 390 }, { "epoch": 6.27, "grad_norm": 0.2846250534057617, "learning_rate": 0.0018492063492063493, "loss": 0.107, "step": 400 }, { "epoch": 6.43, "grad_norm": 1.0383787155151367, "learning_rate": 0.00176984126984127, "loss": 0.1429, "step": 410 }, { "epoch": 6.59, "grad_norm": 0.5193153619766235, "learning_rate": 0.0016904761904761906, "loss": 0.1611, "step": 420 }, { "epoch": 6.75, "grad_norm": 0.6372103095054626, "learning_rate": 0.0016111111111111113, "loss": 0.1602, "step": 430 }, { "epoch": 6.9, "grad_norm": 0.4708787202835083, "learning_rate": 0.0015317460317460319, "loss": 0.1854, "step": 440 }, { "epoch": 7.0, "eval_accuracy": 0.9716738197424892, "eval_f1": 0.9637751828201366, "eval_loss": 0.08776707202196121, "eval_precision": 0.9578261710612463, "eval_recall": 0.9701960694418974, "eval_runtime": 11.6697, "eval_samples_per_second": 99.831, "eval_steps_per_second": 6.256, "step": 446 }, { "epoch": 7.06, "grad_norm": 1.3629688024520874, "learning_rate": 0.0014523809523809526, "loss": 0.2118, "step": 450 }, { "epoch": 7.22, "grad_norm": 0.5590702295303345, "learning_rate": 0.0013730158730158731, "loss": 0.1608, "step": 460 }, { "epoch": 7.37, "grad_norm": 1.0309607982635498, "learning_rate": 0.0012936507936507939, "loss": 0.1684, "step": 470 }, { "epoch": 7.53, "grad_norm": 0.41553324460983276, "learning_rate": 0.0012142857142857144, "loss": 0.116, "step": 480 }, { "epoch": 7.69, "grad_norm": 0.3408653736114502, "learning_rate": 0.001134920634920635, "loss": 0.1188, "step": 490 }, { "epoch": 7.84, "grad_norm": 0.4753941595554352, "learning_rate": 0.0010555555555555557, "loss": 0.1345, "step": 500 }, { "epoch": 8.0, "grad_norm": 0.5954430103302002, "learning_rate": 0.0009761904761904762, "loss": 0.1353, "step": 510 }, { "epoch": 8.0, "eval_accuracy": 0.9587982832618026, "eval_f1": 0.9483988129512335, "eval_loss": 0.10603910684585571, "eval_precision": 0.9351169479762169, "eval_recall": 0.9646773182957393, "eval_runtime": 11.7084, "eval_samples_per_second": 99.501, "eval_steps_per_second": 6.235, "step": 510 }, { "epoch": 8.16, "grad_norm": 0.35974177718162537, "learning_rate": 0.0008968253968253968, "loss": 0.1192, "step": 520 }, { "epoch": 8.31, "grad_norm": 0.4420110285282135, "learning_rate": 0.0008174603174603175, "loss": 0.1342, "step": 530 }, { "epoch": 8.47, "grad_norm": 0.83766108751297, "learning_rate": 0.0007380952380952381, "loss": 0.1206, "step": 540 }, { "epoch": 8.63, "grad_norm": 0.488089919090271, "learning_rate": 0.0006587301587301587, "loss": 0.1288, "step": 550 }, { "epoch": 8.78, "grad_norm": 0.48352861404418945, "learning_rate": 0.0005793650793650794, "loss": 0.1376, "step": 560 }, { "epoch": 8.94, "grad_norm": 0.504624605178833, "learning_rate": 0.0005, "loss": 0.1196, "step": 570 }, { "epoch": 8.99, "eval_accuracy": 0.9690987124463519, "eval_f1": 0.9606826261080863, "eval_loss": 0.08817815780639648, "eval_precision": 0.9527364558696758, "eval_recall": 0.9695179259123419, "eval_runtime": 11.6769, "eval_samples_per_second": 99.77, "eval_steps_per_second": 6.252, "step": 573 }, { "epoch": 9.1, "grad_norm": 0.22534619271755219, "learning_rate": 0.0004206349206349207, "loss": 0.1083, "step": 580 }, { "epoch": 9.25, "grad_norm": 0.642715573310852, "learning_rate": 0.0003412698412698412, "loss": 0.095, "step": 590 }, { "epoch": 9.41, "grad_norm": 0.5458790063858032, "learning_rate": 0.0002619047619047619, "loss": 0.1413, "step": 600 }, { "epoch": 9.57, "grad_norm": 0.44277364015579224, "learning_rate": 0.00018253968253968255, "loss": 0.1277, "step": 610 }, { "epoch": 9.73, "grad_norm": 0.5391418933868408, "learning_rate": 0.00010317460317460317, "loss": 0.1289, "step": 620 }, { "epoch": 9.88, "grad_norm": 0.3602410554885864, "learning_rate": 2.3809523809523814e-05, "loss": 0.1218, "step": 630 }, { "epoch": 9.88, "eval_accuracy": 0.9639484978540772, "eval_f1": 0.9547614644970415, "eval_loss": 0.09816381335258484, "eval_precision": 0.9419247946464473, "eval_recall": 0.9702896723516108, "eval_runtime": 11.5253, "eval_samples_per_second": 101.082, "eval_steps_per_second": 6.334, "step": 630 }, { "epoch": 9.88, "step": 630, "total_flos": 3.1439051980091965e+18, "train_loss": 0.16921042270130582, "train_runtime": 727.4236, "train_samples_per_second": 56.047, "train_steps_per_second": 0.866 } ], "logging_steps": 10, "max_steps": 630, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.1439051980091965e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }