|
{ |
|
"best_metric": 0.9716738197424892, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-lora-medmnistv2/checkpoint-382", |
|
"epoch": 9.882352941176471, |
|
"eval_steps": 500, |
|
"global_step": 630, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.3673616647720337, |
|
"learning_rate": 0.004920634920634921, |
|
"loss": 0.4969, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.564309298992157, |
|
"learning_rate": 0.004841269841269842, |
|
"loss": 0.3486, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.83164381980896, |
|
"learning_rate": 0.0047619047619047615, |
|
"loss": 0.2505, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.7283287644386292, |
|
"learning_rate": 0.004682539682539683, |
|
"loss": 0.2221, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.3768545687198639, |
|
"learning_rate": 0.004603174603174603, |
|
"loss": 0.2593, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.4381789267063141, |
|
"learning_rate": 0.004523809523809524, |
|
"loss": 0.211, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.9605150214592275, |
|
"eval_f1": 0.9500622474690998, |
|
"eval_loss": 0.11396056413650513, |
|
"eval_precision": 0.9400752542471662, |
|
"eval_recall": 0.9615826914848096, |
|
"eval_runtime": 11.8657, |
|
"eval_samples_per_second": 98.182, |
|
"eval_steps_per_second": 6.152, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.8330244421958923, |
|
"learning_rate": 0.0044444444444444444, |
|
"loss": 0.2013, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.0441814661026, |
|
"learning_rate": 0.004365079365079365, |
|
"loss": 0.1882, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.8081804513931274, |
|
"learning_rate": 0.004285714285714286, |
|
"loss": 0.2045, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.49920302629470825, |
|
"learning_rate": 0.004206349206349207, |
|
"loss": 0.194, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.39656010270118713, |
|
"learning_rate": 0.0041269841269841265, |
|
"loss": 0.1918, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.4606592655181885, |
|
"learning_rate": 0.004047619047619048, |
|
"loss": 0.1911, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.9330472103004291, |
|
"eval_f1": 0.9185857738786393, |
|
"eval_loss": 0.151698499917984, |
|
"eval_precision": 0.8989291177970423, |
|
"eval_recall": 0.948319732257473, |
|
"eval_runtime": 11.9483, |
|
"eval_samples_per_second": 97.504, |
|
"eval_steps_per_second": 6.11, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 0.6359087824821472, |
|
"learning_rate": 0.003976190476190476, |
|
"loss": 0.1652, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.439413845539093, |
|
"learning_rate": 0.003896825396825397, |
|
"loss": 0.1795, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.9134456515312195, |
|
"learning_rate": 0.003817460317460317, |
|
"loss": 0.2155, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 1.381035566329956, |
|
"learning_rate": 0.0037380952380952383, |
|
"loss": 0.218, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 0.7177550196647644, |
|
"learning_rate": 0.0036587301587301586, |
|
"loss": 0.1839, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 0.7977726459503174, |
|
"learning_rate": 0.0035793650793650793, |
|
"loss": 0.1595, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 1.0747120380401611, |
|
"learning_rate": 0.0034999999999999996, |
|
"loss": 0.1695, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9579399141630901, |
|
"eval_f1": 0.9470671089099915, |
|
"eval_loss": 0.11627380549907684, |
|
"eval_precision": 0.9354318260568261, |
|
"eval_recall": 0.960904547955254, |
|
"eval_runtime": 11.7288, |
|
"eval_samples_per_second": 99.328, |
|
"eval_steps_per_second": 6.224, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 0.328319787979126, |
|
"learning_rate": 0.003420634920634921, |
|
"loss": 0.2, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 0.33479979634284973, |
|
"learning_rate": 0.003341269841269841, |
|
"loss": 0.1086, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 0.2731302082538605, |
|
"learning_rate": 0.003261904761904762, |
|
"loss": 0.1782, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 0.507108211517334, |
|
"learning_rate": 0.0031825396825396826, |
|
"loss": 0.1869, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"grad_norm": 2.0676915645599365, |
|
"learning_rate": 0.0031031746031746034, |
|
"loss": 0.1438, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 0.9014037847518921, |
|
"learning_rate": 0.0030317460317460317, |
|
"loss": 0.1556, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9570815450643777, |
|
"eval_f1": 0.9416580530916725, |
|
"eval_loss": 0.11594364047050476, |
|
"eval_precision": 0.9668957588361103, |
|
"eval_recall": 0.9220192248914971, |
|
"eval_runtime": 11.9006, |
|
"eval_samples_per_second": 97.894, |
|
"eval_steps_per_second": 6.134, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"grad_norm": 0.5203756093978882, |
|
"learning_rate": 0.0029523809523809524, |
|
"loss": 0.1301, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"grad_norm": 0.2871193587779999, |
|
"learning_rate": 0.0028730158730158727, |
|
"loss": 0.1614, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"grad_norm": 0.5260260701179504, |
|
"learning_rate": 0.002793650793650794, |
|
"loss": 0.1536, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 1.3978557586669922, |
|
"learning_rate": 0.0027142857142857142, |
|
"loss": 0.1558, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"grad_norm": 0.3816111385822296, |
|
"learning_rate": 0.002634920634920635, |
|
"loss": 0.1393, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"grad_norm": 0.8400008678436279, |
|
"learning_rate": 0.0025555555555555553, |
|
"loss": 0.173, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9502145922746781, |
|
"eval_f1": 0.9381227106227107, |
|
"eval_loss": 0.11663959920406342, |
|
"eval_precision": 0.9228616598558795, |
|
"eval_recall": 0.9578061006173972, |
|
"eval_runtime": 11.8429, |
|
"eval_samples_per_second": 98.371, |
|
"eval_steps_per_second": 6.164, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 0.4521535336971283, |
|
"learning_rate": 0.0024761904761904764, |
|
"loss": 0.1657, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"grad_norm": 1.030579924583435, |
|
"learning_rate": 0.0024047619047619048, |
|
"loss": 0.1658, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"grad_norm": 0.7163300514221191, |
|
"learning_rate": 0.0023253968253968255, |
|
"loss": 0.142, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"grad_norm": 0.45034098625183105, |
|
"learning_rate": 0.0022460317460317463, |
|
"loss": 0.1319, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"grad_norm": 0.8648952841758728, |
|
"learning_rate": 0.002166666666666667, |
|
"loss": 0.1621, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"grad_norm": 1.5358220338821411, |
|
"learning_rate": 0.0020873015873015873, |
|
"loss": 0.1747, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"grad_norm": 4.473792552947998, |
|
"learning_rate": 0.002007936507936508, |
|
"loss": 0.1485, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.9716738197424892, |
|
"eval_f1": 0.9637751828201366, |
|
"eval_loss": 0.08252137899398804, |
|
"eval_precision": 0.9578261710612463, |
|
"eval_recall": 0.9701960694418974, |
|
"eval_runtime": 12.3059, |
|
"eval_samples_per_second": 94.67, |
|
"eval_steps_per_second": 5.932, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.918424665927887, |
|
"learning_rate": 0.0019285714285714288, |
|
"loss": 0.1477, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"grad_norm": 0.2846250534057617, |
|
"learning_rate": 0.0018492063492063493, |
|
"loss": 0.107, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"grad_norm": 1.0383787155151367, |
|
"learning_rate": 0.00176984126984127, |
|
"loss": 0.1429, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"grad_norm": 0.5193153619766235, |
|
"learning_rate": 0.0016904761904761906, |
|
"loss": 0.1611, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"grad_norm": 0.6372103095054626, |
|
"learning_rate": 0.0016111111111111113, |
|
"loss": 0.1602, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"grad_norm": 0.4708787202835083, |
|
"learning_rate": 0.0015317460317460319, |
|
"loss": 0.1854, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9716738197424892, |
|
"eval_f1": 0.9637751828201366, |
|
"eval_loss": 0.08776707202196121, |
|
"eval_precision": 0.9578261710612463, |
|
"eval_recall": 0.9701960694418974, |
|
"eval_runtime": 11.6697, |
|
"eval_samples_per_second": 99.831, |
|
"eval_steps_per_second": 6.256, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"grad_norm": 1.3629688024520874, |
|
"learning_rate": 0.0014523809523809526, |
|
"loss": 0.2118, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"grad_norm": 0.5590702295303345, |
|
"learning_rate": 0.0013730158730158731, |
|
"loss": 0.1608, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"grad_norm": 1.0309607982635498, |
|
"learning_rate": 0.0012936507936507939, |
|
"loss": 0.1684, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"grad_norm": 0.41553324460983276, |
|
"learning_rate": 0.0012142857142857144, |
|
"loss": 0.116, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"grad_norm": 0.3408653736114502, |
|
"learning_rate": 0.001134920634920635, |
|
"loss": 0.1188, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"grad_norm": 0.4753941595554352, |
|
"learning_rate": 0.0010555555555555557, |
|
"loss": 0.1345, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.5954430103302002, |
|
"learning_rate": 0.0009761904761904762, |
|
"loss": 0.1353, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9587982832618026, |
|
"eval_f1": 0.9483988129512335, |
|
"eval_loss": 0.10603910684585571, |
|
"eval_precision": 0.9351169479762169, |
|
"eval_recall": 0.9646773182957393, |
|
"eval_runtime": 11.7084, |
|
"eval_samples_per_second": 99.501, |
|
"eval_steps_per_second": 6.235, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"grad_norm": 0.35974177718162537, |
|
"learning_rate": 0.0008968253968253968, |
|
"loss": 0.1192, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 0.4420110285282135, |
|
"learning_rate": 0.0008174603174603175, |
|
"loss": 0.1342, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"grad_norm": 0.83766108751297, |
|
"learning_rate": 0.0007380952380952381, |
|
"loss": 0.1206, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"grad_norm": 0.488089919090271, |
|
"learning_rate": 0.0006587301587301587, |
|
"loss": 0.1288, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"grad_norm": 0.48352861404418945, |
|
"learning_rate": 0.0005793650793650794, |
|
"loss": 0.1376, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"grad_norm": 0.504624605178833, |
|
"learning_rate": 0.0005, |
|
"loss": 0.1196, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.9690987124463519, |
|
"eval_f1": 0.9606826261080863, |
|
"eval_loss": 0.08817815780639648, |
|
"eval_precision": 0.9527364558696758, |
|
"eval_recall": 0.9695179259123419, |
|
"eval_runtime": 11.6769, |
|
"eval_samples_per_second": 99.77, |
|
"eval_steps_per_second": 6.252, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"grad_norm": 0.22534619271755219, |
|
"learning_rate": 0.0004206349206349207, |
|
"loss": 0.1083, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"grad_norm": 0.642715573310852, |
|
"learning_rate": 0.0003412698412698412, |
|
"loss": 0.095, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 0.5458790063858032, |
|
"learning_rate": 0.0002619047619047619, |
|
"loss": 0.1413, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"grad_norm": 0.44277364015579224, |
|
"learning_rate": 0.00018253968253968255, |
|
"loss": 0.1277, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"grad_norm": 0.5391418933868408, |
|
"learning_rate": 0.00010317460317460317, |
|
"loss": 0.1289, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"grad_norm": 0.3602410554885864, |
|
"learning_rate": 2.3809523809523814e-05, |
|
"loss": 0.1218, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_accuracy": 0.9639484978540772, |
|
"eval_f1": 0.9547614644970415, |
|
"eval_loss": 0.09816381335258484, |
|
"eval_precision": 0.9419247946464473, |
|
"eval_recall": 0.9702896723516108, |
|
"eval_runtime": 11.5253, |
|
"eval_samples_per_second": 101.082, |
|
"eval_steps_per_second": 6.334, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"step": 630, |
|
"total_flos": 3.1439051980091965e+18, |
|
"train_loss": 0.16921042270130582, |
|
"train_runtime": 727.4236, |
|
"train_samples_per_second": 56.047, |
|
"train_steps_per_second": 0.866 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 630, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 3.1439051980091965e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|