|
{ |
|
"best_metric": 0.7926221335992024, |
|
"best_model_checkpoint": "swin-large-patch4-window7-224-in22k-finetuned-lora-medmnistv2/checkpoint-985", |
|
"epoch": 9.954337899543379, |
|
"eval_steps": 500, |
|
"global_step": 1090, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.442108154296875, |
|
"learning_rate": 0.004954128440366973, |
|
"loss": 1.1496, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.3745663166046143, |
|
"learning_rate": 0.004908256880733945, |
|
"loss": 0.9658, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.6516236066818237, |
|
"learning_rate": 0.004862385321100918, |
|
"loss": 0.9225, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.4029324054718018, |
|
"learning_rate": 0.00481651376146789, |
|
"loss": 0.8968, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.2768313884735107, |
|
"learning_rate": 0.0047706422018348625, |
|
"loss": 0.8582, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.250333309173584, |
|
"learning_rate": 0.004724770642201835, |
|
"loss": 0.8362, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.2696894407272339, |
|
"learning_rate": 0.004678899082568808, |
|
"loss": 0.8948, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 1.02017080783844, |
|
"learning_rate": 0.00463302752293578, |
|
"loss": 0.8448, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 1.1102752685546875, |
|
"learning_rate": 0.0045871559633027525, |
|
"loss": 0.8288, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.8990870714187622, |
|
"learning_rate": 0.004541284403669725, |
|
"loss": 0.8516, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7547357926221336, |
|
"eval_f1": 0.41878801809381894, |
|
"eval_loss": 0.7470152974128723, |
|
"eval_precision": 0.5455908887866956, |
|
"eval_recall": 0.3913939658670338, |
|
"eval_runtime": 11.5403, |
|
"eval_samples_per_second": 86.913, |
|
"eval_steps_per_second": 5.459, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1006461381912231, |
|
"learning_rate": 0.004495412844036698, |
|
"loss": 0.8742, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.271594524383545, |
|
"learning_rate": 0.0044495412844036695, |
|
"loss": 0.8544, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 1.24767005443573, |
|
"learning_rate": 0.004403669724770643, |
|
"loss": 0.792, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.5129131078720093, |
|
"learning_rate": 0.004357798165137615, |
|
"loss": 0.8107, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.728676974773407, |
|
"learning_rate": 0.004311926605504587, |
|
"loss": 0.7921, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.073712944984436, |
|
"learning_rate": 0.0042660550458715595, |
|
"loss": 0.7453, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.1818609237670898, |
|
"learning_rate": 0.004220183486238533, |
|
"loss": 0.8023, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.6278601884841919, |
|
"learning_rate": 0.004174311926605505, |
|
"loss": 0.8196, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 0.796804666519165, |
|
"learning_rate": 0.004128440366972477, |
|
"loss": 0.8063, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.4323725700378418, |
|
"learning_rate": 0.00408256880733945, |
|
"loss": 0.7128, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.5659187436103821, |
|
"learning_rate": 0.004036697247706422, |
|
"loss": 0.7738, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7168494516450648, |
|
"eval_f1": 0.3577314775338443, |
|
"eval_loss": 0.8952543139457703, |
|
"eval_precision": 0.42246935817657555, |
|
"eval_recall": 0.4458814441250039, |
|
"eval_runtime": 11.2901, |
|
"eval_samples_per_second": 88.839, |
|
"eval_steps_per_second": 5.58, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 2.2657077312469482, |
|
"learning_rate": 0.003990825688073394, |
|
"loss": 0.8065, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 0.6983966827392578, |
|
"learning_rate": 0.003944954128440367, |
|
"loss": 0.8244, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.9509605765342712, |
|
"learning_rate": 0.0038990825688073397, |
|
"loss": 0.7429, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 0.8736411333084106, |
|
"learning_rate": 0.0038532110091743124, |
|
"loss": 0.7479, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 1.0205260515213013, |
|
"learning_rate": 0.0038073394495412843, |
|
"loss": 0.7733, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 0.9213468432426453, |
|
"learning_rate": 0.003761467889908257, |
|
"loss": 0.6993, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.7223290801048279, |
|
"learning_rate": 0.0037155963302752293, |
|
"loss": 0.7582, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.8736669421195984, |
|
"learning_rate": 0.003669724770642202, |
|
"loss": 0.7483, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 1.2014636993408203, |
|
"learning_rate": 0.0036238532110091743, |
|
"loss": 0.7592, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 1.290935754776001, |
|
"learning_rate": 0.003577981651376147, |
|
"loss": 0.819, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 0.751524806022644, |
|
"learning_rate": 0.0035321100917431194, |
|
"loss": 0.6994, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7607178464606181, |
|
"eval_f1": 0.5104809166351773, |
|
"eval_loss": 0.6592820286750793, |
|
"eval_precision": 0.6256931769842607, |
|
"eval_recall": 0.5058522052667251, |
|
"eval_runtime": 11.3494, |
|
"eval_samples_per_second": 88.375, |
|
"eval_steps_per_second": 5.551, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 1.0127859115600586, |
|
"learning_rate": 0.003486238532110092, |
|
"loss": 0.723, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 0.8557056784629822, |
|
"learning_rate": 0.0034403669724770644, |
|
"loss": 0.762, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 0.7101117968559265, |
|
"learning_rate": 0.003394495412844037, |
|
"loss": 0.746, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"grad_norm": 0.732359766960144, |
|
"learning_rate": 0.003348623853211009, |
|
"loss": 0.7152, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 0.7687917947769165, |
|
"learning_rate": 0.0033027522935779817, |
|
"loss": 0.6849, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"grad_norm": 0.9754200577735901, |
|
"learning_rate": 0.003256880733944954, |
|
"loss": 0.721, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 0.9926655888557434, |
|
"learning_rate": 0.003211009174311927, |
|
"loss": 0.7457, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"grad_norm": 1.4650967121124268, |
|
"learning_rate": 0.003165137614678899, |
|
"loss": 0.7209, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 0.7933741211891174, |
|
"learning_rate": 0.003119266055045872, |
|
"loss": 0.7785, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 0.7204951047897339, |
|
"learning_rate": 0.003073394495412844, |
|
"loss": 0.7276, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"grad_norm": 1.0182371139526367, |
|
"learning_rate": 0.003027522935779817, |
|
"loss": 0.6731, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7716849451645065, |
|
"eval_f1": 0.5382789492033663, |
|
"eval_loss": 0.6144729852676392, |
|
"eval_precision": 0.6321625904242083, |
|
"eval_recall": 0.500050058598068, |
|
"eval_runtime": 11.305, |
|
"eval_samples_per_second": 88.722, |
|
"eval_steps_per_second": 5.573, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"grad_norm": 0.5864942669868469, |
|
"learning_rate": 0.002981651376146789, |
|
"loss": 0.685, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"grad_norm": 0.816683292388916, |
|
"learning_rate": 0.002935779816513762, |
|
"loss": 0.6859, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"grad_norm": 0.6105664968490601, |
|
"learning_rate": 0.0028899082568807338, |
|
"loss": 0.6785, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"grad_norm": 1.0114610195159912, |
|
"learning_rate": 0.0028440366972477065, |
|
"loss": 0.6661, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 1.1204463243484497, |
|
"learning_rate": 0.002798165137614679, |
|
"loss": 0.664, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 1.0051954984664917, |
|
"learning_rate": 0.0027522935779816515, |
|
"loss": 0.7172, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"grad_norm": 0.6869407892227173, |
|
"learning_rate": 0.002706422018348624, |
|
"loss": 0.7341, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"grad_norm": 0.720870316028595, |
|
"learning_rate": 0.0026605504587155966, |
|
"loss": 0.7028, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"grad_norm": 0.9448994994163513, |
|
"learning_rate": 0.002614678899082569, |
|
"loss": 0.7383, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"grad_norm": 0.9630647897720337, |
|
"learning_rate": 0.0025688073394495416, |
|
"loss": 0.6727, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"grad_norm": 0.8725408315658569, |
|
"learning_rate": 0.0025229357798165135, |
|
"loss": 0.7266, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7397806580259222, |
|
"eval_f1": 0.4934894169090878, |
|
"eval_loss": 0.6838864088058472, |
|
"eval_precision": 0.5519819307791096, |
|
"eval_recall": 0.5344054167239178, |
|
"eval_runtime": 11.2776, |
|
"eval_samples_per_second": 88.937, |
|
"eval_steps_per_second": 5.586, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"grad_norm": 1.1792114973068237, |
|
"learning_rate": 0.0024770642201834866, |
|
"loss": 0.6676, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"grad_norm": 0.7483982443809509, |
|
"learning_rate": 0.002431192660550459, |
|
"loss": 0.7229, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"grad_norm": 0.8732199668884277, |
|
"learning_rate": 0.0023853211009174312, |
|
"loss": 0.6366, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"grad_norm": 1.1174126863479614, |
|
"learning_rate": 0.002339449541284404, |
|
"loss": 0.6855, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"grad_norm": 0.7935824990272522, |
|
"learning_rate": 0.0022935779816513763, |
|
"loss": 0.7225, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"grad_norm": 0.9188850522041321, |
|
"learning_rate": 0.002247706422018349, |
|
"loss": 0.6657, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"grad_norm": 0.7955396771430969, |
|
"learning_rate": 0.0022018348623853213, |
|
"loss": 0.6797, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"grad_norm": 1.0317906141281128, |
|
"learning_rate": 0.0021559633027522936, |
|
"loss": 0.6589, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"grad_norm": 0.6561917662620544, |
|
"learning_rate": 0.0021100917431192663, |
|
"loss": 0.6435, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"grad_norm": 0.8070980906486511, |
|
"learning_rate": 0.0020642201834862386, |
|
"loss": 0.6303, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"grad_norm": 0.6900261044502258, |
|
"learning_rate": 0.002018348623853211, |
|
"loss": 0.6388, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7666999002991027, |
|
"eval_f1": 0.5338252978902394, |
|
"eval_loss": 0.6242878437042236, |
|
"eval_precision": 0.6116740127093266, |
|
"eval_recall": 0.5062826891866704, |
|
"eval_runtime": 11.256, |
|
"eval_samples_per_second": 89.108, |
|
"eval_steps_per_second": 5.597, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"grad_norm": 0.8859825134277344, |
|
"learning_rate": 0.0019724770642201837, |
|
"loss": 0.6252, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"grad_norm": 0.7855720520019531, |
|
"learning_rate": 0.0019266055045871562, |
|
"loss": 0.6026, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"grad_norm": 0.7128086090087891, |
|
"learning_rate": 0.0018807339449541285, |
|
"loss": 0.5799, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"grad_norm": 0.6269203424453735, |
|
"learning_rate": 0.001834862385321101, |
|
"loss": 0.6358, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"grad_norm": 1.1196659803390503, |
|
"learning_rate": 0.0017889908256880735, |
|
"loss": 0.6282, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"grad_norm": 1.253045678138733, |
|
"learning_rate": 0.001743119266055046, |
|
"loss": 0.6109, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"grad_norm": 0.946592390537262, |
|
"learning_rate": 0.0016972477064220186, |
|
"loss": 0.58, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"grad_norm": 0.9241579174995422, |
|
"learning_rate": 0.0016513761467889909, |
|
"loss": 0.6492, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"grad_norm": 0.9794093370437622, |
|
"learning_rate": 0.0016055045871559634, |
|
"loss": 0.6333, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"grad_norm": 0.9750506281852722, |
|
"learning_rate": 0.001559633027522936, |
|
"loss": 0.6883, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"grad_norm": 1.1783692836761475, |
|
"learning_rate": 0.0015137614678899084, |
|
"loss": 0.6495, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7826520438683948, |
|
"eval_f1": 0.6162957297860807, |
|
"eval_loss": 0.6161015033721924, |
|
"eval_precision": 0.6357243111624864, |
|
"eval_recall": 0.6152698884549003, |
|
"eval_runtime": 11.1928, |
|
"eval_samples_per_second": 89.611, |
|
"eval_steps_per_second": 5.629, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"grad_norm": 1.0667985677719116, |
|
"learning_rate": 0.001467889908256881, |
|
"loss": 0.6615, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"grad_norm": 1.2581771612167358, |
|
"learning_rate": 0.0014220183486238532, |
|
"loss": 0.5515, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"grad_norm": 1.012791395187378, |
|
"learning_rate": 0.0013761467889908258, |
|
"loss": 0.607, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"grad_norm": 0.9969860911369324, |
|
"learning_rate": 0.0013302752293577983, |
|
"loss": 0.6396, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"grad_norm": 0.9849869608879089, |
|
"learning_rate": 0.0012844036697247708, |
|
"loss": 0.6563, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"grad_norm": 1.0733628273010254, |
|
"learning_rate": 0.0012385321100917433, |
|
"loss": 0.5833, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"grad_norm": 1.044959306716919, |
|
"learning_rate": 0.0011926605504587156, |
|
"loss": 0.5745, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"grad_norm": 0.8886712193489075, |
|
"learning_rate": 0.0011467889908256881, |
|
"loss": 0.5437, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"grad_norm": 1.2174370288848877, |
|
"learning_rate": 0.0011009174311926607, |
|
"loss": 0.5654, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"grad_norm": 0.8696100115776062, |
|
"learning_rate": 0.0010550458715596332, |
|
"loss": 0.6165, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"grad_norm": 1.2088637351989746, |
|
"learning_rate": 0.0010091743119266055, |
|
"loss": 0.5639, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7836490528414756, |
|
"eval_f1": 0.5930825856311631, |
|
"eval_loss": 0.5751714706420898, |
|
"eval_precision": 0.601839864113542, |
|
"eval_recall": 0.59116309877434, |
|
"eval_runtime": 11.2311, |
|
"eval_samples_per_second": 89.305, |
|
"eval_steps_per_second": 5.609, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"grad_norm": 0.6919325590133667, |
|
"learning_rate": 0.0009633027522935781, |
|
"loss": 0.6158, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"grad_norm": 1.1032708883285522, |
|
"learning_rate": 0.0009174311926605505, |
|
"loss": 0.4922, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"grad_norm": 1.5484873056411743, |
|
"learning_rate": 0.000871559633027523, |
|
"loss": 0.5453, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"grad_norm": 1.1881980895996094, |
|
"learning_rate": 0.0008256880733944954, |
|
"loss": 0.6307, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"grad_norm": 0.80064457654953, |
|
"learning_rate": 0.000779816513761468, |
|
"loss": 0.5129, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"grad_norm": 0.7097495794296265, |
|
"learning_rate": 0.0007339449541284405, |
|
"loss": 0.6111, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"grad_norm": 1.0539182424545288, |
|
"learning_rate": 0.0006880733944954129, |
|
"loss": 0.5803, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"grad_norm": 1.1059874296188354, |
|
"learning_rate": 0.0006422018348623854, |
|
"loss": 0.6122, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"grad_norm": 0.969431459903717, |
|
"learning_rate": 0.0005963302752293578, |
|
"loss": 0.5662, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"grad_norm": 0.9566834568977356, |
|
"learning_rate": 0.0005504587155963303, |
|
"loss": 0.5317, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"grad_norm": 1.040589690208435, |
|
"learning_rate": 0.0005045871559633027, |
|
"loss": 0.6012, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7926221335992024, |
|
"eval_f1": 0.6175857153722378, |
|
"eval_loss": 0.5508496165275574, |
|
"eval_precision": 0.6303209720740276, |
|
"eval_recall": 0.6194980716058, |
|
"eval_runtime": 11.1597, |
|
"eval_samples_per_second": 89.877, |
|
"eval_steps_per_second": 5.645, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"grad_norm": 0.8962224721908569, |
|
"learning_rate": 0.00045871559633027525, |
|
"loss": 0.5106, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"grad_norm": 0.9521830677986145, |
|
"learning_rate": 0.0004128440366972477, |
|
"loss": 0.5768, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"grad_norm": 1.102037787437439, |
|
"learning_rate": 0.00036697247706422024, |
|
"loss": 0.5709, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"grad_norm": 1.3637118339538574, |
|
"learning_rate": 0.0003211009174311927, |
|
"loss": 0.4719, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"grad_norm": 0.8370681405067444, |
|
"learning_rate": 0.00027522935779816516, |
|
"loss": 0.5717, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"grad_norm": 1.0408118963241577, |
|
"learning_rate": 0.00022935779816513763, |
|
"loss": 0.5216, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"grad_norm": 1.2250603437423706, |
|
"learning_rate": 0.00018348623853211012, |
|
"loss": 0.5648, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"grad_norm": 0.9561938047409058, |
|
"learning_rate": 0.00013761467889908258, |
|
"loss": 0.5316, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"grad_norm": 1.2182472944259644, |
|
"learning_rate": 9.174311926605506e-05, |
|
"loss": 0.5773, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"grad_norm": 1.1085007190704346, |
|
"learning_rate": 4.587155963302753e-05, |
|
"loss": 0.5777, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"grad_norm": 0.9443553686141968, |
|
"learning_rate": 0.0, |
|
"loss": 0.5468, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.7856430707876371, |
|
"eval_f1": 0.6355311291077588, |
|
"eval_loss": 0.566506028175354, |
|
"eval_precision": 0.6470430666433826, |
|
"eval_recall": 0.6287911299620902, |
|
"eval_runtime": 11.1527, |
|
"eval_samples_per_second": 89.934, |
|
"eval_steps_per_second": 5.649, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"step": 1090, |
|
"total_flos": 1.2358226368562393e+19, |
|
"train_loss": 0.6880708248243419, |
|
"train_runtime": 1791.2534, |
|
"train_samples_per_second": 39.118, |
|
"train_steps_per_second": 0.609 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1090, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.2358226368562393e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|