|
{ |
|
"best_metric": 0.15389865636825562, |
|
"best_model_checkpoint": "mobilevitv2-1.0-imagenet1k-256-finetuned_v2024-10-21-frost/checkpoint-1000", |
|
"epoch": 30.0, |
|
"eval_steps": 100, |
|
"global_step": 1710, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 0.3124828040599823, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 0.6955, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 0.24917739629745483, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.6942, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.2268371284008026, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.6939, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.2435961812734604, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.6918, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 0.24638999998569489, |
|
"learning_rate": 5.847953216374269e-05, |
|
"loss": 0.6889, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.2426590472459793, |
|
"learning_rate": 7.017543859649122e-05, |
|
"loss": 0.6854, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.2280701754385965, |
|
"grad_norm": 0.26534757018089294, |
|
"learning_rate": 8.187134502923976e-05, |
|
"loss": 0.6803, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 0.2573549449443817, |
|
"learning_rate": 9.35672514619883e-05, |
|
"loss": 0.6763, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.2639031410217285, |
|
"learning_rate": 0.00010526315789473685, |
|
"loss": 0.6701, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.26114630699157715, |
|
"learning_rate": 0.00011695906432748539, |
|
"loss": 0.6635, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"eval_accuracy": 0.7604444444444445, |
|
"eval_f1": 0.5705179282868525, |
|
"eval_loss": 0.6512863039970398, |
|
"eval_precision": 0.43552311435523117, |
|
"eval_recall": 0.8267898383371824, |
|
"eval_runtime": 2.9095, |
|
"eval_samples_per_second": 77.332, |
|
"eval_steps_per_second": 9.967, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9298245614035088, |
|
"grad_norm": 0.3371104896068573, |
|
"learning_rate": 0.0001286549707602339, |
|
"loss": 0.6502, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.31244638562202454, |
|
"learning_rate": 0.00014035087719298245, |
|
"loss": 0.6343, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.280701754385965, |
|
"grad_norm": 0.47065746784210205, |
|
"learning_rate": 0.00015204678362573098, |
|
"loss": 0.6161, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 0.41640815138816833, |
|
"learning_rate": 0.00016374269005847952, |
|
"loss": 0.588, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.34670090675354004, |
|
"learning_rate": 0.00017543859649122806, |
|
"loss": 0.5565, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 0.384328693151474, |
|
"learning_rate": 0.0001871345029239766, |
|
"loss": 0.5242, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"grad_norm": 0.4133964478969574, |
|
"learning_rate": 0.00019883040935672513, |
|
"loss": 0.5158, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 0.4693595767021179, |
|
"learning_rate": 0.00019883040935672513, |
|
"loss": 0.4658, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.41811782121658325, |
|
"learning_rate": 0.00019753086419753085, |
|
"loss": 0.4297, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 0.8540976643562317, |
|
"learning_rate": 0.00019623131903833657, |
|
"loss": 0.4461, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"eval_accuracy": 0.8768888888888889, |
|
"eval_f1": 0.729227761485826, |
|
"eval_loss": 0.3972250819206238, |
|
"eval_precision": 0.6322033898305085, |
|
"eval_recall": 0.8614318706697459, |
|
"eval_runtime": 1.766, |
|
"eval_samples_per_second": 127.406, |
|
"eval_steps_per_second": 16.421, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.6842105263157894, |
|
"grad_norm": 0.8259305357933044, |
|
"learning_rate": 0.0001949317738791423, |
|
"loss": 0.3914, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.8596491228070176, |
|
"grad_norm": 0.8546284437179565, |
|
"learning_rate": 0.00019363222871994802, |
|
"loss": 0.384, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.035087719298246, |
|
"grad_norm": 0.3827027678489685, |
|
"learning_rate": 0.00019233268356075374, |
|
"loss": 0.3497, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.2105263157894735, |
|
"grad_norm": 0.6248043775558472, |
|
"learning_rate": 0.00019103313840155946, |
|
"loss": 0.3648, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.385964912280702, |
|
"grad_norm": 0.5684685111045837, |
|
"learning_rate": 0.00018973359324236518, |
|
"loss": 0.3112, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.56140350877193, |
|
"grad_norm": 0.5080260634422302, |
|
"learning_rate": 0.0001884340480831709, |
|
"loss": 0.3059, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.7368421052631575, |
|
"grad_norm": 0.5282370448112488, |
|
"learning_rate": 0.0001871345029239766, |
|
"loss": 0.2922, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.912280701754386, |
|
"grad_norm": 0.7253307104110718, |
|
"learning_rate": 0.00018583495776478232, |
|
"loss": 0.2909, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.087719298245614, |
|
"grad_norm": 0.7058104276657104, |
|
"learning_rate": 0.00018453541260558804, |
|
"loss": 0.2922, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 1.1993378400802612, |
|
"learning_rate": 0.00018323586744639376, |
|
"loss": 0.2599, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"eval_accuracy": 0.9226666666666666, |
|
"eval_f1": 0.804932735426009, |
|
"eval_loss": 0.2404223531484604, |
|
"eval_precision": 0.7821350762527233, |
|
"eval_recall": 0.8290993071593533, |
|
"eval_runtime": 2.7313, |
|
"eval_samples_per_second": 82.378, |
|
"eval_steps_per_second": 10.618, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.43859649122807, |
|
"grad_norm": 0.8134835362434387, |
|
"learning_rate": 0.00018193632228719948, |
|
"loss": 0.2645, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.614035087719298, |
|
"grad_norm": 0.7742730975151062, |
|
"learning_rate": 0.0001806367771280052, |
|
"loss": 0.2345, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.7894736842105265, |
|
"grad_norm": 0.5191880464553833, |
|
"learning_rate": 0.00017933723196881092, |
|
"loss": 0.2504, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.964912280701754, |
|
"grad_norm": 0.7682189345359802, |
|
"learning_rate": 0.00017803768680961664, |
|
"loss": 0.2654, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.140350877192983, |
|
"grad_norm": 0.7704707384109497, |
|
"learning_rate": 0.00017673814165042236, |
|
"loss": 0.2431, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 0.9333469867706299, |
|
"learning_rate": 0.00017543859649122806, |
|
"loss": 0.2382, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.491228070175438, |
|
"grad_norm": 0.8412513136863708, |
|
"learning_rate": 0.00017413905133203378, |
|
"loss": 0.2207, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 0.7568041086196899, |
|
"learning_rate": 0.0001728395061728395, |
|
"loss": 0.2271, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.842105263157895, |
|
"grad_norm": 0.689445436000824, |
|
"learning_rate": 0.00017153996101364522, |
|
"loss": 0.2076, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 0.7390238046646118, |
|
"learning_rate": 0.00017024041585445094, |
|
"loss": 0.2074, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"eval_accuracy": 0.9346666666666666, |
|
"eval_f1": 0.8256227758007118, |
|
"eval_loss": 0.1941838562488556, |
|
"eval_precision": 0.848780487804878, |
|
"eval_recall": 0.8036951501154734, |
|
"eval_runtime": 1.7733, |
|
"eval_samples_per_second": 126.88, |
|
"eval_steps_per_second": 16.353, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.192982456140351, |
|
"grad_norm": 0.4645775258541107, |
|
"learning_rate": 0.00016894087069525666, |
|
"loss": 0.2233, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.368421052631579, |
|
"grad_norm": 0.6826916337013245, |
|
"learning_rate": 0.00016764132553606238, |
|
"loss": 0.1846, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.543859649122807, |
|
"grad_norm": 0.6299170851707458, |
|
"learning_rate": 0.0001663417803768681, |
|
"loss": 0.1807, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.719298245614035, |
|
"grad_norm": 0.40688008069992065, |
|
"learning_rate": 0.00016504223521767383, |
|
"loss": 0.1925, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.894736842105263, |
|
"grad_norm": 0.8310642242431641, |
|
"learning_rate": 0.00016374269005847952, |
|
"loss": 0.1906, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.070175438596491, |
|
"grad_norm": 0.7561126351356506, |
|
"learning_rate": 0.00016244314489928524, |
|
"loss": 0.2537, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.24561403508772, |
|
"grad_norm": 1.5505608320236206, |
|
"learning_rate": 0.00016114359974009096, |
|
"loss": 0.2134, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.421052631578947, |
|
"grad_norm": 0.5844523310661316, |
|
"learning_rate": 0.00015984405458089668, |
|
"loss": 0.1927, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.596491228070175, |
|
"grad_norm": 0.6846328377723694, |
|
"learning_rate": 0.0001585445094217024, |
|
"loss": 0.1843, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 0.5246126651763916, |
|
"learning_rate": 0.00015724496426250813, |
|
"loss": 0.167, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"eval_accuracy": 0.9364444444444444, |
|
"eval_f1": 0.8354430379746836, |
|
"eval_loss": 0.17720411717891693, |
|
"eval_precision": 0.8325688073394495, |
|
"eval_recall": 0.8383371824480369, |
|
"eval_runtime": 2.7456, |
|
"eval_samples_per_second": 81.95, |
|
"eval_steps_per_second": 10.562, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.947368421052632, |
|
"grad_norm": 0.9557002782821655, |
|
"learning_rate": 0.00015594541910331385, |
|
"loss": 0.1752, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.12280701754386, |
|
"grad_norm": 1.115300178527832, |
|
"learning_rate": 0.00015464587394411957, |
|
"loss": 0.2, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.298245614035087, |
|
"grad_norm": 0.6540657877922058, |
|
"learning_rate": 0.00015334632878492526, |
|
"loss": 0.158, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.473684210526315, |
|
"grad_norm": 0.8491069078445435, |
|
"learning_rate": 0.00015204678362573098, |
|
"loss": 0.1813, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.649122807017545, |
|
"grad_norm": 1.3543705940246582, |
|
"learning_rate": 0.0001507472384665367, |
|
"loss": 0.1951, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"grad_norm": 0.8627998232841492, |
|
"learning_rate": 0.00014944769330734243, |
|
"loss": 0.1945, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.2822953462600708, |
|
"learning_rate": 0.00014814814814814815, |
|
"loss": 0.1591, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.175438596491228, |
|
"grad_norm": 0.6904670596122742, |
|
"learning_rate": 0.00014684860298895387, |
|
"loss": 0.1545, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.350877192982455, |
|
"grad_norm": 1.3155221939086914, |
|
"learning_rate": 0.0001455490578297596, |
|
"loss": 0.1385, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"grad_norm": 0.8683547973632812, |
|
"learning_rate": 0.0001442495126705653, |
|
"loss": 0.1661, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"eval_accuracy": 0.9342222222222222, |
|
"eval_f1": 0.8258823529411765, |
|
"eval_loss": 0.16532927751541138, |
|
"eval_precision": 0.841726618705036, |
|
"eval_recall": 0.8106235565819861, |
|
"eval_runtime": 1.7784, |
|
"eval_samples_per_second": 126.515, |
|
"eval_steps_per_second": 16.306, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.701754385964913, |
|
"grad_norm": 0.7406933307647705, |
|
"learning_rate": 0.00014294996751137103, |
|
"loss": 0.1569, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.87719298245614, |
|
"grad_norm": 1.5100739002227783, |
|
"learning_rate": 0.00014165042235217672, |
|
"loss": 0.1873, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.052631578947368, |
|
"grad_norm": 0.8658424019813538, |
|
"learning_rate": 0.00014035087719298245, |
|
"loss": 0.1771, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.228070175438596, |
|
"grad_norm": 0.761426568031311, |
|
"learning_rate": 0.00013905133203378817, |
|
"loss": 0.1522, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.403508771929825, |
|
"grad_norm": 0.6994770765304565, |
|
"learning_rate": 0.0001377517868745939, |
|
"loss": 0.1462, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.578947368421053, |
|
"grad_norm": 0.6044259071350098, |
|
"learning_rate": 0.0001364522417153996, |
|
"loss": 0.1688, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.75438596491228, |
|
"grad_norm": 0.6377450227737427, |
|
"learning_rate": 0.00013515269655620533, |
|
"loss": 0.1726, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.929824561403509, |
|
"grad_norm": 0.45792627334594727, |
|
"learning_rate": 0.00013385315139701105, |
|
"loss": 0.1578, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 12.105263157894736, |
|
"grad_norm": 0.5658883452415466, |
|
"learning_rate": 0.00013255360623781677, |
|
"loss": 0.1528, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 12.280701754385966, |
|
"grad_norm": 0.568031370639801, |
|
"learning_rate": 0.0001312540610786225, |
|
"loss": 0.1603, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.280701754385966, |
|
"eval_accuracy": 0.9408888888888889, |
|
"eval_f1": 0.8473019517795637, |
|
"eval_loss": 0.16492225229740143, |
|
"eval_precision": 0.8424657534246576, |
|
"eval_recall": 0.8521939953810623, |
|
"eval_runtime": 2.4488, |
|
"eval_samples_per_second": 91.883, |
|
"eval_steps_per_second": 11.843, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.456140350877194, |
|
"grad_norm": 0.8529219031333923, |
|
"learning_rate": 0.0001299545159194282, |
|
"loss": 0.1438, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.631578947368421, |
|
"grad_norm": 0.7960824370384216, |
|
"learning_rate": 0.0001286549707602339, |
|
"loss": 0.1245, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.807017543859649, |
|
"grad_norm": 0.8270284533500671, |
|
"learning_rate": 0.00012748538011695908, |
|
"loss": 0.1775, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 12.982456140350877, |
|
"grad_norm": 0.407463014125824, |
|
"learning_rate": 0.0001261858349577648, |
|
"loss": 0.1583, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 13.157894736842104, |
|
"grad_norm": 1.2405822277069092, |
|
"learning_rate": 0.0001248862897985705, |
|
"loss": 0.1412, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 0.7762990593910217, |
|
"learning_rate": 0.00012358674463937622, |
|
"loss": 0.137, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.508771929824562, |
|
"grad_norm": 0.7772154808044434, |
|
"learning_rate": 0.00012228719948018194, |
|
"loss": 0.1618, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.68421052631579, |
|
"grad_norm": 0.3346017599105835, |
|
"learning_rate": 0.00012098765432098766, |
|
"loss": 0.1276, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.859649122807017, |
|
"grad_norm": 0.7661828994750977, |
|
"learning_rate": 0.00011968810916179338, |
|
"loss": 0.1606, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 14.035087719298245, |
|
"grad_norm": 1.2454911470413208, |
|
"learning_rate": 0.0001183885640025991, |
|
"loss": 0.1523, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.035087719298245, |
|
"eval_accuracy": 0.9466666666666667, |
|
"eval_f1": 0.8591549295774648, |
|
"eval_loss": 0.15682315826416016, |
|
"eval_precision": 0.8735083532219571, |
|
"eval_recall": 0.8452655889145496, |
|
"eval_runtime": 1.8011, |
|
"eval_samples_per_second": 124.926, |
|
"eval_steps_per_second": 16.102, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.210526315789474, |
|
"grad_norm": 3.0044612884521484, |
|
"learning_rate": 0.00011708901884340481, |
|
"loss": 0.1331, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 14.385964912280702, |
|
"grad_norm": 0.7117482423782349, |
|
"learning_rate": 0.00011578947368421053, |
|
"loss": 0.1619, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.56140350877193, |
|
"grad_norm": 0.6939218044281006, |
|
"learning_rate": 0.00011448992852501626, |
|
"loss": 0.1531, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.736842105263158, |
|
"grad_norm": 0.5622960329055786, |
|
"learning_rate": 0.00011319038336582198, |
|
"loss": 0.131, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.912280701754385, |
|
"grad_norm": 0.9399430155754089, |
|
"learning_rate": 0.0001118908382066277, |
|
"loss": 0.1276, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 15.087719298245615, |
|
"grad_norm": 1.6480320692062378, |
|
"learning_rate": 0.0001105912930474334, |
|
"loss": 0.1656, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 15.263157894736842, |
|
"grad_norm": 0.7238647937774658, |
|
"learning_rate": 0.00010929174788823913, |
|
"loss": 0.1261, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.43859649122807, |
|
"grad_norm": 1.0423846244812012, |
|
"learning_rate": 0.00010799220272904485, |
|
"loss": 0.1328, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.614035087719298, |
|
"grad_norm": 1.1374431848526, |
|
"learning_rate": 0.00010669265756985057, |
|
"loss": 0.1427, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 15.789473684210526, |
|
"grad_norm": 0.7375030517578125, |
|
"learning_rate": 0.00010539311241065628, |
|
"loss": 0.1506, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.789473684210526, |
|
"eval_accuracy": 0.9431111111111111, |
|
"eval_f1": 0.8494117647058823, |
|
"eval_loss": 0.15481138229370117, |
|
"eval_precision": 0.8657074340527577, |
|
"eval_recall": 0.8337182448036952, |
|
"eval_runtime": 1.8243, |
|
"eval_samples_per_second": 123.334, |
|
"eval_steps_per_second": 15.896, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.964912280701755, |
|
"grad_norm": 0.7035567164421082, |
|
"learning_rate": 0.000104093567251462, |
|
"loss": 0.1324, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 16.140350877192983, |
|
"grad_norm": 0.6969211101531982, |
|
"learning_rate": 0.00010279402209226772, |
|
"loss": 0.1257, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 16.31578947368421, |
|
"grad_norm": 0.3633826673030853, |
|
"learning_rate": 0.00010149447693307344, |
|
"loss": 0.1306, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.49122807017544, |
|
"grad_norm": 0.8118802309036255, |
|
"learning_rate": 0.00010019493177387915, |
|
"loss": 0.1091, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.6684471964836121, |
|
"learning_rate": 9.889538661468485e-05, |
|
"loss": 0.1323, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 16.842105263157894, |
|
"grad_norm": 0.6080668568611145, |
|
"learning_rate": 9.759584145549058e-05, |
|
"loss": 0.1168, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 17.017543859649123, |
|
"grad_norm": 0.7799493670463562, |
|
"learning_rate": 9.62962962962963e-05, |
|
"loss": 0.141, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 17.19298245614035, |
|
"grad_norm": 0.5670738816261292, |
|
"learning_rate": 9.499675113710202e-05, |
|
"loss": 0.1244, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 17.36842105263158, |
|
"grad_norm": 0.9652756452560425, |
|
"learning_rate": 9.369720597790773e-05, |
|
"loss": 0.1354, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.54385964912281, |
|
"grad_norm": 0.8537412881851196, |
|
"learning_rate": 9.239766081871345e-05, |
|
"loss": 0.1485, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.54385964912281, |
|
"eval_accuracy": 0.9444444444444444, |
|
"eval_f1": 0.8544819557625145, |
|
"eval_loss": 0.15389865636825562, |
|
"eval_precision": 0.8615023474178404, |
|
"eval_recall": 0.8475750577367206, |
|
"eval_runtime": 1.7887, |
|
"eval_samples_per_second": 125.789, |
|
"eval_steps_per_second": 16.213, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.719298245614034, |
|
"grad_norm": 0.9258742928504944, |
|
"learning_rate": 9.109811565951917e-05, |
|
"loss": 0.1284, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 17.894736842105264, |
|
"grad_norm": 0.6817509531974792, |
|
"learning_rate": 8.979857050032489e-05, |
|
"loss": 0.1226, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 18.07017543859649, |
|
"grad_norm": 0.8437041640281677, |
|
"learning_rate": 8.849902534113061e-05, |
|
"loss": 0.1527, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 18.24561403508772, |
|
"grad_norm": 1.2362749576568604, |
|
"learning_rate": 8.719948018193632e-05, |
|
"loss": 0.1224, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 18.42105263157895, |
|
"grad_norm": 0.4136218726634979, |
|
"learning_rate": 8.589993502274204e-05, |
|
"loss": 0.1293, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.596491228070175, |
|
"grad_norm": 0.8913040161132812, |
|
"learning_rate": 8.460038986354776e-05, |
|
"loss": 0.1305, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 18.771929824561404, |
|
"grad_norm": 1.0768448114395142, |
|
"learning_rate": 8.330084470435348e-05, |
|
"loss": 0.1134, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 18.94736842105263, |
|
"grad_norm": 0.9289010763168335, |
|
"learning_rate": 8.200129954515919e-05, |
|
"loss": 0.1551, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 19.12280701754386, |
|
"grad_norm": 0.4481465220451355, |
|
"learning_rate": 8.070175438596491e-05, |
|
"loss": 0.1263, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 19.29824561403509, |
|
"grad_norm": 0.7408900260925293, |
|
"learning_rate": 7.940220922677063e-05, |
|
"loss": 0.1263, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.29824561403509, |
|
"eval_accuracy": 0.944, |
|
"eval_f1": 0.8534883720930233, |
|
"eval_loss": 0.15210777521133423, |
|
"eval_precision": 0.8594847775175644, |
|
"eval_recall": 0.8475750577367206, |
|
"eval_runtime": 1.7885, |
|
"eval_samples_per_second": 125.802, |
|
"eval_steps_per_second": 16.214, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.473684210526315, |
|
"grad_norm": 0.8939012289047241, |
|
"learning_rate": 7.810266406757635e-05, |
|
"loss": 0.1206, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.649122807017545, |
|
"grad_norm": 0.6809560656547546, |
|
"learning_rate": 7.680311890838207e-05, |
|
"loss": 0.1225, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 19.82456140350877, |
|
"grad_norm": 1.1481623649597168, |
|
"learning_rate": 7.550357374918778e-05, |
|
"loss": 0.1291, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.0011980533599854, |
|
"learning_rate": 7.42040285899935e-05, |
|
"loss": 0.1482, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 20.17543859649123, |
|
"grad_norm": 0.6619019508361816, |
|
"learning_rate": 7.290448343079922e-05, |
|
"loss": 0.1123, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.350877192982455, |
|
"grad_norm": 0.796700656414032, |
|
"learning_rate": 7.160493827160494e-05, |
|
"loss": 0.1166, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.526315789473685, |
|
"grad_norm": 0.9634900689125061, |
|
"learning_rate": 7.030539311241065e-05, |
|
"loss": 0.1263, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 20.70175438596491, |
|
"grad_norm": 0.505535900592804, |
|
"learning_rate": 6.900584795321637e-05, |
|
"loss": 0.1117, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 20.87719298245614, |
|
"grad_norm": 0.5166471600532532, |
|
"learning_rate": 6.770630279402209e-05, |
|
"loss": 0.1279, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 21.05263157894737, |
|
"grad_norm": 1.2773476839065552, |
|
"learning_rate": 6.640675763482781e-05, |
|
"loss": 0.1444, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.05263157894737, |
|
"eval_accuracy": 0.9417777777777778, |
|
"eval_f1": 0.8471411901983664, |
|
"eval_loss": 0.155166357755661, |
|
"eval_precision": 0.8561320754716981, |
|
"eval_recall": 0.8383371824480369, |
|
"eval_runtime": 2.37, |
|
"eval_samples_per_second": 94.937, |
|
"eval_steps_per_second": 12.236, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.228070175438596, |
|
"grad_norm": 0.793021559715271, |
|
"learning_rate": 6.510721247563352e-05, |
|
"loss": 0.1168, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 21.403508771929825, |
|
"grad_norm": 1.2551689147949219, |
|
"learning_rate": 6.380766731643924e-05, |
|
"loss": 0.1089, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 21.57894736842105, |
|
"grad_norm": 0.6803563237190247, |
|
"learning_rate": 6.250812215724496e-05, |
|
"loss": 0.1186, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 21.75438596491228, |
|
"grad_norm": 1.2632770538330078, |
|
"learning_rate": 6.120857699805068e-05, |
|
"loss": 0.1116, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 21.92982456140351, |
|
"grad_norm": 0.525141716003418, |
|
"learning_rate": 5.99090318388564e-05, |
|
"loss": 0.0979, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 22.105263157894736, |
|
"grad_norm": 0.5942980647087097, |
|
"learning_rate": 5.860948667966212e-05, |
|
"loss": 0.1483, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 22.280701754385966, |
|
"grad_norm": 1.0624207258224487, |
|
"learning_rate": 5.7309941520467835e-05, |
|
"loss": 0.1155, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 22.45614035087719, |
|
"grad_norm": 0.6244792938232422, |
|
"learning_rate": 5.6010396361273556e-05, |
|
"loss": 0.1159, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 22.63157894736842, |
|
"grad_norm": 1.9767743349075317, |
|
"learning_rate": 5.471085120207927e-05, |
|
"loss": 0.1165, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 22.80701754385965, |
|
"grad_norm": 2.270113468170166, |
|
"learning_rate": 5.341130604288499e-05, |
|
"loss": 0.1133, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 22.80701754385965, |
|
"eval_accuracy": 0.9448888888888889, |
|
"eval_f1": 0.8561484918793504, |
|
"eval_loss": 0.1531468778848648, |
|
"eval_precision": 0.8601398601398601, |
|
"eval_recall": 0.8521939953810623, |
|
"eval_runtime": 4.5112, |
|
"eval_samples_per_second": 49.875, |
|
"eval_steps_per_second": 6.428, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 22.982456140350877, |
|
"grad_norm": 2.3252851963043213, |
|
"learning_rate": 5.2111760883690706e-05, |
|
"loss": 0.1018, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 23.157894736842106, |
|
"grad_norm": 1.3282454013824463, |
|
"learning_rate": 5.081221572449643e-05, |
|
"loss": 0.1194, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 23.333333333333332, |
|
"grad_norm": 0.652642548084259, |
|
"learning_rate": 4.951267056530214e-05, |
|
"loss": 0.1016, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 23.50877192982456, |
|
"grad_norm": 1.584074854850769, |
|
"learning_rate": 4.821312540610786e-05, |
|
"loss": 0.1109, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 23.68421052631579, |
|
"grad_norm": 0.5799722075462341, |
|
"learning_rate": 4.691358024691358e-05, |
|
"loss": 0.0901, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 23.859649122807017, |
|
"grad_norm": 1.9589979648590088, |
|
"learning_rate": 4.56140350877193e-05, |
|
"loss": 0.1195, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 24.035087719298247, |
|
"grad_norm": 0.784710705280304, |
|
"learning_rate": 4.431448992852502e-05, |
|
"loss": 0.1318, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 24.210526315789473, |
|
"grad_norm": 1.0715792179107666, |
|
"learning_rate": 4.301494476933073e-05, |
|
"loss": 0.1236, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 24.385964912280702, |
|
"grad_norm": 0.8761755228042603, |
|
"learning_rate": 4.1715399610136454e-05, |
|
"loss": 0.1076, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 24.56140350877193, |
|
"grad_norm": 0.8874859809875488, |
|
"learning_rate": 4.041585445094217e-05, |
|
"loss": 0.1019, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.56140350877193, |
|
"eval_accuracy": 0.9431111111111111, |
|
"eval_f1": 0.8490566037735849, |
|
"eval_loss": 0.15768744051456451, |
|
"eval_precision": 0.8674698795180723, |
|
"eval_recall": 0.8314087759815243, |
|
"eval_runtime": 1.817, |
|
"eval_samples_per_second": 123.828, |
|
"eval_steps_per_second": 15.96, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.736842105263158, |
|
"grad_norm": 0.569615364074707, |
|
"learning_rate": 3.911630929174789e-05, |
|
"loss": 0.1114, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 24.912280701754387, |
|
"grad_norm": 0.4636388123035431, |
|
"learning_rate": 3.7816764132553604e-05, |
|
"loss": 0.1016, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 25.087719298245613, |
|
"grad_norm": 0.7966068983078003, |
|
"learning_rate": 3.6517218973359325e-05, |
|
"loss": 0.1181, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 25.263157894736842, |
|
"grad_norm": 0.7331326603889465, |
|
"learning_rate": 3.521767381416504e-05, |
|
"loss": 0.1037, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 25.43859649122807, |
|
"grad_norm": 1.1376439332962036, |
|
"learning_rate": 3.391812865497076e-05, |
|
"loss": 0.091, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.614035087719298, |
|
"grad_norm": 0.43491020798683167, |
|
"learning_rate": 3.2618583495776475e-05, |
|
"loss": 0.102, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 25.789473684210527, |
|
"grad_norm": 0.9410120844841003, |
|
"learning_rate": 3.1319038336582196e-05, |
|
"loss": 0.1108, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 25.964912280701753, |
|
"grad_norm": 0.9321810603141785, |
|
"learning_rate": 3.0019493177387914e-05, |
|
"loss": 0.1059, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 26.140350877192983, |
|
"grad_norm": 0.5571371912956238, |
|
"learning_rate": 2.871994801819363e-05, |
|
"loss": 0.0926, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 26.31578947368421, |
|
"grad_norm": 1.9081007242202759, |
|
"learning_rate": 2.742040285899935e-05, |
|
"loss": 0.1141, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.31578947368421, |
|
"eval_accuracy": 0.9413333333333334, |
|
"eval_f1": 0.8472222222222222, |
|
"eval_loss": 0.15601032972335815, |
|
"eval_precision": 0.8491879350348028, |
|
"eval_recall": 0.8452655889145496, |
|
"eval_runtime": 1.867, |
|
"eval_samples_per_second": 120.511, |
|
"eval_steps_per_second": 15.533, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.49122807017544, |
|
"grad_norm": 0.8356673121452332, |
|
"learning_rate": 2.6120857699805067e-05, |
|
"loss": 0.1077, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 1.3644295930862427, |
|
"learning_rate": 2.4821312540610784e-05, |
|
"loss": 0.1212, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 26.842105263157894, |
|
"grad_norm": 0.779222309589386, |
|
"learning_rate": 2.3521767381416506e-05, |
|
"loss": 0.1229, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 27.017543859649123, |
|
"grad_norm": 0.5873481631278992, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0998, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 27.19298245614035, |
|
"grad_norm": 0.9948704242706299, |
|
"learning_rate": 2.092267706302794e-05, |
|
"loss": 0.1435, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 27.36842105263158, |
|
"grad_norm": 0.32820120453834534, |
|
"learning_rate": 1.962313190383366e-05, |
|
"loss": 0.0992, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 27.54385964912281, |
|
"grad_norm": 1.0797744989395142, |
|
"learning_rate": 1.8323586744639376e-05, |
|
"loss": 0.1095, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 27.719298245614034, |
|
"grad_norm": 1.5036197900772095, |
|
"learning_rate": 1.7024041585445094e-05, |
|
"loss": 0.119, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 27.894736842105264, |
|
"grad_norm": 1.0871007442474365, |
|
"learning_rate": 1.5724496426250812e-05, |
|
"loss": 0.0974, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 28.07017543859649, |
|
"grad_norm": 0.6861986517906189, |
|
"learning_rate": 1.442495126705653e-05, |
|
"loss": 0.1087, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.07017543859649, |
|
"eval_accuracy": 0.9422222222222222, |
|
"eval_f1": 0.8491879350348028, |
|
"eval_loss": 0.15734025835990906, |
|
"eval_precision": 0.8531468531468531, |
|
"eval_recall": 0.8452655889145496, |
|
"eval_runtime": 3.5904, |
|
"eval_samples_per_second": 62.668, |
|
"eval_steps_per_second": 8.077, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.24561403508772, |
|
"grad_norm": 1.5399742126464844, |
|
"learning_rate": 1.3125406107862247e-05, |
|
"loss": 0.1243, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 28.42105263157895, |
|
"grad_norm": 0.7721771001815796, |
|
"learning_rate": 1.1825860948667967e-05, |
|
"loss": 0.0965, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 28.596491228070175, |
|
"grad_norm": 1.040131688117981, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.1133, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 28.771929824561404, |
|
"grad_norm": 0.9755656123161316, |
|
"learning_rate": 9.226770630279402e-06, |
|
"loss": 0.0885, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 28.94736842105263, |
|
"grad_norm": 0.5838367342948914, |
|
"learning_rate": 7.92722547108512e-06, |
|
"loss": 0.1134, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 29.12280701754386, |
|
"grad_norm": 1.698116421699524, |
|
"learning_rate": 6.6276803118908384e-06, |
|
"loss": 0.1278, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 29.29824561403509, |
|
"grad_norm": 0.581572413444519, |
|
"learning_rate": 5.328135152696556e-06, |
|
"loss": 0.1209, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 29.473684210526315, |
|
"grad_norm": 0.4100797772407532, |
|
"learning_rate": 4.028589993502274e-06, |
|
"loss": 0.1108, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 29.649122807017545, |
|
"grad_norm": 1.5013538599014282, |
|
"learning_rate": 2.729044834307992e-06, |
|
"loss": 0.1195, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 29.82456140350877, |
|
"grad_norm": 1.0121512413024902, |
|
"learning_rate": 1.4294996751137102e-06, |
|
"loss": 0.1015, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 29.82456140350877, |
|
"eval_accuracy": 0.9422222222222222, |
|
"eval_f1": 0.8488372093023255, |
|
"eval_loss": 0.15452326834201813, |
|
"eval_precision": 0.8548009367681498, |
|
"eval_recall": 0.8429561200923787, |
|
"eval_runtime": 1.8193, |
|
"eval_samples_per_second": 123.672, |
|
"eval_steps_per_second": 15.94, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.770343780517578, |
|
"learning_rate": 1.299545159194282e-07, |
|
"loss": 0.1342, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 1710, |
|
"total_flos": 1.77124415883264e+17, |
|
"train_loss": 0.20865077226482637, |
|
"train_runtime": 373.9101, |
|
"train_samples_per_second": 72.21, |
|
"train_steps_per_second": 4.573 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1710, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.77124415883264e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|