|
{ |
|
"best_metric": 0.1725098043680191, |
|
"best_model_checkpoint": "frost-mobile-apple/mobilevit-xx-small-v2024-10-22/checkpoint-1500", |
|
"epoch": 30.0, |
|
"eval_steps": 100, |
|
"global_step": 1710, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 0.2625730037689209, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 0.6928, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 0.2961116135120392, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.6936, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.24333663284778595, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.6917, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 0.2218523770570755, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.6887, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 0.23965124785900116, |
|
"learning_rate": 5.847953216374269e-05, |
|
"loss": 0.685, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 0.23081418871879578, |
|
"learning_rate": 7.017543859649122e-05, |
|
"loss": 0.6815, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.2280701754385965, |
|
"grad_norm": 0.23212119936943054, |
|
"learning_rate": 8.187134502923976e-05, |
|
"loss": 0.676, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 0.2775309383869171, |
|
"learning_rate": 9.35672514619883e-05, |
|
"loss": 0.6711, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 0.38230618834495544, |
|
"learning_rate": 0.00010526315789473685, |
|
"loss": 0.6617, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 0.29050251841545105, |
|
"learning_rate": 0.00011695906432748539, |
|
"loss": 0.6549, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"eval_accuracy": 0.82, |
|
"eval_f1": 0.6260387811634349, |
|
"eval_loss": 0.6288657784461975, |
|
"eval_precision": 0.5191424196018377, |
|
"eval_recall": 0.7883720930232558, |
|
"eval_runtime": 2.6915, |
|
"eval_samples_per_second": 83.597, |
|
"eval_steps_per_second": 10.775, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9298245614035088, |
|
"grad_norm": 0.3310299217700958, |
|
"learning_rate": 0.0001286549707602339, |
|
"loss": 0.6389, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 0.35385212302207947, |
|
"learning_rate": 0.00014035087719298245, |
|
"loss": 0.6276, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.280701754385965, |
|
"grad_norm": 0.31887122988700867, |
|
"learning_rate": 0.00015204678362573098, |
|
"loss": 0.6068, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 0.38656044006347656, |
|
"learning_rate": 0.00016374269005847952, |
|
"loss": 0.5876, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 0.40553656220436096, |
|
"learning_rate": 0.00017543859649122806, |
|
"loss": 0.5782, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 0.5055739879608154, |
|
"learning_rate": 0.0001871345029239766, |
|
"loss": 0.546, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"grad_norm": 0.6473321318626404, |
|
"learning_rate": 0.00019883040935672513, |
|
"loss": 0.5322, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.1578947368421053, |
|
"grad_norm": 0.5542100667953491, |
|
"learning_rate": 0.00019883040935672513, |
|
"loss": 0.5081, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 0.525965690612793, |
|
"learning_rate": 0.00019753086419753085, |
|
"loss": 0.4906, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"grad_norm": 0.6686927676200867, |
|
"learning_rate": 0.00019623131903833657, |
|
"loss": 0.4616, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.5087719298245617, |
|
"eval_accuracy": 0.8866666666666667, |
|
"eval_f1": 0.7295864262990456, |
|
"eval_loss": 0.41918542981147766, |
|
"eval_precision": 0.6705653021442495, |
|
"eval_recall": 0.8, |
|
"eval_runtime": 2.7897, |
|
"eval_samples_per_second": 80.654, |
|
"eval_steps_per_second": 10.395, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.6842105263157894, |
|
"grad_norm": 1.5412182807922363, |
|
"learning_rate": 0.0001949317738791423, |
|
"loss": 0.4506, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.8596491228070176, |
|
"grad_norm": 0.4173012375831604, |
|
"learning_rate": 0.00019363222871994802, |
|
"loss": 0.4371, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.035087719298246, |
|
"grad_norm": 0.42248570919036865, |
|
"learning_rate": 0.00019233268356075374, |
|
"loss": 0.4064, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.2105263157894735, |
|
"grad_norm": 0.5491617918014526, |
|
"learning_rate": 0.00019103313840155946, |
|
"loss": 0.3724, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.385964912280702, |
|
"grad_norm": 0.35062703490257263, |
|
"learning_rate": 0.00018973359324236518, |
|
"loss": 0.3671, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.56140350877193, |
|
"grad_norm": 0.40491071343421936, |
|
"learning_rate": 0.0001884340480831709, |
|
"loss": 0.3683, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.7368421052631575, |
|
"grad_norm": 0.9965174794197083, |
|
"learning_rate": 0.0001871345029239766, |
|
"loss": 0.3402, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.912280701754386, |
|
"grad_norm": 0.7184051275253296, |
|
"learning_rate": 0.00018583495776478232, |
|
"loss": 0.3348, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.087719298245614, |
|
"grad_norm": 1.8915038108825684, |
|
"learning_rate": 0.00018453541260558804, |
|
"loss": 0.32, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"grad_norm": 0.5761589407920837, |
|
"learning_rate": 0.00018323586744639376, |
|
"loss": 0.3101, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.2631578947368425, |
|
"eval_accuracy": 0.9035555555555556, |
|
"eval_f1": 0.7317676143386898, |
|
"eval_loss": 0.30708780884742737, |
|
"eval_precision": 0.7810026385224275, |
|
"eval_recall": 0.6883720930232559, |
|
"eval_runtime": 2.8811, |
|
"eval_samples_per_second": 78.095, |
|
"eval_steps_per_second": 10.066, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.43859649122807, |
|
"grad_norm": 1.1592423915863037, |
|
"learning_rate": 0.00018193632228719948, |
|
"loss": 0.3258, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.614035087719298, |
|
"grad_norm": 0.8307028412818909, |
|
"learning_rate": 0.0001806367771280052, |
|
"loss": 0.3149, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.7894736842105265, |
|
"grad_norm": 0.9469823837280273, |
|
"learning_rate": 0.00017933723196881092, |
|
"loss": 0.3033, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.964912280701754, |
|
"grad_norm": 2.199500322341919, |
|
"learning_rate": 0.00017803768680961664, |
|
"loss": 0.3164, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 6.140350877192983, |
|
"grad_norm": 0.6772398948669434, |
|
"learning_rate": 0.00017673814165042236, |
|
"loss": 0.2806, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.315789473684211, |
|
"grad_norm": 0.4862241744995117, |
|
"learning_rate": 0.00017543859649122806, |
|
"loss": 0.2817, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.491228070175438, |
|
"grad_norm": 1.2349482774734497, |
|
"learning_rate": 0.00017413905133203378, |
|
"loss": 0.288, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 2.9781813621520996, |
|
"learning_rate": 0.00017296946068875895, |
|
"loss": 0.3039, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.842105263157895, |
|
"grad_norm": 0.7632750272750854, |
|
"learning_rate": 0.00017166991552956468, |
|
"loss": 0.2836, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"grad_norm": 1.2420198917388916, |
|
"learning_rate": 0.00017037037037037037, |
|
"loss": 0.2932, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.017543859649122, |
|
"eval_accuracy": 0.908, |
|
"eval_f1": 0.7460122699386503, |
|
"eval_loss": 0.24856920540332794, |
|
"eval_precision": 0.7896103896103897, |
|
"eval_recall": 0.7069767441860465, |
|
"eval_runtime": 2.8347, |
|
"eval_samples_per_second": 79.373, |
|
"eval_steps_per_second": 10.23, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.192982456140351, |
|
"grad_norm": 0.8554529547691345, |
|
"learning_rate": 0.0001690708252111761, |
|
"loss": 0.2583, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 7.368421052631579, |
|
"grad_norm": 0.5736662745475769, |
|
"learning_rate": 0.0001677712800519818, |
|
"loss": 0.2809, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.543859649122807, |
|
"grad_norm": 0.7552086114883423, |
|
"learning_rate": 0.00016647173489278753, |
|
"loss": 0.2774, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.719298245614035, |
|
"grad_norm": 0.6094131469726562, |
|
"learning_rate": 0.00016517218973359325, |
|
"loss": 0.2771, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.894736842105263, |
|
"grad_norm": 0.5392113924026489, |
|
"learning_rate": 0.00016387264457439898, |
|
"loss": 0.2755, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.070175438596491, |
|
"grad_norm": 0.4927959740161896, |
|
"learning_rate": 0.0001625730994152047, |
|
"loss": 0.2572, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 8.24561403508772, |
|
"grad_norm": 0.9484465718269348, |
|
"learning_rate": 0.00016127355425601042, |
|
"loss": 0.2354, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 8.421052631578947, |
|
"grad_norm": 0.71286940574646, |
|
"learning_rate": 0.0001599740090968161, |
|
"loss": 0.2611, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.596491228070175, |
|
"grad_norm": 1.9641995429992676, |
|
"learning_rate": 0.00015867446393762183, |
|
"loss": 0.2547, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"grad_norm": 1.1893583536148071, |
|
"learning_rate": 0.00015737491877842755, |
|
"loss": 0.2652, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.771929824561404, |
|
"eval_accuracy": 0.9137777777777778, |
|
"eval_f1": 0.7673860911270983, |
|
"eval_loss": 0.22792504727840424, |
|
"eval_precision": 0.7920792079207921, |
|
"eval_recall": 0.7441860465116279, |
|
"eval_runtime": 1.8141, |
|
"eval_samples_per_second": 124.03, |
|
"eval_steps_per_second": 15.986, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.947368421052632, |
|
"grad_norm": 1.0071460008621216, |
|
"learning_rate": 0.00015607537361923327, |
|
"loss": 0.244, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 9.12280701754386, |
|
"grad_norm": 1.22650146484375, |
|
"learning_rate": 0.000154775828460039, |
|
"loss": 0.2377, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 9.298245614035087, |
|
"grad_norm": 2.428567886352539, |
|
"learning_rate": 0.00015347628330084472, |
|
"loss": 0.2494, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 9.473684210526315, |
|
"grad_norm": 1.8254860639572144, |
|
"learning_rate": 0.00015217673814165044, |
|
"loss": 0.2603, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.649122807017545, |
|
"grad_norm": 0.6592786908149719, |
|
"learning_rate": 0.00015087719298245616, |
|
"loss": 0.2597, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.824561403508772, |
|
"grad_norm": 1.3194756507873535, |
|
"learning_rate": 0.00014957764782326188, |
|
"loss": 0.2313, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.1871612071990967, |
|
"learning_rate": 0.00014827810266406757, |
|
"loss": 0.282, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 10.175438596491228, |
|
"grad_norm": 0.759860098361969, |
|
"learning_rate": 0.0001469785575048733, |
|
"loss": 0.2319, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 10.350877192982455, |
|
"grad_norm": 1.447387933731079, |
|
"learning_rate": 0.00014567901234567902, |
|
"loss": 0.2457, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"grad_norm": 0.9954220056533813, |
|
"learning_rate": 0.00014437946718648474, |
|
"loss": 0.2253, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.526315789473685, |
|
"eval_accuracy": 0.9217777777777778, |
|
"eval_f1": 0.7858880778588808, |
|
"eval_loss": 0.21004962921142578, |
|
"eval_precision": 0.8239795918367347, |
|
"eval_recall": 0.7511627906976744, |
|
"eval_runtime": 4.2925, |
|
"eval_samples_per_second": 52.417, |
|
"eval_steps_per_second": 6.756, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.701754385964913, |
|
"grad_norm": 0.715815544128418, |
|
"learning_rate": 0.00014307992202729046, |
|
"loss": 0.2391, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.87719298245614, |
|
"grad_norm": 0.6449007391929626, |
|
"learning_rate": 0.00014178037686809618, |
|
"loss": 0.2516, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 11.052631578947368, |
|
"grad_norm": 0.9613096117973328, |
|
"learning_rate": 0.0001404808317089019, |
|
"loss": 0.2157, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 11.228070175438596, |
|
"grad_norm": 2.206623077392578, |
|
"learning_rate": 0.00013918128654970762, |
|
"loss": 0.2365, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 11.403508771929825, |
|
"grad_norm": 1.8694980144500732, |
|
"learning_rate": 0.00013788174139051334, |
|
"loss": 0.2263, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.578947368421053, |
|
"grad_norm": 0.7060205340385437, |
|
"learning_rate": 0.00013658219623131904, |
|
"loss": 0.2173, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.75438596491228, |
|
"grad_norm": 0.8581671714782715, |
|
"learning_rate": 0.00013528265107212476, |
|
"loss": 0.2204, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.929824561403509, |
|
"grad_norm": 1.196590781211853, |
|
"learning_rate": 0.00013398310591293048, |
|
"loss": 0.2519, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 12.105263157894736, |
|
"grad_norm": 1.0726817846298218, |
|
"learning_rate": 0.0001326835607537362, |
|
"loss": 0.2184, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 12.280701754385966, |
|
"grad_norm": 0.6241493821144104, |
|
"learning_rate": 0.00013138401559454192, |
|
"loss": 0.2257, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.280701754385966, |
|
"eval_accuracy": 0.9248888888888889, |
|
"eval_f1": 0.8018757327080891, |
|
"eval_loss": 0.19510744512081146, |
|
"eval_precision": 0.8085106382978723, |
|
"eval_recall": 0.7953488372093023, |
|
"eval_runtime": 2.9139, |
|
"eval_samples_per_second": 77.217, |
|
"eval_steps_per_second": 9.952, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 12.456140350877194, |
|
"grad_norm": 1.382541298866272, |
|
"learning_rate": 0.00013008447043534764, |
|
"loss": 0.217, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.631578947368421, |
|
"grad_norm": 0.7372106909751892, |
|
"learning_rate": 0.00012878492527615336, |
|
"loss": 0.2209, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.807017543859649, |
|
"grad_norm": 1.3437495231628418, |
|
"learning_rate": 0.00012748538011695908, |
|
"loss": 0.2215, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 12.982456140350877, |
|
"grad_norm": 0.8328105807304382, |
|
"learning_rate": 0.0001261858349577648, |
|
"loss": 0.247, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 13.157894736842104, |
|
"grad_norm": 1.166037917137146, |
|
"learning_rate": 0.0001248862897985705, |
|
"loss": 0.2362, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 1.8687838315963745, |
|
"learning_rate": 0.00012358674463937622, |
|
"loss": 0.2247, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 13.508771929824562, |
|
"grad_norm": 1.2782139778137207, |
|
"learning_rate": 0.00012228719948018194, |
|
"loss": 0.2134, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.68421052631579, |
|
"grad_norm": 1.114933967590332, |
|
"learning_rate": 0.00012098765432098766, |
|
"loss": 0.1965, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.859649122807017, |
|
"grad_norm": 1.7937145233154297, |
|
"learning_rate": 0.00011968810916179338, |
|
"loss": 0.2124, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 14.035087719298245, |
|
"grad_norm": 1.6698014736175537, |
|
"learning_rate": 0.0001183885640025991, |
|
"loss": 0.2468, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.035087719298245, |
|
"eval_accuracy": 0.9306666666666666, |
|
"eval_f1": 0.8198614318706697, |
|
"eval_loss": 0.19064003229141235, |
|
"eval_precision": 0.8142201834862385, |
|
"eval_recall": 0.8255813953488372, |
|
"eval_runtime": 2.8315, |
|
"eval_samples_per_second": 79.464, |
|
"eval_steps_per_second": 10.242, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.210526315789474, |
|
"grad_norm": 0.6950424313545227, |
|
"learning_rate": 0.00011708901884340481, |
|
"loss": 0.2004, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 14.385964912280702, |
|
"grad_norm": 1.5043634176254272, |
|
"learning_rate": 0.00011578947368421053, |
|
"loss": 0.2317, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 14.56140350877193, |
|
"grad_norm": 1.2491843700408936, |
|
"learning_rate": 0.00011448992852501626, |
|
"loss": 0.2027, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.736842105263158, |
|
"grad_norm": 0.6502349376678467, |
|
"learning_rate": 0.00011319038336582198, |
|
"loss": 0.2112, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.912280701754385, |
|
"grad_norm": 0.40061789751052856, |
|
"learning_rate": 0.0001118908382066277, |
|
"loss": 0.1756, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 15.087719298245615, |
|
"grad_norm": 2.8378994464874268, |
|
"learning_rate": 0.0001105912930474334, |
|
"loss": 0.2216, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 15.263157894736842, |
|
"grad_norm": 1.7187498807907104, |
|
"learning_rate": 0.00010929174788823913, |
|
"loss": 0.2072, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 15.43859649122807, |
|
"grad_norm": 1.774376392364502, |
|
"learning_rate": 0.00010799220272904485, |
|
"loss": 0.239, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 15.614035087719298, |
|
"grad_norm": 1.5812989473342896, |
|
"learning_rate": 0.00010669265756985057, |
|
"loss": 0.2191, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 15.789473684210526, |
|
"grad_norm": 0.9877386689186096, |
|
"learning_rate": 0.00010539311241065628, |
|
"loss": 0.1796, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.789473684210526, |
|
"eval_accuracy": 0.9275555555555556, |
|
"eval_f1": 0.81199538638985, |
|
"eval_loss": 0.19485591351985931, |
|
"eval_precision": 0.8054919908466819, |
|
"eval_recall": 0.8186046511627907, |
|
"eval_runtime": 1.8216, |
|
"eval_samples_per_second": 123.517, |
|
"eval_steps_per_second": 15.92, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 15.964912280701755, |
|
"grad_norm": 1.059669017791748, |
|
"learning_rate": 0.000104093567251462, |
|
"loss": 0.1838, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 16.140350877192983, |
|
"grad_norm": 1.4218086004257202, |
|
"learning_rate": 0.00010279402209226772, |
|
"loss": 0.2281, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 16.31578947368421, |
|
"grad_norm": 1.2070213556289673, |
|
"learning_rate": 0.00010149447693307344, |
|
"loss": 0.1997, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 16.49122807017544, |
|
"grad_norm": 2.351250410079956, |
|
"learning_rate": 0.00010019493177387915, |
|
"loss": 0.1843, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 16.666666666666668, |
|
"grad_norm": 0.8852570056915283, |
|
"learning_rate": 9.889538661468485e-05, |
|
"loss": 0.2357, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 16.842105263157894, |
|
"grad_norm": 2.0466091632843018, |
|
"learning_rate": 9.759584145549058e-05, |
|
"loss": 0.2277, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 17.017543859649123, |
|
"grad_norm": 5.798379898071289, |
|
"learning_rate": 9.62962962962963e-05, |
|
"loss": 0.2246, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 17.19298245614035, |
|
"grad_norm": 1.6754958629608154, |
|
"learning_rate": 9.499675113710202e-05, |
|
"loss": 0.1904, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 17.36842105263158, |
|
"grad_norm": 0.6962611675262451, |
|
"learning_rate": 9.369720597790773e-05, |
|
"loss": 0.202, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 17.54385964912281, |
|
"grad_norm": 0.5351881384849548, |
|
"learning_rate": 9.239766081871345e-05, |
|
"loss": 0.1888, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.54385964912281, |
|
"eval_accuracy": 0.9306666666666666, |
|
"eval_f1": 0.8177570093457944, |
|
"eval_loss": 0.18066002428531647, |
|
"eval_precision": 0.8215962441314554, |
|
"eval_recall": 0.813953488372093, |
|
"eval_runtime": 1.8596, |
|
"eval_samples_per_second": 120.996, |
|
"eval_steps_per_second": 15.595, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.719298245614034, |
|
"grad_norm": 1.2162110805511475, |
|
"learning_rate": 9.109811565951917e-05, |
|
"loss": 0.1789, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 17.894736842105264, |
|
"grad_norm": 1.2040334939956665, |
|
"learning_rate": 8.979857050032489e-05, |
|
"loss": 0.2109, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 18.07017543859649, |
|
"grad_norm": 0.8599823117256165, |
|
"learning_rate": 8.849902534113061e-05, |
|
"loss": 0.2113, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 18.24561403508772, |
|
"grad_norm": 1.0291296243667603, |
|
"learning_rate": 8.719948018193632e-05, |
|
"loss": 0.1981, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 18.42105263157895, |
|
"grad_norm": 3.214996576309204, |
|
"learning_rate": 8.589993502274204e-05, |
|
"loss": 0.1903, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 18.596491228070175, |
|
"grad_norm": 1.1698780059814453, |
|
"learning_rate": 8.460038986354776e-05, |
|
"loss": 0.192, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 18.771929824561404, |
|
"grad_norm": 3.0040793418884277, |
|
"learning_rate": 8.330084470435348e-05, |
|
"loss": 0.2062, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 18.94736842105263, |
|
"grad_norm": 1.365694522857666, |
|
"learning_rate": 8.200129954515919e-05, |
|
"loss": 0.1885, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 19.12280701754386, |
|
"grad_norm": 0.5183665156364441, |
|
"learning_rate": 8.070175438596491e-05, |
|
"loss": 0.2089, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 19.29824561403509, |
|
"grad_norm": 0.6474595069885254, |
|
"learning_rate": 7.940220922677063e-05, |
|
"loss": 0.202, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.29824561403509, |
|
"eval_accuracy": 0.9342222222222222, |
|
"eval_f1": 0.8287037037037037, |
|
"eval_loss": 0.1772110015153885, |
|
"eval_precision": 0.8248847926267281, |
|
"eval_recall": 0.8325581395348837, |
|
"eval_runtime": 2.2017, |
|
"eval_samples_per_second": 102.193, |
|
"eval_steps_per_second": 13.171, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 19.473684210526315, |
|
"grad_norm": 0.7569323778152466, |
|
"learning_rate": 7.810266406757635e-05, |
|
"loss": 0.2037, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 19.649122807017545, |
|
"grad_norm": 1.068310260772705, |
|
"learning_rate": 7.680311890838207e-05, |
|
"loss": 0.1842, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 19.82456140350877, |
|
"grad_norm": 1.1388903856277466, |
|
"learning_rate": 7.550357374918778e-05, |
|
"loss": 0.2057, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 5.906609535217285, |
|
"learning_rate": 7.42040285899935e-05, |
|
"loss": 0.2088, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 20.17543859649123, |
|
"grad_norm": 0.9702988862991333, |
|
"learning_rate": 7.290448343079922e-05, |
|
"loss": 0.1939, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 20.350877192982455, |
|
"grad_norm": 3.627027988433838, |
|
"learning_rate": 7.160493827160494e-05, |
|
"loss": 0.1953, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 20.526315789473685, |
|
"grad_norm": 1.11257004737854, |
|
"learning_rate": 7.030539311241065e-05, |
|
"loss": 0.1908, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 20.70175438596491, |
|
"grad_norm": 1.626079797744751, |
|
"learning_rate": 6.900584795321637e-05, |
|
"loss": 0.2011, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 20.87719298245614, |
|
"grad_norm": 1.8711522817611694, |
|
"learning_rate": 6.770630279402209e-05, |
|
"loss": 0.2106, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 21.05263157894737, |
|
"grad_norm": 2.9188010692596436, |
|
"learning_rate": 6.640675763482781e-05, |
|
"loss": 0.1824, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.05263157894737, |
|
"eval_accuracy": 0.9275555555555556, |
|
"eval_f1": 0.8080094228504122, |
|
"eval_loss": 0.18258829414844513, |
|
"eval_precision": 0.8186157517899761, |
|
"eval_recall": 0.7976744186046512, |
|
"eval_runtime": 1.8368, |
|
"eval_samples_per_second": 122.496, |
|
"eval_steps_per_second": 15.788, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 21.228070175438596, |
|
"grad_norm": 1.2156211137771606, |
|
"learning_rate": 6.510721247563352e-05, |
|
"loss": 0.1926, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 21.403508771929825, |
|
"grad_norm": 0.5184522271156311, |
|
"learning_rate": 6.380766731643924e-05, |
|
"loss": 0.1695, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 21.57894736842105, |
|
"grad_norm": 1.8020312786102295, |
|
"learning_rate": 6.250812215724496e-05, |
|
"loss": 0.2222, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 21.75438596491228, |
|
"grad_norm": 1.808860421180725, |
|
"learning_rate": 6.120857699805068e-05, |
|
"loss": 0.2026, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 21.92982456140351, |
|
"grad_norm": 0.5891908407211304, |
|
"learning_rate": 5.99090318388564e-05, |
|
"loss": 0.1861, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 22.105263157894736, |
|
"grad_norm": 0.7829120755195618, |
|
"learning_rate": 5.860948667966212e-05, |
|
"loss": 0.1911, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 22.280701754385966, |
|
"grad_norm": 0.8304038643836975, |
|
"learning_rate": 5.7309941520467835e-05, |
|
"loss": 0.1595, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 22.45614035087719, |
|
"grad_norm": 0.9477715492248535, |
|
"learning_rate": 5.6010396361273556e-05, |
|
"loss": 0.2207, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 22.63157894736842, |
|
"grad_norm": 1.6679517030715942, |
|
"learning_rate": 5.471085120207927e-05, |
|
"loss": 0.1885, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 22.80701754385965, |
|
"grad_norm": 1.945037603378296, |
|
"learning_rate": 5.341130604288499e-05, |
|
"loss": 0.1808, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 22.80701754385965, |
|
"eval_accuracy": 0.9346666666666666, |
|
"eval_f1": 0.8296639629200464, |
|
"eval_loss": 0.16815528273582458, |
|
"eval_precision": 0.8267898383371824, |
|
"eval_recall": 0.8325581395348837, |
|
"eval_runtime": 2.4881, |
|
"eval_samples_per_second": 90.429, |
|
"eval_steps_per_second": 11.655, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 22.982456140350877, |
|
"grad_norm": 0.7030972242355347, |
|
"learning_rate": 5.2111760883690706e-05, |
|
"loss": 0.1717, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 23.157894736842106, |
|
"grad_norm": 1.0622111558914185, |
|
"learning_rate": 5.081221572449643e-05, |
|
"loss": 0.2162, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 23.333333333333332, |
|
"grad_norm": 1.3687249422073364, |
|
"learning_rate": 4.951267056530214e-05, |
|
"loss": 0.197, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 23.50877192982456, |
|
"grad_norm": 1.218827724456787, |
|
"learning_rate": 4.821312540610786e-05, |
|
"loss": 0.1811, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 23.68421052631579, |
|
"grad_norm": 3.9379024505615234, |
|
"learning_rate": 4.691358024691358e-05, |
|
"loss": 0.1896, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 23.859649122807017, |
|
"grad_norm": 0.9299766421318054, |
|
"learning_rate": 4.56140350877193e-05, |
|
"loss": 0.1663, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 24.035087719298247, |
|
"grad_norm": 4.373446941375732, |
|
"learning_rate": 4.431448992852502e-05, |
|
"loss": 0.1883, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 24.210526315789473, |
|
"grad_norm": 1.0416285991668701, |
|
"learning_rate": 4.301494476933073e-05, |
|
"loss": 0.1884, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 24.385964912280702, |
|
"grad_norm": 1.9816950559616089, |
|
"learning_rate": 4.1715399610136454e-05, |
|
"loss": 0.1949, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 24.56140350877193, |
|
"grad_norm": 1.6888455152511597, |
|
"learning_rate": 4.041585445094217e-05, |
|
"loss": 0.1792, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.56140350877193, |
|
"eval_accuracy": 0.9364444444444444, |
|
"eval_f1": 0.8323563892145369, |
|
"eval_loss": 0.16882646083831787, |
|
"eval_precision": 0.8392434988179669, |
|
"eval_recall": 0.8255813953488372, |
|
"eval_runtime": 1.8209, |
|
"eval_samples_per_second": 123.562, |
|
"eval_steps_per_second": 15.926, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 24.736842105263158, |
|
"grad_norm": 0.6522326469421387, |
|
"learning_rate": 3.911630929174789e-05, |
|
"loss": 0.149, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 24.912280701754387, |
|
"grad_norm": 1.053612470626831, |
|
"learning_rate": 3.7816764132553604e-05, |
|
"loss": 0.1983, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 25.087719298245613, |
|
"grad_norm": 1.025525689125061, |
|
"learning_rate": 3.664717348927875e-05, |
|
"loss": 0.1973, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 25.263157894736842, |
|
"grad_norm": 2.1537649631500244, |
|
"learning_rate": 3.534762833008447e-05, |
|
"loss": 0.1797, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 25.43859649122807, |
|
"grad_norm": 2.6327617168426514, |
|
"learning_rate": 3.404808317089019e-05, |
|
"loss": 0.1693, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 25.614035087719298, |
|
"grad_norm": 1.1369807720184326, |
|
"learning_rate": 3.274853801169591e-05, |
|
"loss": 0.1826, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 25.789473684210527, |
|
"grad_norm": 2.0842247009277344, |
|
"learning_rate": 3.1448992852501624e-05, |
|
"loss": 0.1778, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 25.964912280701753, |
|
"grad_norm": 0.8993640542030334, |
|
"learning_rate": 3.014944769330734e-05, |
|
"loss": 0.1688, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 26.140350877192983, |
|
"grad_norm": 0.9640088677406311, |
|
"learning_rate": 2.8979857050032487e-05, |
|
"loss": 0.2139, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 26.31578947368421, |
|
"grad_norm": 1.134974718093872, |
|
"learning_rate": 2.7680311890838205e-05, |
|
"loss": 0.1852, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.31578947368421, |
|
"eval_accuracy": 0.9337777777777778, |
|
"eval_f1": 0.826945412311266, |
|
"eval_loss": 0.1725098043680191, |
|
"eval_precision": 0.8259860788863109, |
|
"eval_recall": 0.827906976744186, |
|
"eval_runtime": 1.8397, |
|
"eval_samples_per_second": 122.305, |
|
"eval_steps_per_second": 15.764, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 26.49122807017544, |
|
"grad_norm": 0.3995600640773773, |
|
"learning_rate": 2.6380766731643926e-05, |
|
"loss": 0.1703, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 1.8065487146377563, |
|
"learning_rate": 2.5081221572449644e-05, |
|
"loss": 0.2017, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 26.842105263157894, |
|
"grad_norm": 2.3725926876068115, |
|
"learning_rate": 2.378167641325536e-05, |
|
"loss": 0.1926, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 27.017543859649123, |
|
"grad_norm": 1.9128490686416626, |
|
"learning_rate": 2.248213125406108e-05, |
|
"loss": 0.1771, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 27.19298245614035, |
|
"grad_norm": 1.2254141569137573, |
|
"learning_rate": 2.1182586094866797e-05, |
|
"loss": 0.1791, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 27.36842105263158, |
|
"grad_norm": 1.3266674280166626, |
|
"learning_rate": 1.9883040935672515e-05, |
|
"loss": 0.1671, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 27.54385964912281, |
|
"grad_norm": 1.2818776369094849, |
|
"learning_rate": 1.8583495776478232e-05, |
|
"loss": 0.17, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 27.719298245614034, |
|
"grad_norm": 1.0659555196762085, |
|
"learning_rate": 1.728395061728395e-05, |
|
"loss": 0.1712, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 27.894736842105264, |
|
"grad_norm": 1.0451716184616089, |
|
"learning_rate": 1.5984405458089668e-05, |
|
"loss": 0.1854, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 28.07017543859649, |
|
"grad_norm": 2.3844401836395264, |
|
"learning_rate": 1.4684860298895387e-05, |
|
"loss": 0.177, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.07017543859649, |
|
"eval_accuracy": 0.9351111111111111, |
|
"eval_f1": 0.8282352941176471, |
|
"eval_loss": 0.16903221607208252, |
|
"eval_precision": 0.8380952380952381, |
|
"eval_recall": 0.8186046511627907, |
|
"eval_runtime": 2.2907, |
|
"eval_samples_per_second": 98.225, |
|
"eval_steps_per_second": 12.66, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 28.24561403508772, |
|
"grad_norm": 1.8458149433135986, |
|
"learning_rate": 1.3385315139701105e-05, |
|
"loss": 0.2091, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 28.42105263157895, |
|
"grad_norm": 0.7621822953224182, |
|
"learning_rate": 1.2085769980506823e-05, |
|
"loss": 0.1626, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 28.596491228070175, |
|
"grad_norm": 0.9533030986785889, |
|
"learning_rate": 1.078622482131254e-05, |
|
"loss": 0.1872, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 28.771929824561404, |
|
"grad_norm": 1.495856761932373, |
|
"learning_rate": 9.486679662118258e-06, |
|
"loss": 0.1816, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 28.94736842105263, |
|
"grad_norm": 1.397376537322998, |
|
"learning_rate": 8.187134502923977e-06, |
|
"loss": 0.1821, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 29.12280701754386, |
|
"grad_norm": 2.07928729057312, |
|
"learning_rate": 6.887589343729694e-06, |
|
"loss": 0.1801, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 29.29824561403509, |
|
"grad_norm": 1.2872428894042969, |
|
"learning_rate": 5.588044184535413e-06, |
|
"loss": 0.1835, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 29.473684210526315, |
|
"grad_norm": 0.40397679805755615, |
|
"learning_rate": 4.2884990253411305e-06, |
|
"loss": 0.1597, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 29.649122807017545, |
|
"grad_norm": 1.12138032913208, |
|
"learning_rate": 2.9889538661468487e-06, |
|
"loss": 0.1771, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 29.82456140350877, |
|
"grad_norm": 1.8918460607528687, |
|
"learning_rate": 1.6894087069525666e-06, |
|
"loss": 0.1857, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 29.82456140350877, |
|
"eval_accuracy": 0.9297777777777778, |
|
"eval_f1": 0.8175519630484989, |
|
"eval_loss": 0.17081834375858307, |
|
"eval_precision": 0.8119266055045872, |
|
"eval_recall": 0.8232558139534883, |
|
"eval_runtime": 1.7795, |
|
"eval_samples_per_second": 126.443, |
|
"eval_steps_per_second": 16.297, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 1.7220489978790283, |
|
"learning_rate": 3.898635477582846e-07, |
|
"loss": 0.166, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 1710, |
|
"total_flos": 3.8465920659456e+16, |
|
"train_loss": 0.2702594916025797, |
|
"train_runtime": 403.8981, |
|
"train_samples_per_second": 66.849, |
|
"train_steps_per_second": 4.234 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1710, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.8465920659456e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|