|
{ |
|
"best_metric": 0.8838017754864972, |
|
"best_model_checkpoint": "videomae-surf-analytics-runpod/checkpoint-372", |
|
"epoch": 9.085245901639345, |
|
"eval_steps": 500, |
|
"global_step": 610, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01639344262295082, |
|
"grad_norm": 14.228123664855957, |
|
"learning_rate": 8.196721311475409e-06, |
|
"loss": 1.5374, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03278688524590164, |
|
"grad_norm": 8.263284683227539, |
|
"learning_rate": 1.6393442622950818e-05, |
|
"loss": 1.33, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04918032786885246, |
|
"grad_norm": 10.129837989807129, |
|
"learning_rate": 2.459016393442623e-05, |
|
"loss": 1.1214, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06557377049180328, |
|
"grad_norm": 5.447906017303467, |
|
"learning_rate": 3.2786885245901635e-05, |
|
"loss": 1.023, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08196721311475409, |
|
"grad_norm": 5.764439105987549, |
|
"learning_rate": 4.098360655737705e-05, |
|
"loss": 0.7497, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09836065573770492, |
|
"grad_norm": 6.531442165374756, |
|
"learning_rate": 4.918032786885246e-05, |
|
"loss": 0.6712, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10163934426229508, |
|
"eval_accuracy": 0.6680497925311203, |
|
"eval_f1": 0.6623298693885865, |
|
"eval_loss": 0.8671284317970276, |
|
"eval_runtime": 81.9529, |
|
"eval_samples_per_second": 2.941, |
|
"eval_steps_per_second": 0.256, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.0131147540983607, |
|
"grad_norm": 6.3692240715026855, |
|
"learning_rate": 4.918032786885246e-05, |
|
"loss": 0.5055, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.0295081967213116, |
|
"grad_norm": 7.524365425109863, |
|
"learning_rate": 4.8269581056466304e-05, |
|
"loss": 0.4465, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0459016393442624, |
|
"grad_norm": 3.4210591316223145, |
|
"learning_rate": 4.7358834244080144e-05, |
|
"loss": 0.3804, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0622950819672132, |
|
"grad_norm": 7.561025619506836, |
|
"learning_rate": 4.644808743169399e-05, |
|
"loss": 0.4557, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0786885245901638, |
|
"grad_norm": 2.567615509033203, |
|
"learning_rate": 4.553734061930783e-05, |
|
"loss": 0.3558, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0950819672131147, |
|
"grad_norm": 5.366397380828857, |
|
"learning_rate": 4.462659380692168e-05, |
|
"loss": 0.3119, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1016393442622952, |
|
"eval_accuracy": 0.7883817427385892, |
|
"eval_f1": 0.7887466325291007, |
|
"eval_loss": 0.5910844802856445, |
|
"eval_runtime": 82.6936, |
|
"eval_samples_per_second": 2.914, |
|
"eval_steps_per_second": 0.254, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 2.0098360655737704, |
|
"grad_norm": 2.0202689170837402, |
|
"learning_rate": 4.371584699453552e-05, |
|
"loss": 0.2248, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0262295081967214, |
|
"grad_norm": 5.155245780944824, |
|
"learning_rate": 4.280510018214937e-05, |
|
"loss": 0.2797, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.042622950819672, |
|
"grad_norm": 8.79983139038086, |
|
"learning_rate": 4.189435336976321e-05, |
|
"loss": 0.2696, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.059016393442623, |
|
"grad_norm": 2.6221516132354736, |
|
"learning_rate": 4.098360655737705e-05, |
|
"loss": 0.2733, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0754098360655737, |
|
"grad_norm": 1.32801353931427, |
|
"learning_rate": 4.007285974499089e-05, |
|
"loss": 0.159, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.091803278688525, |
|
"grad_norm": 7.7792792320251465, |
|
"learning_rate": 3.916211293260474e-05, |
|
"loss": 0.2505, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.101639344262295, |
|
"eval_accuracy": 0.8008298755186722, |
|
"eval_f1": 0.8002014090167812, |
|
"eval_loss": 0.5296825170516968, |
|
"eval_runtime": 80.6025, |
|
"eval_samples_per_second": 2.99, |
|
"eval_steps_per_second": 0.261, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 3.0065573770491802, |
|
"grad_norm": 6.463658332824707, |
|
"learning_rate": 3.825136612021858e-05, |
|
"loss": 0.3667, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0229508196721313, |
|
"grad_norm": 7.710232257843018, |
|
"learning_rate": 3.7340619307832425e-05, |
|
"loss": 0.1461, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.039344262295082, |
|
"grad_norm": 8.827251434326172, |
|
"learning_rate": 3.6429872495446266e-05, |
|
"loss": 0.1328, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.055737704918033, |
|
"grad_norm": 14.754895210266113, |
|
"learning_rate": 3.551912568306011e-05, |
|
"loss": 0.4213, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0721311475409836, |
|
"grad_norm": 7.798481464385986, |
|
"learning_rate": 3.4608378870673954e-05, |
|
"loss": 0.2256, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.088524590163934, |
|
"grad_norm": 1.4291377067565918, |
|
"learning_rate": 3.36976320582878e-05, |
|
"loss": 0.207, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.101639344262295, |
|
"eval_accuracy": 0.7800829875518672, |
|
"eval_f1": 0.778687519310884, |
|
"eval_loss": 0.5970368981361389, |
|
"eval_runtime": 76.1019, |
|
"eval_samples_per_second": 3.167, |
|
"eval_steps_per_second": 0.276, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 4.00327868852459, |
|
"grad_norm": 6.123374938964844, |
|
"learning_rate": 3.2786885245901635e-05, |
|
"loss": 0.1879, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.019672131147541, |
|
"grad_norm": 2.9475815296173096, |
|
"learning_rate": 3.187613843351548e-05, |
|
"loss": 0.0801, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.036065573770492, |
|
"grad_norm": 0.0647067278623581, |
|
"learning_rate": 3.096539162112932e-05, |
|
"loss": 0.0925, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.052459016393443, |
|
"grad_norm": 2.0591495037078857, |
|
"learning_rate": 3.005464480874317e-05, |
|
"loss": 0.1952, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.0688524590163935, |
|
"grad_norm": 9.379964828491211, |
|
"learning_rate": 2.9143897996357018e-05, |
|
"loss": 0.2213, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.085245901639344, |
|
"grad_norm": 0.2886744737625122, |
|
"learning_rate": 2.823315118397086e-05, |
|
"loss": 0.1651, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.101639344262295, |
|
"grad_norm": 10.280256271362305, |
|
"learning_rate": 2.7322404371584703e-05, |
|
"loss": 0.1743, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.101639344262295, |
|
"eval_accuracy": 0.8049792531120332, |
|
"eval_f1": 0.7984134204702362, |
|
"eval_loss": 0.5612084865570068, |
|
"eval_runtime": 76.7892, |
|
"eval_samples_per_second": 3.138, |
|
"eval_steps_per_second": 0.273, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.016393442622951, |
|
"grad_norm": 0.10767544060945511, |
|
"learning_rate": 2.6411657559198543e-05, |
|
"loss": 0.1745, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.032786885245901, |
|
"grad_norm": 0.31735455989837646, |
|
"learning_rate": 2.550091074681239e-05, |
|
"loss": 0.0893, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.049180327868853, |
|
"grad_norm": 0.657698392868042, |
|
"learning_rate": 2.459016393442623e-05, |
|
"loss": 0.0209, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.065573770491803, |
|
"grad_norm": 6.128834247589111, |
|
"learning_rate": 2.3679417122040072e-05, |
|
"loss": 0.0931, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.081967213114754, |
|
"grad_norm": 3.8232431411743164, |
|
"learning_rate": 2.2768670309653916e-05, |
|
"loss": 0.1862, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.098360655737705, |
|
"grad_norm": 0.28946903347969055, |
|
"learning_rate": 2.185792349726776e-05, |
|
"loss": 0.1005, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.101639344262295, |
|
"eval_accuracy": 0.8838174273858921, |
|
"eval_f1": 0.8838017754864972, |
|
"eval_loss": 0.40273839235305786, |
|
"eval_runtime": 79.1005, |
|
"eval_samples_per_second": 3.047, |
|
"eval_steps_per_second": 0.265, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 6.0131147540983605, |
|
"grad_norm": 7.813482761383057, |
|
"learning_rate": 2.0947176684881604e-05, |
|
"loss": 0.1164, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.029508196721311, |
|
"grad_norm": 4.695272445678711, |
|
"learning_rate": 2.0036429872495445e-05, |
|
"loss": 0.128, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.045901639344263, |
|
"grad_norm": 0.22056636214256287, |
|
"learning_rate": 1.912568306010929e-05, |
|
"loss": 0.1201, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.062295081967213, |
|
"grad_norm": 0.24654638767242432, |
|
"learning_rate": 1.8214936247723133e-05, |
|
"loss": 0.0823, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.078688524590164, |
|
"grad_norm": 1.5276825428009033, |
|
"learning_rate": 1.7304189435336977e-05, |
|
"loss": 0.0994, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.0950819672131145, |
|
"grad_norm": 5.22976016998291, |
|
"learning_rate": 1.6393442622950818e-05, |
|
"loss": 0.0147, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 6.101639344262295, |
|
"eval_accuracy": 0.8589211618257261, |
|
"eval_f1": 0.8572866763193951, |
|
"eval_loss": 0.4360053241252899, |
|
"eval_runtime": 76.601, |
|
"eval_samples_per_second": 3.146, |
|
"eval_steps_per_second": 0.274, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 7.00983606557377, |
|
"grad_norm": 0.01562822423875332, |
|
"learning_rate": 1.548269581056466e-05, |
|
"loss": 0.0178, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.026229508196721, |
|
"grad_norm": 0.15828734636306763, |
|
"learning_rate": 1.4571948998178509e-05, |
|
"loss": 0.1265, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.0426229508196725, |
|
"grad_norm": 0.9054508805274963, |
|
"learning_rate": 1.3661202185792351e-05, |
|
"loss": 0.0251, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.059016393442623, |
|
"grad_norm": 0.2960349917411804, |
|
"learning_rate": 1.2750455373406195e-05, |
|
"loss": 0.0241, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.075409836065574, |
|
"grad_norm": 0.08049295842647552, |
|
"learning_rate": 1.1839708561020036e-05, |
|
"loss": 0.0518, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.091803278688524, |
|
"grad_norm": 0.25148022174835205, |
|
"learning_rate": 1.092896174863388e-05, |
|
"loss": 0.0573, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 7.101639344262295, |
|
"eval_accuracy": 0.8713692946058091, |
|
"eval_f1": 0.8697143392786835, |
|
"eval_loss": 0.4450831115245819, |
|
"eval_runtime": 75.7767, |
|
"eval_samples_per_second": 3.18, |
|
"eval_steps_per_second": 0.277, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 8.00655737704918, |
|
"grad_norm": 0.028504155576229095, |
|
"learning_rate": 1.0018214936247722e-05, |
|
"loss": 0.0705, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.02295081967213, |
|
"grad_norm": 0.7975661158561707, |
|
"learning_rate": 9.107468123861566e-06, |
|
"loss": 0.0547, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.039344262295081, |
|
"grad_norm": 0.035094812512397766, |
|
"learning_rate": 8.196721311475409e-06, |
|
"loss": 0.0484, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.055737704918032, |
|
"grad_norm": 4.1898651123046875, |
|
"learning_rate": 7.2859744990892545e-06, |
|
"loss": 0.0834, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.072131147540984, |
|
"grad_norm": 0.8504851460456848, |
|
"learning_rate": 6.375227686703098e-06, |
|
"loss": 0.1502, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.088524590163935, |
|
"grad_norm": 0.8113920092582703, |
|
"learning_rate": 5.46448087431694e-06, |
|
"loss": 0.0143, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 8.101639344262296, |
|
"eval_accuracy": 0.8672199170124482, |
|
"eval_f1": 0.8666497215968316, |
|
"eval_loss": 0.40985623002052307, |
|
"eval_runtime": 79.8008, |
|
"eval_samples_per_second": 3.02, |
|
"eval_steps_per_second": 0.263, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 9.00327868852459, |
|
"grad_norm": 6.550829887390137, |
|
"learning_rate": 4.553734061930783e-06, |
|
"loss": 0.067, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.01967213114754, |
|
"grad_norm": 2.0722174644470215, |
|
"learning_rate": 3.6429872495446273e-06, |
|
"loss": 0.0158, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 9.036065573770491, |
|
"grad_norm": 0.09152109175920486, |
|
"learning_rate": 2.73224043715847e-06, |
|
"loss": 0.0203, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.052459016393442, |
|
"grad_norm": 0.12251006811857224, |
|
"learning_rate": 1.8214936247723136e-06, |
|
"loss": 0.0169, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.068852459016393, |
|
"grad_norm": 0.1367557793855667, |
|
"learning_rate": 9.107468123861568e-07, |
|
"loss": 0.0043, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.085245901639345, |
|
"grad_norm": 6.975312232971191, |
|
"learning_rate": 0.0, |
|
"loss": 0.1311, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.085245901639345, |
|
"eval_accuracy": 0.8755186721991701, |
|
"eval_f1": 0.8751571381793353, |
|
"eval_loss": 0.40564292669296265, |
|
"eval_runtime": 76.4754, |
|
"eval_samples_per_second": 3.151, |
|
"eval_steps_per_second": 0.275, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.085245901639345, |
|
"step": 610, |
|
"total_flos": 6.389823367042892e+18, |
|
"train_loss": 0.24862837887933997, |
|
"train_runtime": 3777.5727, |
|
"train_samples_per_second": 1.938, |
|
"train_steps_per_second": 0.161 |
|
}, |
|
{ |
|
"epoch": 9.085245901639345, |
|
"eval_accuracy": 0.9824561403508771, |
|
"eval_f1": 0.9824594539682324, |
|
"eval_loss": 0.04523608461022377, |
|
"eval_runtime": 250.7162, |
|
"eval_samples_per_second": 2.956, |
|
"eval_steps_per_second": 0.247, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.085245901639345, |
|
"eval_accuracy": 0.8337078651685393, |
|
"eval_f1": 0.8322580739791249, |
|
"eval_loss": 0.6123429536819458, |
|
"eval_runtime": 140.7823, |
|
"eval_samples_per_second": 3.161, |
|
"eval_steps_per_second": 0.27, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 9.085245901639345, |
|
"eval_accuracy": 0.8838174273858921, |
|
"eval_f1": 0.8838017754864972, |
|
"eval_loss": 0.40273845195770264, |
|
"eval_runtime": 76.8071, |
|
"eval_samples_per_second": 3.138, |
|
"eval_steps_per_second": 0.273, |
|
"step": 610 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 610, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.389823367042892e+18, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|