{ "best_metric": 0.8838017754864972, "best_model_checkpoint": "videomae-surf-analytics-runpod/checkpoint-372", "epoch": 9.085245901639345, "eval_steps": 500, "global_step": 610, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01639344262295082, "grad_norm": 14.228123664855957, "learning_rate": 8.196721311475409e-06, "loss": 1.5374, "step": 10 }, { "epoch": 0.03278688524590164, "grad_norm": 8.263284683227539, "learning_rate": 1.6393442622950818e-05, "loss": 1.33, "step": 20 }, { "epoch": 0.04918032786885246, "grad_norm": 10.129837989807129, "learning_rate": 2.459016393442623e-05, "loss": 1.1214, "step": 30 }, { "epoch": 0.06557377049180328, "grad_norm": 5.447906017303467, "learning_rate": 3.2786885245901635e-05, "loss": 1.023, "step": 40 }, { "epoch": 0.08196721311475409, "grad_norm": 5.764439105987549, "learning_rate": 4.098360655737705e-05, "loss": 0.7497, "step": 50 }, { "epoch": 0.09836065573770492, "grad_norm": 6.531442165374756, "learning_rate": 4.918032786885246e-05, "loss": 0.6712, "step": 60 }, { "epoch": 0.10163934426229508, "eval_accuracy": 0.6680497925311203, "eval_f1": 0.6623298693885865, "eval_loss": 0.8671284317970276, "eval_runtime": 81.9529, "eval_samples_per_second": 2.941, "eval_steps_per_second": 0.256, "step": 62 }, { "epoch": 1.0131147540983607, "grad_norm": 6.3692240715026855, "learning_rate": 4.918032786885246e-05, "loss": 0.5055, "step": 70 }, { "epoch": 1.0295081967213116, "grad_norm": 7.524365425109863, "learning_rate": 4.8269581056466304e-05, "loss": 0.4465, "step": 80 }, { "epoch": 1.0459016393442624, "grad_norm": 3.4210591316223145, "learning_rate": 4.7358834244080144e-05, "loss": 0.3804, "step": 90 }, { "epoch": 1.0622950819672132, "grad_norm": 7.561025619506836, "learning_rate": 4.644808743169399e-05, "loss": 0.4557, "step": 100 }, { "epoch": 1.0786885245901638, "grad_norm": 2.567615509033203, "learning_rate": 4.553734061930783e-05, "loss": 0.3558, "step": 110 }, { "epoch": 1.0950819672131147, "grad_norm": 5.366397380828857, "learning_rate": 4.462659380692168e-05, "loss": 0.3119, "step": 120 }, { "epoch": 1.1016393442622952, "eval_accuracy": 0.7883817427385892, "eval_f1": 0.7887466325291007, "eval_loss": 0.5910844802856445, "eval_runtime": 82.6936, "eval_samples_per_second": 2.914, "eval_steps_per_second": 0.254, "step": 124 }, { "epoch": 2.0098360655737704, "grad_norm": 2.0202689170837402, "learning_rate": 4.371584699453552e-05, "loss": 0.2248, "step": 130 }, { "epoch": 2.0262295081967214, "grad_norm": 5.155245780944824, "learning_rate": 4.280510018214937e-05, "loss": 0.2797, "step": 140 }, { "epoch": 2.042622950819672, "grad_norm": 8.79983139038086, "learning_rate": 4.189435336976321e-05, "loss": 0.2696, "step": 150 }, { "epoch": 2.059016393442623, "grad_norm": 2.6221516132354736, "learning_rate": 4.098360655737705e-05, "loss": 0.2733, "step": 160 }, { "epoch": 2.0754098360655737, "grad_norm": 1.32801353931427, "learning_rate": 4.007285974499089e-05, "loss": 0.159, "step": 170 }, { "epoch": 2.091803278688525, "grad_norm": 7.7792792320251465, "learning_rate": 3.916211293260474e-05, "loss": 0.2505, "step": 180 }, { "epoch": 2.101639344262295, "eval_accuracy": 0.8008298755186722, "eval_f1": 0.8002014090167812, "eval_loss": 0.5296825170516968, "eval_runtime": 80.6025, "eval_samples_per_second": 2.99, "eval_steps_per_second": 0.261, "step": 186 }, { "epoch": 3.0065573770491802, "grad_norm": 6.463658332824707, "learning_rate": 3.825136612021858e-05, "loss": 0.3667, "step": 190 }, { "epoch": 3.0229508196721313, "grad_norm": 7.710232257843018, "learning_rate": 3.7340619307832425e-05, "loss": 0.1461, "step": 200 }, { "epoch": 3.039344262295082, "grad_norm": 8.827251434326172, "learning_rate": 3.6429872495446266e-05, "loss": 0.1328, "step": 210 }, { "epoch": 3.055737704918033, "grad_norm": 14.754895210266113, "learning_rate": 3.551912568306011e-05, "loss": 0.4213, "step": 220 }, { "epoch": 3.0721311475409836, "grad_norm": 7.798481464385986, "learning_rate": 3.4608378870673954e-05, "loss": 0.2256, "step": 230 }, { "epoch": 3.088524590163934, "grad_norm": 1.4291377067565918, "learning_rate": 3.36976320582878e-05, "loss": 0.207, "step": 240 }, { "epoch": 3.101639344262295, "eval_accuracy": 0.7800829875518672, "eval_f1": 0.778687519310884, "eval_loss": 0.5970368981361389, "eval_runtime": 76.1019, "eval_samples_per_second": 3.167, "eval_steps_per_second": 0.276, "step": 248 }, { "epoch": 4.00327868852459, "grad_norm": 6.123374938964844, "learning_rate": 3.2786885245901635e-05, "loss": 0.1879, "step": 250 }, { "epoch": 4.019672131147541, "grad_norm": 2.9475815296173096, "learning_rate": 3.187613843351548e-05, "loss": 0.0801, "step": 260 }, { "epoch": 4.036065573770492, "grad_norm": 0.0647067278623581, "learning_rate": 3.096539162112932e-05, "loss": 0.0925, "step": 270 }, { "epoch": 4.052459016393443, "grad_norm": 2.0591495037078857, "learning_rate": 3.005464480874317e-05, "loss": 0.1952, "step": 280 }, { "epoch": 4.0688524590163935, "grad_norm": 9.379964828491211, "learning_rate": 2.9143897996357018e-05, "loss": 0.2213, "step": 290 }, { "epoch": 4.085245901639344, "grad_norm": 0.2886744737625122, "learning_rate": 2.823315118397086e-05, "loss": 0.1651, "step": 300 }, { "epoch": 4.101639344262295, "grad_norm": 10.280256271362305, "learning_rate": 2.7322404371584703e-05, "loss": 0.1743, "step": 310 }, { "epoch": 4.101639344262295, "eval_accuracy": 0.8049792531120332, "eval_f1": 0.7984134204702362, "eval_loss": 0.5612084865570068, "eval_runtime": 76.7892, "eval_samples_per_second": 3.138, "eval_steps_per_second": 0.273, "step": 310 }, { "epoch": 5.016393442622951, "grad_norm": 0.10767544060945511, "learning_rate": 2.6411657559198543e-05, "loss": 0.1745, "step": 320 }, { "epoch": 5.032786885245901, "grad_norm": 0.31735455989837646, "learning_rate": 2.550091074681239e-05, "loss": 0.0893, "step": 330 }, { "epoch": 5.049180327868853, "grad_norm": 0.657698392868042, "learning_rate": 2.459016393442623e-05, "loss": 0.0209, "step": 340 }, { "epoch": 5.065573770491803, "grad_norm": 6.128834247589111, "learning_rate": 2.3679417122040072e-05, "loss": 0.0931, "step": 350 }, { "epoch": 5.081967213114754, "grad_norm": 3.8232431411743164, "learning_rate": 2.2768670309653916e-05, "loss": 0.1862, "step": 360 }, { "epoch": 5.098360655737705, "grad_norm": 0.28946903347969055, "learning_rate": 2.185792349726776e-05, "loss": 0.1005, "step": 370 }, { "epoch": 5.101639344262295, "eval_accuracy": 0.8838174273858921, "eval_f1": 0.8838017754864972, "eval_loss": 0.40273839235305786, "eval_runtime": 79.1005, "eval_samples_per_second": 3.047, "eval_steps_per_second": 0.265, "step": 372 }, { "epoch": 6.0131147540983605, "grad_norm": 7.813482761383057, "learning_rate": 2.0947176684881604e-05, "loss": 0.1164, "step": 380 }, { "epoch": 6.029508196721311, "grad_norm": 4.695272445678711, "learning_rate": 2.0036429872495445e-05, "loss": 0.128, "step": 390 }, { "epoch": 6.045901639344263, "grad_norm": 0.22056636214256287, "learning_rate": 1.912568306010929e-05, "loss": 0.1201, "step": 400 }, { "epoch": 6.062295081967213, "grad_norm": 0.24654638767242432, "learning_rate": 1.8214936247723133e-05, "loss": 0.0823, "step": 410 }, { "epoch": 6.078688524590164, "grad_norm": 1.5276825428009033, "learning_rate": 1.7304189435336977e-05, "loss": 0.0994, "step": 420 }, { "epoch": 6.0950819672131145, "grad_norm": 5.22976016998291, "learning_rate": 1.6393442622950818e-05, "loss": 0.0147, "step": 430 }, { "epoch": 6.101639344262295, "eval_accuracy": 0.8589211618257261, "eval_f1": 0.8572866763193951, "eval_loss": 0.4360053241252899, "eval_runtime": 76.601, "eval_samples_per_second": 3.146, "eval_steps_per_second": 0.274, "step": 434 }, { "epoch": 7.00983606557377, "grad_norm": 0.01562822423875332, "learning_rate": 1.548269581056466e-05, "loss": 0.0178, "step": 440 }, { "epoch": 7.026229508196721, "grad_norm": 0.15828734636306763, "learning_rate": 1.4571948998178509e-05, "loss": 0.1265, "step": 450 }, { "epoch": 7.0426229508196725, "grad_norm": 0.9054508805274963, "learning_rate": 1.3661202185792351e-05, "loss": 0.0251, "step": 460 }, { "epoch": 7.059016393442623, "grad_norm": 0.2960349917411804, "learning_rate": 1.2750455373406195e-05, "loss": 0.0241, "step": 470 }, { "epoch": 7.075409836065574, "grad_norm": 0.08049295842647552, "learning_rate": 1.1839708561020036e-05, "loss": 0.0518, "step": 480 }, { "epoch": 7.091803278688524, "grad_norm": 0.25148022174835205, "learning_rate": 1.092896174863388e-05, "loss": 0.0573, "step": 490 }, { "epoch": 7.101639344262295, "eval_accuracy": 0.8713692946058091, "eval_f1": 0.8697143392786835, "eval_loss": 0.4450831115245819, "eval_runtime": 75.7767, "eval_samples_per_second": 3.18, "eval_steps_per_second": 0.277, "step": 496 }, { "epoch": 8.00655737704918, "grad_norm": 0.028504155576229095, "learning_rate": 1.0018214936247722e-05, "loss": 0.0705, "step": 500 }, { "epoch": 8.02295081967213, "grad_norm": 0.7975661158561707, "learning_rate": 9.107468123861566e-06, "loss": 0.0547, "step": 510 }, { "epoch": 8.039344262295081, "grad_norm": 0.035094812512397766, "learning_rate": 8.196721311475409e-06, "loss": 0.0484, "step": 520 }, { "epoch": 8.055737704918032, "grad_norm": 4.1898651123046875, "learning_rate": 7.2859744990892545e-06, "loss": 0.0834, "step": 530 }, { "epoch": 8.072131147540984, "grad_norm": 0.8504851460456848, "learning_rate": 6.375227686703098e-06, "loss": 0.1502, "step": 540 }, { "epoch": 8.088524590163935, "grad_norm": 0.8113920092582703, "learning_rate": 5.46448087431694e-06, "loss": 0.0143, "step": 550 }, { "epoch": 8.101639344262296, "eval_accuracy": 0.8672199170124482, "eval_f1": 0.8666497215968316, "eval_loss": 0.40985623002052307, "eval_runtime": 79.8008, "eval_samples_per_second": 3.02, "eval_steps_per_second": 0.263, "step": 558 }, { "epoch": 9.00327868852459, "grad_norm": 6.550829887390137, "learning_rate": 4.553734061930783e-06, "loss": 0.067, "step": 560 }, { "epoch": 9.01967213114754, "grad_norm": 2.0722174644470215, "learning_rate": 3.6429872495446273e-06, "loss": 0.0158, "step": 570 }, { "epoch": 9.036065573770491, "grad_norm": 0.09152109175920486, "learning_rate": 2.73224043715847e-06, "loss": 0.0203, "step": 580 }, { "epoch": 9.052459016393442, "grad_norm": 0.12251006811857224, "learning_rate": 1.8214936247723136e-06, "loss": 0.0169, "step": 590 }, { "epoch": 9.068852459016393, "grad_norm": 0.1367557793855667, "learning_rate": 9.107468123861568e-07, "loss": 0.0043, "step": 600 }, { "epoch": 9.085245901639345, "grad_norm": 6.975312232971191, "learning_rate": 0.0, "loss": 0.1311, "step": 610 }, { "epoch": 9.085245901639345, "eval_accuracy": 0.8755186721991701, "eval_f1": 0.8751571381793353, "eval_loss": 0.40564292669296265, "eval_runtime": 76.4754, "eval_samples_per_second": 3.151, "eval_steps_per_second": 0.275, "step": 610 }, { "epoch": 9.085245901639345, "step": 610, "total_flos": 6.389823367042892e+18, "train_loss": 0.24862837887933997, "train_runtime": 3777.5727, "train_samples_per_second": 1.938, "train_steps_per_second": 0.161 }, { "epoch": 9.085245901639345, "eval_accuracy": 0.9824561403508771, "eval_f1": 0.9824594539682324, "eval_loss": 0.04523608461022377, "eval_runtime": 250.7162, "eval_samples_per_second": 2.956, "eval_steps_per_second": 0.247, "step": 610 }, { "epoch": 9.085245901639345, "eval_accuracy": 0.8337078651685393, "eval_f1": 0.8322580739791249, "eval_loss": 0.6123429536819458, "eval_runtime": 140.7823, "eval_samples_per_second": 3.161, "eval_steps_per_second": 0.27, "step": 610 }, { "epoch": 9.085245901639345, "eval_accuracy": 0.8838174273858921, "eval_f1": 0.8838017754864972, "eval_loss": 0.40273845195770264, "eval_runtime": 76.8071, "eval_samples_per_second": 3.138, "eval_steps_per_second": 0.273, "step": 610 } ], "logging_steps": 10, "max_steps": 610, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.389823367042892e+18, "train_batch_size": 12, "trial_name": null, "trial_params": null }