kalita's picture
Model save
415bc54 verified
{
"best_metric": 0.927536231884058,
"best_model_checkpoint": "videomae-base-finetuned-ssv2-finetuned-traffic-dataset-mae/checkpoint-456",
"epoch": 7.125,
"eval_steps": 500,
"global_step": 608,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"grad_norm": 10.86192798614502,
"learning_rate": 8.196721311475409e-06,
"loss": 0.6065,
"step": 10
},
{
"epoch": 0.03,
"grad_norm": 5.969607830047607,
"learning_rate": 1.6393442622950818e-05,
"loss": 0.5224,
"step": 20
},
{
"epoch": 0.05,
"grad_norm": 7.2913336753845215,
"learning_rate": 2.459016393442623e-05,
"loss": 0.5223,
"step": 30
},
{
"epoch": 0.07,
"grad_norm": 1.6693388223648071,
"learning_rate": 3.2786885245901635e-05,
"loss": 0.1888,
"step": 40
},
{
"epoch": 0.08,
"grad_norm": 1.9622797966003418,
"learning_rate": 4.098360655737705e-05,
"loss": 0.2521,
"step": 50
},
{
"epoch": 0.1,
"grad_norm": 1.1212589740753174,
"learning_rate": 4.918032786885246e-05,
"loss": 0.0138,
"step": 60
},
{
"epoch": 0.12,
"grad_norm": 0.07918030023574829,
"learning_rate": 4.917733089579525e-05,
"loss": 0.0309,
"step": 70
},
{
"epoch": 0.12,
"eval_accuracy": 0.9130434782608695,
"eval_loss": 0.30077555775642395,
"eval_runtime": 121.2516,
"eval_samples_per_second": 0.569,
"eval_steps_per_second": 0.289,
"step": 76
},
{
"epoch": 1.01,
"grad_norm": 0.09372598677873611,
"learning_rate": 4.826325411334552e-05,
"loss": 0.0042,
"step": 80
},
{
"epoch": 1.02,
"grad_norm": 0.016796614974737167,
"learning_rate": 4.73491773308958e-05,
"loss": 0.0007,
"step": 90
},
{
"epoch": 1.04,
"grad_norm": 0.020755194127559662,
"learning_rate": 4.643510054844607e-05,
"loss": 0.3019,
"step": 100
},
{
"epoch": 1.06,
"grad_norm": 0.011669596657156944,
"learning_rate": 4.5521023765996346e-05,
"loss": 0.0003,
"step": 110
},
{
"epoch": 1.07,
"grad_norm": 0.003766011679545045,
"learning_rate": 4.460694698354662e-05,
"loss": 0.0002,
"step": 120
},
{
"epoch": 1.09,
"grad_norm": 0.08908724784851074,
"learning_rate": 4.369287020109689e-05,
"loss": 0.0002,
"step": 130
},
{
"epoch": 1.11,
"grad_norm": 0.005499520804733038,
"learning_rate": 4.2778793418647164e-05,
"loss": 0.0002,
"step": 140
},
{
"epoch": 1.12,
"grad_norm": 0.005470686126500368,
"learning_rate": 4.1864716636197444e-05,
"loss": 0.0002,
"step": 150
},
{
"epoch": 1.12,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 2.1030023097991943,
"eval_runtime": 100.6515,
"eval_samples_per_second": 0.686,
"eval_steps_per_second": 0.348,
"step": 152
},
{
"epoch": 2.01,
"grad_norm": 0.0026209524367004633,
"learning_rate": 4.095063985374772e-05,
"loss": 0.0001,
"step": 160
},
{
"epoch": 2.03,
"grad_norm": 0.009242965839803219,
"learning_rate": 4.003656307129799e-05,
"loss": 0.0001,
"step": 170
},
{
"epoch": 2.05,
"grad_norm": 0.0026453358586877584,
"learning_rate": 3.912248628884826e-05,
"loss": 0.0001,
"step": 180
},
{
"epoch": 2.06,
"grad_norm": 0.0033517158590257168,
"learning_rate": 3.820840950639854e-05,
"loss": 0.0001,
"step": 190
},
{
"epoch": 2.08,
"grad_norm": 0.003627925645560026,
"learning_rate": 3.7294332723948815e-05,
"loss": 0.0001,
"step": 200
},
{
"epoch": 2.1,
"grad_norm": 0.002879067324101925,
"learning_rate": 3.638025594149909e-05,
"loss": 0.0001,
"step": 210
},
{
"epoch": 2.11,
"grad_norm": 0.0036734691821038723,
"learning_rate": 3.546617915904936e-05,
"loss": 0.0001,
"step": 220
},
{
"epoch": 2.12,
"eval_accuracy": 0.7101449275362319,
"eval_loss": 1.84578275680542,
"eval_runtime": 105.6436,
"eval_samples_per_second": 0.653,
"eval_steps_per_second": 0.331,
"step": 228
},
{
"epoch": 3.0,
"grad_norm": 0.0011968453181907535,
"learning_rate": 3.455210237659964e-05,
"loss": 0.0001,
"step": 230
},
{
"epoch": 3.02,
"grad_norm": 0.0024677019100636244,
"learning_rate": 3.3638025594149906e-05,
"loss": 0.0001,
"step": 240
},
{
"epoch": 3.04,
"grad_norm": 0.0011525729205459356,
"learning_rate": 3.2723948811700186e-05,
"loss": 0.0001,
"step": 250
},
{
"epoch": 3.05,
"grad_norm": 0.003028567647561431,
"learning_rate": 3.180987202925046e-05,
"loss": 0.0001,
"step": 260
},
{
"epoch": 3.07,
"grad_norm": 0.0027707908302545547,
"learning_rate": 3.089579524680074e-05,
"loss": 0.0001,
"step": 270
},
{
"epoch": 3.09,
"grad_norm": 0.0017000396037474275,
"learning_rate": 2.9981718464351004e-05,
"loss": 0.0001,
"step": 280
},
{
"epoch": 3.1,
"grad_norm": 0.002608270151540637,
"learning_rate": 2.906764168190128e-05,
"loss": 0.0001,
"step": 290
},
{
"epoch": 3.12,
"grad_norm": 0.0012394236400723457,
"learning_rate": 2.8153564899451557e-05,
"loss": 0.0,
"step": 300
},
{
"epoch": 3.12,
"eval_accuracy": 0.7391304347826086,
"eval_loss": 1.5200165510177612,
"eval_runtime": 117.7366,
"eval_samples_per_second": 0.586,
"eval_steps_per_second": 0.297,
"step": 304
},
{
"epoch": 4.01,
"grad_norm": 0.00229161255992949,
"learning_rate": 2.7239488117001826e-05,
"loss": 0.0001,
"step": 310
},
{
"epoch": 4.03,
"grad_norm": 0.0013108043931424618,
"learning_rate": 2.6325411334552102e-05,
"loss": 0.0001,
"step": 320
},
{
"epoch": 4.04,
"grad_norm": 0.0012365940492600203,
"learning_rate": 2.541133455210238e-05,
"loss": 0.0,
"step": 330
},
{
"epoch": 4.06,
"grad_norm": 0.002082814695313573,
"learning_rate": 2.449725776965265e-05,
"loss": 0.0001,
"step": 340
},
{
"epoch": 4.08,
"grad_norm": 0.0035416895989328623,
"learning_rate": 2.3583180987202927e-05,
"loss": 0.0001,
"step": 350
},
{
"epoch": 4.09,
"grad_norm": 0.0009142422350123525,
"learning_rate": 2.26691042047532e-05,
"loss": 0.0001,
"step": 360
},
{
"epoch": 4.11,
"grad_norm": 0.0014322904171422124,
"learning_rate": 2.1755027422303476e-05,
"loss": 0.0001,
"step": 370
},
{
"epoch": 4.12,
"grad_norm": 0.0017242878675460815,
"learning_rate": 2.084095063985375e-05,
"loss": 0.0001,
"step": 380
},
{
"epoch": 4.12,
"eval_accuracy": 0.7536231884057971,
"eval_loss": 1.4568772315979004,
"eval_runtime": 106.8623,
"eval_samples_per_second": 0.646,
"eval_steps_per_second": 0.328,
"step": 380
},
{
"epoch": 5.02,
"grad_norm": 0.0011396174086257815,
"learning_rate": 1.9926873857404025e-05,
"loss": 0.0001,
"step": 390
},
{
"epoch": 5.03,
"grad_norm": 0.0009178342879749835,
"learning_rate": 1.90127970749543e-05,
"loss": 0.0,
"step": 400
},
{
"epoch": 5.05,
"grad_norm": 0.0034428227227181196,
"learning_rate": 1.809872029250457e-05,
"loss": 0.0,
"step": 410
},
{
"epoch": 5.07,
"grad_norm": 0.0011987154139205813,
"learning_rate": 1.7184643510054847e-05,
"loss": 0.0,
"step": 420
},
{
"epoch": 5.08,
"grad_norm": 0.000779572525061667,
"learning_rate": 1.627056672760512e-05,
"loss": 0.0,
"step": 430
},
{
"epoch": 5.1,
"grad_norm": 0.0013000366743654013,
"learning_rate": 1.5356489945155393e-05,
"loss": 0.0003,
"step": 440
},
{
"epoch": 5.12,
"grad_norm": 0.0019034247379750013,
"learning_rate": 1.4442413162705667e-05,
"loss": 0.0,
"step": 450
},
{
"epoch": 5.12,
"eval_accuracy": 0.927536231884058,
"eval_loss": 0.39408108592033386,
"eval_runtime": 103.5083,
"eval_samples_per_second": 0.667,
"eval_steps_per_second": 0.338,
"step": 456
},
{
"epoch": 6.01,
"grad_norm": 0.0016774114919826388,
"learning_rate": 1.3528336380255944e-05,
"loss": 0.0005,
"step": 460
},
{
"epoch": 6.02,
"grad_norm": 0.0013074069283902645,
"learning_rate": 1.2614259597806216e-05,
"loss": 0.0001,
"step": 470
},
{
"epoch": 6.04,
"grad_norm": 0.0015878825215622783,
"learning_rate": 1.1700182815356491e-05,
"loss": 0.0001,
"step": 480
},
{
"epoch": 6.06,
"grad_norm": 0.0008045001304708421,
"learning_rate": 1.0786106032906766e-05,
"loss": 0.0001,
"step": 490
},
{
"epoch": 6.07,
"grad_norm": 0.0012260900111868978,
"learning_rate": 9.872029250457038e-06,
"loss": 0.0002,
"step": 500
},
{
"epoch": 6.09,
"grad_norm": 0.001507502980530262,
"learning_rate": 8.957952468007313e-06,
"loss": 0.0001,
"step": 510
},
{
"epoch": 6.11,
"grad_norm": 0.001074848580174148,
"learning_rate": 8.043875685557587e-06,
"loss": 0.0001,
"step": 520
},
{
"epoch": 6.12,
"grad_norm": 0.0012807522434741259,
"learning_rate": 7.129798903107861e-06,
"loss": 0.0001,
"step": 530
},
{
"epoch": 6.12,
"eval_accuracy": 0.8695652173913043,
"eval_loss": 0.9657596945762634,
"eval_runtime": 105.1521,
"eval_samples_per_second": 0.656,
"eval_steps_per_second": 0.333,
"step": 532
},
{
"epoch": 7.01,
"grad_norm": 0.0008385140681639314,
"learning_rate": 6.2157221206581355e-06,
"loss": 0.0,
"step": 540
},
{
"epoch": 7.03,
"grad_norm": 0.001427665469236672,
"learning_rate": 5.301645338208409e-06,
"loss": 0.0,
"step": 550
},
{
"epoch": 7.05,
"grad_norm": 0.001054079388268292,
"learning_rate": 4.387568555758684e-06,
"loss": 0.0001,
"step": 560
},
{
"epoch": 7.06,
"grad_norm": 0.0016839707968756557,
"learning_rate": 3.4734917733089582e-06,
"loss": 0.0001,
"step": 570
},
{
"epoch": 7.08,
"grad_norm": 0.0011065697763115168,
"learning_rate": 2.5594149908592323e-06,
"loss": 0.0,
"step": 580
},
{
"epoch": 7.1,
"grad_norm": 0.0027244570665061474,
"learning_rate": 1.6453382084095064e-06,
"loss": 0.0,
"step": 590
},
{
"epoch": 7.11,
"grad_norm": 0.002320781582966447,
"learning_rate": 7.312614259597807e-07,
"loss": 0.0001,
"step": 600
},
{
"epoch": 7.12,
"eval_accuracy": 0.8405797101449275,
"eval_loss": 0.983595609664917,
"eval_runtime": 105.1841,
"eval_samples_per_second": 0.656,
"eval_steps_per_second": 0.333,
"step": 608
},
{
"epoch": 7.12,
"step": 608,
"total_flos": 1.5152145407976407e+18,
"train_loss": 0.040272084387700156,
"train_runtime": 2933.2373,
"train_samples_per_second": 0.415,
"train_steps_per_second": 0.207
},
{
"epoch": 7.12,
"eval_accuracy": 0.375,
"eval_loss": 3.811370849609375,
"eval_runtime": 15.0567,
"eval_samples_per_second": 1.063,
"eval_steps_per_second": 0.531,
"step": 608
},
{
"epoch": 7.12,
"eval_accuracy": 0.375,
"eval_loss": 3.811370849609375,
"eval_runtime": 14.4555,
"eval_samples_per_second": 1.107,
"eval_steps_per_second": 0.553,
"step": 608
}
],
"logging_steps": 10,
"max_steps": 608,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"total_flos": 1.5152145407976407e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}