Upload 8 files

Browse files

Files changed (8) hide show

config.json +46 -0
model.safetensors +3 -0
optimizer.pt +3 -0
preprocessor_config.json +22 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +682 -0
training_args.bin +3 -0

config.json ADDED Viewed

	@@ -0,0 +1,46 @@

+{
+  "_name_or_path": "google/vit-base-patch16-224",
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "Gatak",
+    "1": "Kelihos_ver1",
+    "2": "Kelihos_ver3",
+    "3": "Lollipop",
+    "4": "Obfuscator.ACY",
+    "5": "Ramnit",
+    "6": "Simda",
+    "7": "Tracur",
+    "8": "Vundo"
+  },
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "Gatak": 0,
+    "Kelihos_ver1": 1,
+    "Kelihos_ver3": 2,
+    "Lollipop": 3,
+    "Obfuscator.ACY": 4,
+    "Ramnit": 5,
+    "Simda": 6,
+    "Tracur": 7,
+    "Vundo": 8
+  },
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d639cfa6c0c0a6dff1bc1dc022fa0318fa4467a430429788e3e7f6ce1274c8fe
+size 343245508

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:149980f70f4056542cce52de73abd4d087fd4b61c033dd4d6bad54dca6eabe68
+size 686611898

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "ViTImageProcessor",
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "resample": 2,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:27a2701b91920a744cb212eaaa2227a252769180fa2250531b99f54a7a77e59c
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,682 @@

+{
+  "best_metric": 0.983640081799591,
+  "best_model_checkpoint": "/home/user/Desktop/ViT/microsoft/vit_finetuned/checkpoint-4405",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 4405,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06,
+      "grad_norm": 7.11489200592041,
+      "learning_rate": 1.9772985244040862e-05,
+      "loss": 1.3281,
+      "step": 50
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 7.982043266296387,
+      "learning_rate": 1.9545970488081726e-05,
+      "loss": 0.855,
+      "step": 100
+    },
+    {
+      "epoch": 0.17,
+      "grad_norm": 10.429923057556152,
+      "learning_rate": 1.9318955732122587e-05,
+      "loss": 0.5551,
+      "step": 150
+    },
+    {
+      "epoch": 0.23,
+      "grad_norm": 8.25513744354248,
+      "learning_rate": 1.909194097616345e-05,
+      "loss": 0.4069,
+      "step": 200
+    },
+    {
+      "epoch": 0.28,
+      "grad_norm": 8.956159591674805,
+      "learning_rate": 1.8864926220204315e-05,
+      "loss": 0.3274,
+      "step": 250
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 1.4499558210372925,
+      "learning_rate": 1.863791146424518e-05,
+      "loss": 0.2836,
+      "step": 300
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 8.211179733276367,
+      "learning_rate": 1.841089670828604e-05,
+      "loss": 0.2992,
+      "step": 350
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 8.798752784729004,
+      "learning_rate": 1.8183881952326903e-05,
+      "loss": 0.2276,
+      "step": 400
+    },
+    {
+      "epoch": 0.51,
+      "grad_norm": 0.3845123052597046,
+      "learning_rate": 1.7956867196367764e-05,
+      "loss": 0.2877,
+      "step": 450
+    },
+    {
+      "epoch": 0.57,
+      "grad_norm": 15.287618637084961,
+      "learning_rate": 1.7729852440408628e-05,
+      "loss": 0.3295,
+      "step": 500
+    },
+    {
+      "epoch": 0.62,
+      "grad_norm": 19.116310119628906,
+      "learning_rate": 1.750283768444949e-05,
+      "loss": 0.2301,
+      "step": 550
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 8.03623104095459,
+      "learning_rate": 1.7275822928490352e-05,
+      "loss": 0.1888,
+      "step": 600
+    },
+    {
+      "epoch": 0.74,
+      "grad_norm": 6.62972354888916,
+      "learning_rate": 1.7048808172531216e-05,
+      "loss": 0.2183,
+      "step": 650
+    },
+    {
+      "epoch": 0.79,
+      "grad_norm": 2.2878506183624268,
+      "learning_rate": 1.6821793416572077e-05,
+      "loss": 0.1921,
+      "step": 700
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 11.028101921081543,
+      "learning_rate": 1.659477866061294e-05,
+      "loss": 0.2508,
+      "step": 750
+    },
+    {
+      "epoch": 0.91,
+      "grad_norm": 19.02913475036621,
+      "learning_rate": 1.6367763904653805e-05,
+      "loss": 0.2718,
+      "step": 800
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 2.5358214378356934,
+      "learning_rate": 1.6140749148694666e-05,
+      "loss": 0.1693,
+      "step": 850
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9611451942740287,
+      "eval_loss": 0.14811581373214722,
+      "eval_runtime": 75.4555,
+      "eval_samples_per_second": 12.961,
+      "eval_steps_per_second": 3.247,
+      "step": 881
+    },
+    {
+      "epoch": 1.02,
+      "grad_norm": 0.04142925515770912,
+      "learning_rate": 1.591373439273553e-05,
+      "loss": 0.0968,
+      "step": 900
+    },
+    {
+      "epoch": 1.08,
+      "grad_norm": 12.202880859375,
+      "learning_rate": 1.568671963677639e-05,
+      "loss": 0.1501,
+      "step": 950
+    },
+    {
+      "epoch": 1.14,
+      "grad_norm": 0.7614470720291138,
+      "learning_rate": 1.5459704880817254e-05,
+      "loss": 0.1543,
+      "step": 1000
+    },
+    {
+      "epoch": 1.19,
+      "grad_norm": 13.012730598449707,
+      "learning_rate": 1.5232690124858116e-05,
+      "loss": 0.2138,
+      "step": 1050
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 25.198537826538086,
+      "learning_rate": 1.500567536889898e-05,
+      "loss": 0.1441,
+      "step": 1100
+    },
+    {
+      "epoch": 1.31,
+      "grad_norm": 10.802690505981445,
+      "learning_rate": 1.4778660612939841e-05,
+      "loss": 0.1082,
+      "step": 1150
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 32.39842224121094,
+      "learning_rate": 1.4551645856980705e-05,
+      "loss": 0.1193,
+      "step": 1200
+    },
+    {
+      "epoch": 1.42,
+      "grad_norm": 0.05127358064055443,
+      "learning_rate": 1.4324631101021567e-05,
+      "loss": 0.1431,
+      "step": 1250
+    },
+    {
+      "epoch": 1.48,
+      "grad_norm": 15.374510765075684,
+      "learning_rate": 1.409761634506243e-05,
+      "loss": 0.1306,
+      "step": 1300
+    },
+    {
+      "epoch": 1.53,
+      "grad_norm": 22.718175888061523,
+      "learning_rate": 1.3870601589103292e-05,
+      "loss": 0.1379,
+      "step": 1350
+    },
+    {
+      "epoch": 1.59,
+      "grad_norm": 0.05202079191803932,
+      "learning_rate": 1.3643586833144156e-05,
+      "loss": 0.1174,
+      "step": 1400
+    },
+    {
+      "epoch": 1.65,
+      "grad_norm": 0.06224975362420082,
+      "learning_rate": 1.3416572077185016e-05,
+      "loss": 0.1303,
+      "step": 1450
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 11.104813575744629,
+      "learning_rate": 1.318955732122588e-05,
+      "loss": 0.1536,
+      "step": 1500
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 9.409710884094238,
+      "learning_rate": 1.2962542565266743e-05,
+      "loss": 0.1594,
+      "step": 1550
+    },
+    {
+      "epoch": 1.82,
+      "grad_norm": 1.2388224601745605,
+      "learning_rate": 1.2735527809307607e-05,
+      "loss": 0.0914,
+      "step": 1600
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 12.70537281036377,
+      "learning_rate": 1.2508513053348467e-05,
+      "loss": 0.1253,
+      "step": 1650
+    },
+    {
+      "epoch": 1.93,
+      "grad_norm": 19.78186798095703,
+      "learning_rate": 1.2281498297389331e-05,
+      "loss": 0.12,
+      "step": 1700
+    },
+    {
+      "epoch": 1.99,
+      "grad_norm": 1.3154646158218384,
+      "learning_rate": 1.2054483541430193e-05,
+      "loss": 0.1244,
+      "step": 1750
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9693251533742331,
+      "eval_loss": 0.11500896513462067,
+      "eval_runtime": 73.5646,
+      "eval_samples_per_second": 13.294,
+      "eval_steps_per_second": 3.33,
+      "step": 1762
+    },
+    {
+      "epoch": 2.04,
+      "grad_norm": 0.14741405844688416,
+      "learning_rate": 1.1827468785471057e-05,
+      "loss": 0.0995,
+      "step": 1800
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 0.8733153939247131,
+      "learning_rate": 1.1600454029511918e-05,
+      "loss": 0.041,
+      "step": 1850
+    },
+    {
+      "epoch": 2.16,
+      "grad_norm": 0.20158135890960693,
+      "learning_rate": 1.1373439273552782e-05,
+      "loss": 0.025,
+      "step": 1900
+    },
+    {
+      "epoch": 2.21,
+      "grad_norm": 0.3511994779109955,
+      "learning_rate": 1.1146424517593644e-05,
+      "loss": 0.1116,
+      "step": 1950
+    },
+    {
+      "epoch": 2.27,
+      "grad_norm": 0.01977057382464409,
+      "learning_rate": 1.0919409761634507e-05,
+      "loss": 0.0746,
+      "step": 2000
+    },
+    {
+      "epoch": 2.33,
+      "grad_norm": 0.0061535644344985485,
+      "learning_rate": 1.0692395005675369e-05,
+      "loss": 0.0586,
+      "step": 2050
+    },
+    {
+      "epoch": 2.38,
+      "grad_norm": 0.009720620699226856,
+      "learning_rate": 1.0465380249716233e-05,
+      "loss": 0.0499,
+      "step": 2100
+    },
+    {
+      "epoch": 2.44,
+      "grad_norm": 0.07518770545721054,
+      "learning_rate": 1.0238365493757093e-05,
+      "loss": 0.0497,
+      "step": 2150
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 0.17679017782211304,
+      "learning_rate": 1.0011350737797957e-05,
+      "loss": 0.1088,
+      "step": 2200
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 0.010072373785078526,
+      "learning_rate": 9.784335981838821e-06,
+      "loss": 0.0475,
+      "step": 2250
+    },
+    {
+      "epoch": 2.61,
+      "grad_norm": 13.180130004882812,
+      "learning_rate": 9.557321225879684e-06,
+      "loss": 0.0702,
+      "step": 2300
+    },
+    {
+      "epoch": 2.67,
+      "grad_norm": 10.65009880065918,
+      "learning_rate": 9.330306469920546e-06,
+      "loss": 0.0956,
+      "step": 2350
+    },
+    {
+      "epoch": 2.72,
+      "grad_norm": 0.06876125931739807,
+      "learning_rate": 9.103291713961408e-06,
+      "loss": 0.1123,
+      "step": 2400
+    },
+    {
+      "epoch": 2.78,
+      "grad_norm": 0.03692874684929848,
+      "learning_rate": 8.87627695800227e-06,
+      "loss": 0.0345,
+      "step": 2450
+    },
+    {
+      "epoch": 2.84,
+      "grad_norm": 30.104206085205078,
+      "learning_rate": 8.649262202043135e-06,
+      "loss": 0.0515,
+      "step": 2500
+    },
+    {
+      "epoch": 2.89,
+      "grad_norm": 3.871462345123291,
+      "learning_rate": 8.422247446083997e-06,
+      "loss": 0.0661,
+      "step": 2550
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 0.03757273405790329,
+      "learning_rate": 8.195232690124859e-06,
+      "loss": 0.0692,
+      "step": 2600
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9785276073619632,
+      "eval_loss": 0.11215907335281372,
+      "eval_runtime": 73.8279,
+      "eval_samples_per_second": 13.247,
+      "eval_steps_per_second": 3.319,
+      "step": 2643
+    },
+    {
+      "epoch": 3.01,
+      "grad_norm": 0.36702996492385864,
+      "learning_rate": 7.968217934165721e-06,
+      "loss": 0.0561,
+      "step": 2650
+    },
+    {
+      "epoch": 3.06,
+      "grad_norm": 0.0126617681235075,
+      "learning_rate": 7.741203178206584e-06,
+      "loss": 0.039,
+      "step": 2700
+    },
+    {
+      "epoch": 3.12,
+      "grad_norm": 0.009421919472515583,
+      "learning_rate": 7.514188422247447e-06,
+      "loss": 0.0243,
+      "step": 2750
+    },
+    {
+      "epoch": 3.18,
+      "grad_norm": 0.09766176342964172,
+      "learning_rate": 7.28717366628831e-06,
+      "loss": 0.0299,
+      "step": 2800
+    },
+    {
+      "epoch": 3.23,
+      "grad_norm": 0.0054335640743374825,
+      "learning_rate": 7.060158910329172e-06,
+      "loss": 0.0066,
+      "step": 2850
+    },
+    {
+      "epoch": 3.29,
+      "grad_norm": 0.046133432537317276,
+      "learning_rate": 6.833144154370035e-06,
+      "loss": 0.0275,
+      "step": 2900
+    },
+    {
+      "epoch": 3.35,
+      "grad_norm": 0.003417497966438532,
+      "learning_rate": 6.606129398410898e-06,
+      "loss": 0.0139,
+      "step": 2950
+    },
+    {
+      "epoch": 3.41,
+      "grad_norm": 0.009167753159999847,
+      "learning_rate": 6.37911464245176e-06,
+      "loss": 0.0553,
+      "step": 3000
+    },
+    {
+      "epoch": 3.46,
+      "grad_norm": 0.03221438080072403,
+      "learning_rate": 6.152099886492623e-06,
+      "loss": 0.0601,
+      "step": 3050
+    },
+    {
+      "epoch": 3.52,
+      "grad_norm": 15.037586212158203,
+      "learning_rate": 5.925085130533485e-06,
+      "loss": 0.0343,
+      "step": 3100
+    },
+    {
+      "epoch": 3.58,
+      "grad_norm": 0.18505249917507172,
+      "learning_rate": 5.6980703745743485e-06,
+      "loss": 0.0539,
+      "step": 3150
+    },
+    {
+      "epoch": 3.63,
+      "grad_norm": 24.898054122924805,
+      "learning_rate": 5.471055618615211e-06,
+      "loss": 0.0455,
+      "step": 3200
+    },
+    {
+      "epoch": 3.69,
+      "grad_norm": 7.430636882781982,
+      "learning_rate": 5.244040862656073e-06,
+      "loss": 0.0502,
+      "step": 3250
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 0.005094760097563267,
+      "learning_rate": 5.017026106696936e-06,
+      "loss": 0.0114,
+      "step": 3300
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 0.03962313383817673,
+      "learning_rate": 4.7900113507377985e-06,
+      "loss": 0.0173,
+      "step": 3350
+    },
+    {
+      "epoch": 3.86,
+      "grad_norm": 0.7573426961898804,
+      "learning_rate": 4.562996594778661e-06,
+      "loss": 0.0185,
+      "step": 3400
+    },
+    {
+      "epoch": 3.92,
+      "grad_norm": 0.03129027783870697,
+      "learning_rate": 4.335981838819524e-06,
+      "loss": 0.0262,
+      "step": 3450
+    },
+    {
+      "epoch": 3.97,
+      "grad_norm": 0.18763615190982819,
+      "learning_rate": 4.108967082860386e-06,
+      "loss": 0.0326,
+      "step": 3500
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.9775051124744376,
+      "eval_loss": 0.09212471544742584,
+      "eval_runtime": 73.5389,
+      "eval_samples_per_second": 13.299,
+      "eval_steps_per_second": 3.332,
+      "step": 3524
+    },
+    {
+      "epoch": 4.03,
+      "grad_norm": 0.052001625299453735,
+      "learning_rate": 3.8819523269012485e-06,
+      "loss": 0.0174,
+      "step": 3550
+    },
+    {
+      "epoch": 4.09,
+      "grad_norm": 0.017523808404803276,
+      "learning_rate": 3.6549375709421116e-06,
+      "loss": 0.0081,
+      "step": 3600
+    },
+    {
+      "epoch": 4.14,
+      "grad_norm": 0.007012099493294954,
+      "learning_rate": 3.427922814982974e-06,
+      "loss": 0.0152,
+      "step": 3650
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.01468530111014843,
+      "learning_rate": 3.2009080590238366e-06,
+      "loss": 0.0098,
+      "step": 3700
+    },
+    {
+      "epoch": 4.26,
+      "grad_norm": 0.160551518201828,
+      "learning_rate": 2.9738933030646993e-06,
+      "loss": 0.01,
+      "step": 3750
+    },
+    {
+      "epoch": 4.31,
+      "grad_norm": 0.006342815700918436,
+      "learning_rate": 2.746878547105562e-06,
+      "loss": 0.001,
+      "step": 3800
+    },
+    {
+      "epoch": 4.37,
+      "grad_norm": 0.011349351145327091,
+      "learning_rate": 2.5198637911464247e-06,
+      "loss": 0.0135,
+      "step": 3850
+    },
+    {
+      "epoch": 4.43,
+      "grad_norm": 0.004315485712140799,
+      "learning_rate": 2.2928490351872874e-06,
+      "loss": 0.0061,
+      "step": 3900
+    },
+    {
+      "epoch": 4.48,
+      "grad_norm": 0.25492075085639954,
+      "learning_rate": 2.06583427922815e-06,
+      "loss": 0.0122,
+      "step": 3950
+    },
+    {
+      "epoch": 4.54,
+      "grad_norm": 0.00799469742923975,
+      "learning_rate": 1.8388195232690126e-06,
+      "loss": 0.0036,
+      "step": 4000
+    },
+    {
+      "epoch": 4.6,
+      "grad_norm": 0.12055113166570663,
+      "learning_rate": 1.6118047673098751e-06,
+      "loss": 0.004,
+      "step": 4050
+    },
+    {
+      "epoch": 4.65,
+      "grad_norm": 0.014251478016376495,
+      "learning_rate": 1.3847900113507379e-06,
+      "loss": 0.0191,
+      "step": 4100
+    },
+    {
+      "epoch": 4.71,
+      "grad_norm": 0.007032675202935934,
+      "learning_rate": 1.1577752553916006e-06,
+      "loss": 0.0106,
+      "step": 4150
+    },
+    {
+      "epoch": 4.77,
+      "grad_norm": 0.007455560844391584,
+      "learning_rate": 9.307604994324632e-07,
+      "loss": 0.0081,
+      "step": 4200
+    },
+    {
+      "epoch": 4.82,
+      "grad_norm": 0.06628051400184631,
+      "learning_rate": 7.037457434733258e-07,
+      "loss": 0.0038,
+      "step": 4250
+    },
+    {
+      "epoch": 4.88,
+      "grad_norm": 0.004159100819379091,
+      "learning_rate": 4.7673098751418843e-07,
+      "loss": 0.0134,
+      "step": 4300
+    },
+    {
+      "epoch": 4.94,
+      "grad_norm": 0.0036380901001393795,
+      "learning_rate": 2.497162315550511e-07,
+      "loss": 0.0211,
+      "step": 4350
+    },
+    {
+      "epoch": 4.99,
+      "grad_norm": 0.0037857021670788527,
+      "learning_rate": 2.2701475595913736e-08,
+      "loss": 0.0235,
+      "step": 4400
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.983640081799591,
+      "eval_loss": 0.09139783680438995,
+      "eval_runtime": 73.8525,
+      "eval_samples_per_second": 13.243,
+      "eval_steps_per_second": 3.317,
+      "step": 4405
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 4405,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 3.4106364287028634e+18,
+  "train_batch_size": 10,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b03b8afadef574f31977970ff75a155cb4dd85cd7dea9d2a032c204d80d5c2d9
+size 4856