ramdrop committed on
Commit
f1a48db
·
1 Parent(s): a7d69eb

add vit and onnx

Browse files
onnx/swin32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:588dd3567f6a9c6e890d85e06c5e973d5d3bd874f2767eb359e99fb17c47021f
3
+ size 202214212
onnx/swin_ctrs.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d5878a1300b29c2073f59c3045e0fbfb6a99d17f939e7d05716970ff44a34cc
3
+ size 203092027
onnx/vit32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:affdd7a4f26ff3deba302d1396b6b244ce2638669e40e0d94a1146760585eafa
3
+ size 346501141
vits_eurosat/wandb/latest-run/files/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ CONFIG:
4
+ desc: null
5
+ value: vit_mix_ctrs.yaml
6
+ FUSED_WINDOW_PROCESS:
7
+ desc: null
8
+ value: false
9
+ MODEL:
10
+ desc: null
11
+ value:
12
+ NAME: ViT-B_16
13
+ TYPE: vit
14
+ DROP_PATH_RATE: 0.3
15
+ DROP_RATE: 0.0
16
+ clip_dim: 1024
17
+ PRETRAINED: pretrained_weights/ViT-B_16-224.npz
18
+ QUANTIZE_VERSION: 1
19
+ TRAIN:
20
+ desc: null
21
+ value:
22
+ EPOCHS: 120
23
+ WARMUP_EPOCHS: 0
24
+ USE_CHECKPOINT: false
25
+ BASE_LR: 1.0e-06
26
+ WEIGHT_DECAY: 0.001
27
+ WARMUP_LR: 5.0e-07
28
+ MIN_LR: 5.0e-07
29
+ CLIP_GRAD: 5.0
30
+ LR_SCHEDULER:
31
+ NAME: cosine
32
+ DECAY_EPOCHS: 30
33
+ DECAY_RATE: 0.1
34
+ MULTISTEPS: []
35
+ WARMUP_PREFIX: true
36
+ OPTIMIZER:
37
+ NAME: adamw
38
+ EPS: 1.0e-08
39
+ BETAS:
40
+ - 0.9
41
+ - 0.999
42
+ MOMENTUM: 0.9
43
+ CRETERION:
44
+ NAME: TripletMarginLoss
45
+ TRIPLET_MARGIN: 0.3
46
+ MINING_METHOD:
47
+ POSITIVE: easy
48
+ NEGATIVE: random
49
+ TOTAL: semihard
50
+ NEG_NUM: 3
51
+ KD:
52
+ ENABLE: false
53
+ WEIGHT: 10
54
+ WEIGHT: 0.05
55
+ DATA:
56
+ desc: null
57
+ value:
58
+ DATASET: eurosat
59
+ IMG_SIZE: 224
60
+ ROOT: dbs
61
+ BATCH_SIZE: 4
62
+ VAL_BATCH_SIZE: 32
63
+ EMBEDDING_BATCH_SIZE: 32
64
+ DEPTH_TRANSFORM: rgb
65
+ LABEL_TYPE: pseudo_labels
66
+ IS_SUBSET: true
67
+ DATASET_THRESHOLD: 0.25
68
+ GPU:
69
+ desc: null
70
+ value: 0
71
+ SEED:
72
+ desc: null
73
+ value: 1
74
+ CLIP_MODEL:
75
+ desc: null
76
+ value:
77
+ NAME: ViT-g-14
78
+ PRETRAINED: laion2b_s34b_b88k
79
+ MODAL:
80
+ desc: null
81
+ value: depth
82
+ PHASE:
83
+ desc: null
84
+ value: train_ctrs
85
+ CKPT:
86
+ desc: null
87
+ value: logs/vit_0216_143839/wandb/latest-run/files/src/best_model.pth
88
+ MIX_INPUT:
89
+ desc: null
90
+ value:
91
+ ENABLE: true
92
+ NUM_MIX: 2
93
+ WEIGHTED: false
94
+ ATTENTION:
95
+ desc: null
96
+ value:
97
+ ENABLE: false
98
+ WEIGHTED_SAMPLE:
99
+ desc: null
100
+ value:
101
+ ENABLE: false
102
+ quantization:
103
+ desc: null
104
+ value:
105
+ method: jacob
106
+ weight:
107
+ num_bits: 8
108
+ axis: per_channel
109
+ calib_method: max
110
+ activation:
111
+ num_bits: 8
112
+ axis: per_tensor
113
+ calib_method: max
114
+ calibration:
115
+ num_batch: 2
116
+ modal: rgbd
117
+ pre_calibration: false
118
+ cmd:
119
+ desc: null
120
+ value: run.py --phase=train_ctrs --config=configs/eurosat/vit_mix_ctrs.yaml --quant_config=quantization_configs/d2_jacob.yaml
121
+ _wandb:
122
+ desc: null
123
+ value:
124
+ code_path: code/run.py
125
+ python_version: 3.10.13
126
+ cli_version: 0.16.2
127
+ framework: huggingface
128
+ huggingface_version: 4.36.2
129
+ is_jupyter_run: false
130
+ is_kaggle_kernel: false
131
+ start_time: 1708335908.768161
132
+ t:
133
+ 1:
134
+ - 1
135
+ - 5
136
+ - 11
137
+ - 41
138
+ - 49
139
+ - 53
140
+ - 55
141
+ - 63
142
+ - 80
143
+ 2:
144
+ - 1
145
+ - 5
146
+ - 11
147
+ - 41
148
+ - 49
149
+ - 53
150
+ - 55
151
+ - 63
152
+ - 80
153
+ 3:
154
+ - 3
155
+ - 13
156
+ - 16
157
+ - 23
158
+ 4: 3.10.13
159
+ 5: 0.16.2
160
+ 6: 4.36.2
161
+ 8:
162
+ - 5
163
+ 13: linux-x86_64
vits_eurosat/wandb/latest-run/files/src/best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22dc81cd32da31bbe67a088b2b5049a00ea561ab562f61a0b354bf376c6f72d7
3
+ size 346449478
vits_eurosat/wandb/latest-run/files/src/vit_mix_ctrs.yaml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CONFIG: vit_mix_ctrs.yaml
2
+ FUSED_WINDOW_PROCESS: False
3
+ MODEL:
4
+ NAME: 'ViT-B_16'
5
+ TYPE: "vit"
6
+ DROP_PATH_RATE: 0.3
7
+ DROP_RATE: 0.0
8
+ clip_dim: 1024
9
+ PRETRAINED: pretrained_weights/ViT-B_16-224.npz
10
+ QUANTIZE_VERSION: 1
11
+
12
+ TRAIN:
13
+ EPOCHS: 120
14
+ WARMUP_EPOCHS: 0
15
+ USE_CHECKPOINT: False
16
+ BASE_LR: 1e-6
17
+ WEIGHT_DECAY: 1e-3
18
+ WARMUP_LR: 5e-7
19
+ MIN_LR: 5e-7
20
+ CLIP_GRAD: 5.0
21
+ LR_SCHEDULER:
22
+ NAME: "cosine"
23
+ DECAY_EPOCHS: 30
24
+ DECAY_RATE: 0.1
25
+ MULTISTEPS: []
26
+ WARMUP_PREFIX: True
27
+ OPTIMIZER:
28
+ NAME: "adamw"
29
+ EPS: 1e-8
30
+ BETAS: [0.9, 0.999]
31
+ MOMENTUM: 0.9
32
+ CRETERION:
33
+ NAME: "TripletMarginLoss" # MSE
34
+ TRIPLET_MARGIN: 0.3
35
+ MINING_METHOD:
36
+ POSITIVE: "easy" # easy, hard, random
37
+ NEGATIVE: "random" # easy, hard, random
38
+ TOTAL: "semihard" # all, semihard, hard
39
+ NEG_NUM: 3
40
+ KD:
41
+ ENABLE: False
42
+ WEIGHT: 10
43
+ WEIGHT: 5e-2
44
+
45
+ DATA:
46
+ DATASET: "eurosat"
47
+ IMG_SIZE: 224
48
+ ROOT: "dbs"
49
+ BATCH_SIZE: 4
50
+ VAL_BATCH_SIZE: 32
51
+ EMBEDDING_BATCH_SIZE: 32
52
+ DEPTH_TRANSFORM: "rgb"
53
+ LABEL_TYPE: "pseudo_labels" # "gt" or "clip_vitb32"
54
+ IS_SUBSET: True
55
+ DATASET_THRESHOLD: 0.25
56
+
57
+ GPU: 0
58
+ SEED: 1
59
+ CLIP_MODEL:
60
+ NAME: "ViT-g-14" # "ViT-B/32"
61
+ PRETRAINED: "laion2b_s34b_b88k"
62
+ MODAL: "depth" # "rgb" or "depth"
63
+ PHASE: "train_ctrs" # "train_ctrs" or "test"
64
+ CKPT: "logs/vit_0216_143839/wandb/latest-run/files/src/best_model.pth"
65
+
66
+ # [] Mix Input
67
+ MIX_INPUT:
68
+ ENABLE: True
69
+ NUM_MIX: 2
70
+ WEIGHTED: False
71
+
72
+ # [] Attention
73
+ ATTENTION:
74
+ ENABLE: False
75
+
76
+ # [] Weighted samples
77
+ WEIGHTED_SAMPLE:
78
+ ENABLE: False
79
+
80
+
81
+
vits_scannet/wandb/latest-run/files/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ CONFIG:
4
+ desc: null
5
+ value: vit_mix_ctrs.yaml
6
+ FUSED_WINDOW_PROCESS:
7
+ desc: null
8
+ value: false
9
+ MODEL:
10
+ desc: null
11
+ value:
12
+ NAME: ViT-B_16
13
+ TYPE: vit
14
+ DROP_PATH_RATE: 0.3
15
+ DROP_RATE: 0.0
16
+ clip_dim: 1024
17
+ PRETRAINED: pretrained_weights/ViT-B_16-224.npz
18
+ QUANTIZE_VERSION: 1
19
+ TRAIN:
20
+ desc: null
21
+ value:
22
+ EPOCHS: 120
23
+ WARMUP_EPOCHS: 0
24
+ USE_CHECKPOINT: false
25
+ BASE_LR: 1.0e-06
26
+ WEIGHT_DECAY: 0.001
27
+ WARMUP_LR: 5.0e-07
28
+ MIN_LR: 5.0e-07
29
+ CLIP_GRAD: 5.0
30
+ LR_SCHEDULER:
31
+ NAME: cosine
32
+ DECAY_EPOCHS: 30
33
+ DECAY_RATE: 0.1
34
+ MULTISTEPS: []
35
+ WARMUP_PREFIX: true
36
+ OPTIMIZER:
37
+ NAME: adamw
38
+ EPS: 1.0e-08
39
+ BETAS:
40
+ - 0.9
41
+ - 0.999
42
+ MOMENTUM: 0.9
43
+ CRETERION:
44
+ NAME: TripletMarginLoss
45
+ TRIPLET_MARGIN: 0.3
46
+ MINING_METHOD:
47
+ POSITIVE: easy
48
+ NEGATIVE: random
49
+ TOTAL: semihard
50
+ NEG_NUM: 3
51
+ KD:
52
+ ENABLE: false
53
+ WEIGHT: 10
54
+ WEIGHT: 0.05
55
+ DATA:
56
+ desc: null
57
+ value:
58
+ DATASET: scannet
59
+ IMG_SIZE: 224
60
+ ROOT: dbs
61
+ BATCH_SIZE: 4
62
+ VAL_BATCH_SIZE: 32
63
+ EMBEDDING_BATCH_SIZE: 32
64
+ DEPTH_TRANSFORM: rgb
65
+ LABEL_TYPE: pseudo_labels
66
+ IS_SUBSET: true
67
+ DATASET_THRESHOLD: 0.25
68
+ GPU:
69
+ desc: null
70
+ value: 0
71
+ SEED:
72
+ desc: null
73
+ value: 1
74
+ CLIP_MODEL:
75
+ desc: null
76
+ value:
77
+ NAME: ViT-g-14
78
+ PRETRAINED: laion2b_s34b_b88k
79
+ MODAL:
80
+ desc: null
81
+ value: depth
82
+ PHASE:
83
+ desc: null
84
+ value: train_ctrs
85
+ CKPT:
86
+ desc: null
87
+ value: logs/vit_0219_164526/wandb/latest-run/files/src/best_model.pth
88
+ MIX_INPUT:
89
+ desc: null
90
+ value:
91
+ ENABLE: true
92
+ NUM_MIX: 2
93
+ WEIGHTED: false
94
+ ATTENTION:
95
+ desc: null
96
+ value:
97
+ ENABLE: false
98
+ WEIGHTED_SAMPLE:
99
+ desc: null
100
+ value:
101
+ ENABLE: false
102
+ quantization:
103
+ desc: null
104
+ value:
105
+ method: jacob
106
+ weight:
107
+ num_bits: 8
108
+ axis: per_channel
109
+ calib_method: max
110
+ activation:
111
+ num_bits: 8
112
+ axis: per_tensor
113
+ calib_method: max
114
+ calibration:
115
+ num_batch: 2
116
+ modal: rgbd
117
+ pre_calibration: false
118
+ cmd:
119
+ desc: null
120
+ value: run.py --phase=train_ctrs --config=configs/scannet/vit_mix_ctrs.yaml --quant_config=quantization_configs/d2_jacob.yaml
121
+ _wandb:
122
+ desc: null
123
+ value:
124
+ code_path: code/run.py
125
+ python_version: 3.10.13
126
+ cli_version: 0.16.2
127
+ framework: huggingface
128
+ huggingface_version: 4.37.1
129
+ is_jupyter_run: false
130
+ is_kaggle_kernel: false
131
+ start_time: 1708385324.534928
132
+ t:
133
+ 1:
134
+ - 1
135
+ - 5
136
+ - 11
137
+ - 41
138
+ - 49
139
+ - 53
140
+ - 55
141
+ - 63
142
+ - 80
143
+ 2:
144
+ - 1
145
+ - 5
146
+ - 11
147
+ - 41
148
+ - 49
149
+ - 53
150
+ - 55
151
+ - 63
152
+ - 80
153
+ 3:
154
+ - 3
155
+ - 13
156
+ - 16
157
+ - 23
158
+ 4: 3.10.13
159
+ 5: 0.16.2
160
+ 6: 4.37.1
161
+ 8:
162
+ - 5
163
+ 13: linux-x86_64
vits_scannet/wandb/latest-run/files/src/best_model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e3edfe8f477bac0369da9a6b3d7d51b1bc1f90694761b86129ab65441e6d963
3
+ size 346449478
vits_scannet/wandb/latest-run/files/src/vit_mix_ctrs.yaml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CONFIG: vit_mix_ctrs.yaml
2
+ FUSED_WINDOW_PROCESS: False
3
+
4
+ MODEL:
5
+ NAME: 'ViT-B_16'
6
+ TYPE: "vit"
7
+ DROP_PATH_RATE: 0.3
8
+ DROP_RATE: 0.0
9
+ clip_dim: 1024
10
+ PRETRAINED: pretrained_weights/ViT-B_16-224.npz
11
+ QUANTIZE_VERSION: 1
12
+
13
+ TRAIN:
14
+ EPOCHS: 120
15
+ WARMUP_EPOCHS: 0
16
+ USE_CHECKPOINT: False
17
+ BASE_LR: 1e-6
18
+ WEIGHT_DECAY: 1e-3
19
+ WARMUP_LR: 5e-7
20
+ MIN_LR: 5e-7
21
+ CLIP_GRAD: 5.0
22
+ LR_SCHEDULER:
23
+ NAME: "cosine"
24
+ DECAY_EPOCHS: 30
25
+ DECAY_RATE: 0.1
26
+ MULTISTEPS: []
27
+ WARMUP_PREFIX: True
28
+ OPTIMIZER:
29
+ NAME: "adamw"
30
+ EPS: 1e-8
31
+ BETAS: [0.9, 0.999]
32
+ MOMENTUM: 0.9
33
+ CRETERION:
34
+ NAME: "TripletMarginLoss" # MSE
35
+ TRIPLET_MARGIN: 0.3
36
+ MINING_METHOD:
37
+ POSITIVE: "easy" # easy, hard, random
38
+ NEGATIVE: "random" # easy, hard, random
39
+ TOTAL: "semihard" # all, semihard, hard
40
+ NEG_NUM: 3
41
+ KD:
42
+ ENABLE: False
43
+ WEIGHT: 10
44
+ WEIGHT: 5e-2
45
+
46
+ DATA:
47
+ DATASET: "scannet"
48
+ IMG_SIZE: 224
49
+ ROOT: "dbs"
50
+ BATCH_SIZE: 4
51
+ VAL_BATCH_SIZE: 32
52
+ EMBEDDING_BATCH_SIZE: 32
53
+ DEPTH_TRANSFORM: "rgb"
54
+ LABEL_TYPE: "pseudo_labels" # "gt" or "clip_vitb32"
55
+ IS_SUBSET: True
56
+ DATASET_THRESHOLD: 0.25
57
+
58
+
59
+ GPU: 0
60
+ SEED: 1
61
+ CLIP_MODEL:
62
+ NAME: "ViT-g-14" # "ViT-B/32"
63
+ PRETRAINED: "laion2b_s34b_b88k"
64
+ MODAL: "depth" # "rgb" or "depth"
65
+ PHASE: "train_ctrs" # "train_ctrs" or "test"
66
+ CKPT: "logs/vit_0219_164526/wandb/latest-run/files/src/best_model.pth"
67
+
68
+ # [] Mix Input
69
+ MIX_INPUT:
70
+ ENABLE: True
71
+ NUM_MIX: 2
72
+ WEIGHTED: False
73
+
74
+ # [] Attention
75
+ ATTENTION:
76
+ ENABLE: False
77
+
78
+ # [] Weighted samples
79
+ WEIGHTED_SAMPLE:
80
+ ENABLE: False
81
+
82
+
83
+