makisekurisu-jp committed on
Commit
89748ed
1 Parent(s): 55c5057

Upload 30 files

Browse files
Files changed (30) hide show
  1. catvton_workflow.json +328 -0
  2. models/CatVTON/DensePose/Base-DensePose-RCNN-FPN.yaml +48 -0
  3. models/CatVTON/DensePose/densepose_rcnn_R_50_FPN_s1x.yaml +8 -0
  4. models/CatVTON/DensePose/model_final_162be9.pkl +3 -0
  5. models/CatVTON/SCHP/exp-schp-201908261155-lip.pth +3 -0
  6. models/CatVTON/SCHP/exp-schp-201908301523-atr.pth +3 -0
  7. models/CatVTON/dresscode-16k-512/attention/model.safetensors +3 -0
  8. models/CatVTON/mix-48k-1024/attention/model.safetensors +3 -0
  9. models/CatVTON/sd-vae-ft-mse/config.json +29 -0
  10. models/CatVTON/sd-vae-ft-mse/diffusion_pytorch_model.safetensors +3 -0
  11. models/CatVTON/stable-diffusion-inpainting/scheduler/scheduler_config.json +13 -0
  12. models/CatVTON/stable-diffusion-inpainting/unet/config.json +36 -0
  13. models/CatVTON/stable-diffusion-inpainting/unet/diffusion_pytorch_model.safetensors +3 -0
  14. models/CatVTON/vitonhd-16k-512/attention/model.safetensors +3 -0
  15. models/bert-base-uncased/config.json +23 -0
  16. models/bert-base-uncased/model.safetensors +3 -0
  17. models/bert-base-uncased/tokenizer.json +0 -0
  18. models/bert-base-uncased/tokenizer_config.json +1 -0
  19. models/bert-base-uncased/vocab.txt +0 -0
  20. models/grounding-dino/GroundingDINO_SwinB.cfg.py +43 -0
  21. models/grounding-dino/GroundingDINO_SwinT_OGC.cfg.py +43 -0
  22. models/grounding-dino/groundingdino_swinb_cogcoor.pth +3 -0
  23. models/grounding-dino/groundingdino_swint_ogc.pth +3 -0
  24. models/sams/mobile_sam.pt +3 -0
  25. models/sams/sam_hq_vit_b.pth +3 -0
  26. models/sams/sam_hq_vit_h.pth +3 -0
  27. models/sams/sam_hq_vit_l.pth +3 -0
  28. models/sams/sam_vit_b_01ec64.pth +3 -0
  29. models/sams/sam_vit_h_4b8939.pth +3 -0
  30. models/sams/sam_vit_l_0b3195.pth +3 -0
catvton_workflow.json ADDED
@@ -0,0 +1,328 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "last_node_id": 12,
3
+ "last_link_id": 15,
4
+ "nodes": [
5
+ {
6
+ "id": 12,
7
+ "type": "LayerMask: SegmentAnythingUltra V2",
8
+ "pos": [
9
+ 703,
10
+ 133
11
+ ],
12
+ "size": [
13
+ 395,
14
+ 366
15
+ ],
16
+ "flags": {},
17
+ "order": 2,
18
+ "mode": 0,
19
+ "inputs": [
20
+ {
21
+ "name": "image",
22
+ "type": "IMAGE",
23
+ "link": 13,
24
+ "label": "图像"
25
+ }
26
+ ],
27
+ "outputs": [
28
+ {
29
+ "name": "image",
30
+ "type": "IMAGE",
31
+ "links": null,
32
+ "shape": 3,
33
+ "label": "图像"
34
+ },
35
+ {
36
+ "name": "mask",
37
+ "type": "MASK",
38
+ "links": [
39
+ 14,
40
+ 15
41
+ ],
42
+ "shape": 3,
43
+ "slot_index": 1,
44
+ "label": "遮罩"
45
+ }
46
+ ],
47
+ "properties": {
48
+ "Node name for S&R": "LayerMask: SegmentAnythingUltra V2"
49
+ },
50
+ "widgets_values": [
51
+ "sam_vit_h (2.56GB)",
52
+ "GroundingDINO_SwinT_OGC (694MB)",
53
+ 0.3,
54
+ "VITMatte",
55
+ 6,
56
+ 6,
57
+ 0.01,
58
+ 0.99,
59
+ false,
60
+ "shirt",
61
+ "cuda",
62
+ 2
63
+ ],
64
+ "locked": true
65
+ },
66
+ {
67
+ "id": 11,
68
+ "type": "LayerMask: MaskPreview",
69
+ "pos": [
70
+ 1203,
71
+ 133
72
+ ],
73
+ "size": [
74
+ 295,
75
+ 366
76
+ ],
77
+ "flags": {},
78
+ "order": 3,
79
+ "mode": 0,
80
+ "inputs": [
81
+ {
82
+ "name": "mask",
83
+ "type": "MASK",
84
+ "link": 14,
85
+ "label": "遮罩"
86
+ }
87
+ ],
88
+ "properties": {
89
+ "Node name for S&R": "LayerMask: MaskPreview"
90
+ },
91
+ "locked": true
92
+ },
93
+ {
94
+ "id": 2,
95
+ "type": "LoadImage",
96
+ "pos": [
97
+ 303,
98
+ 133
99
+ ],
100
+ "size": [
101
+ 295,
102
+ 366
103
+ ],
104
+ "flags": {},
105
+ "order": 0,
106
+ "mode": 0,
107
+ "outputs": [
108
+ {
109
+ "name": "IMAGE",
110
+ "type": "IMAGE",
111
+ "links": [
112
+ 1,
113
+ 13
114
+ ],
115
+ "shape": 3,
116
+ "slot_index": 0,
117
+ "label": "图像"
118
+ },
119
+ {
120
+ "name": "MASK",
121
+ "type": "MASK",
122
+ "links": null,
123
+ "shape": 3,
124
+ "label": "遮罩"
125
+ }
126
+ ],
127
+ "properties": {
128
+ "Node name for S&R": "LoadImage"
129
+ },
130
+ "widgets_values": [
131
+ "girl.jpg",
132
+ "image"
133
+ ],
134
+ "locked": true
135
+ },
136
+ {
137
+ "id": 7,
138
+ "type": "LoadImage",
139
+ "pos": [
140
+ 303,
141
+ 533
142
+ ],
143
+ "size": [
144
+ 295,
145
+ 366
146
+ ],
147
+ "flags": {},
148
+ "order": 1,
149
+ "mode": 0,
150
+ "outputs": [
151
+ {
152
+ "name": "IMAGE",
153
+ "type": "IMAGE",
154
+ "links": [
155
+ 7
156
+ ],
157
+ "shape": 3,
158
+ "label": "图像"
159
+ },
160
+ {
161
+ "name": "MASK",
162
+ "type": "MASK",
163
+ "links": null,
164
+ "shape": 3,
165
+ "label": "遮罩"
166
+ }
167
+ ],
168
+ "properties": {
169
+ "Node name for S&R": "LoadImage"
170
+ },
171
+ "widgets_values": [
172
+ "panda_t_shirt.jpg",
173
+ "image"
174
+ ],
175
+ "locked": true
176
+ },
177
+ {
178
+ "id": 1,
179
+ "type": "CatVTONWrapper",
180
+ "pos": [
181
+ 703,
182
+ 533
183
+ ],
184
+ "size": [
185
+ 395,
186
+ 366
187
+ ],
188
+ "flags": {},
189
+ "order": 4,
190
+ "mode": 0,
191
+ "inputs": [
192
+ {
193
+ "name": "image",
194
+ "type": "IMAGE",
195
+ "link": 1,
196
+ "slot_index": 0,
197
+ "label": "image"
198
+ },
199
+ {
200
+ "name": "mask",
201
+ "type": "MASK",
202
+ "link": 15,
203
+ "label": "mask"
204
+ },
205
+ {
206
+ "name": "refer_image",
207
+ "type": "IMAGE",
208
+ "link": 7,
209
+ "slot_index": 2,
210
+ "label": "refer_image"
211
+ }
212
+ ],
213
+ "outputs": [
214
+ {
215
+ "name": "image",
216
+ "type": "IMAGE",
217
+ "links": [
218
+ 8
219
+ ],
220
+ "shape": 3,
221
+ "slot_index": 0,
222
+ "label": "image"
223
+ }
224
+ ],
225
+ "properties": {
226
+ "Node name for S&R": "CatVTONWrapper"
227
+ },
228
+ "widgets_values": [
229
+ 25,
230
+ "fp16",
231
+ 571003793697217,
232
+ "randomize",
233
+ 50,
234
+ 3
235
+ ],
236
+ "locked": true
237
+ },
238
+ {
239
+ "id": 8,
240
+ "type": "PreviewImage",
241
+ "pos": [
242
+ 1203,
243
+ 533
244
+ ],
245
+ "size": [
246
+ 295,
247
+ 366
248
+ ],
249
+ "flags": {},
250
+ "order": 5,
251
+ "mode": 0,
252
+ "inputs": [
253
+ {
254
+ "name": "images",
255
+ "type": "IMAGE",
256
+ "link": 8,
257
+ "label": "图像"
258
+ }
259
+ ],
260
+ "properties": {
261
+ "Node name for S&R": "PreviewImage"
262
+ },
263
+ "locked": true
264
+ }
265
+ ],
266
+ "links": [
267
+ [
268
+ 1,
269
+ 2,
270
+ 0,
271
+ 1,
272
+ 0,
273
+ "IMAGE"
274
+ ],
275
+ [
276
+ 7,
277
+ 7,
278
+ 0,
279
+ 1,
280
+ 2,
281
+ "IMAGE"
282
+ ],
283
+ [
284
+ 8,
285
+ 1,
286
+ 0,
287
+ 8,
288
+ 0,
289
+ "IMAGE"
290
+ ],
291
+ [
292
+ 13,
293
+ 2,
294
+ 0,
295
+ 12,
296
+ 0,
297
+ "IMAGE"
298
+ ],
299
+ [
300
+ 14,
301
+ 12,
302
+ 1,
303
+ 11,
304
+ 0,
305
+ "MASK"
306
+ ],
307
+ [
308
+ 15,
309
+ 12,
310
+ 1,
311
+ 1,
312
+ 1,
313
+ "MASK"
314
+ ]
315
+ ],
316
+ "groups": [],
317
+ "config": {},
318
+ "extra": {
319
+ "ds": {
320
+ "scale": 0.5644739300537773,
321
+ "offset": [
322
+ 485.35094109114993,
323
+ 333.2609641529487
324
+ ]
325
+ }
326
+ },
327
+ "version": 0.4
328
+ }
models/CatVTON/DensePose/Base-DensePose-RCNN-FPN.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ VERSION: 2
2
+ MODEL:
3
+ META_ARCHITECTURE: "GeneralizedRCNN"
4
+ BACKBONE:
5
+ NAME: "build_resnet_fpn_backbone"
6
+ RESNETS:
7
+ OUT_FEATURES: ["res2", "res3", "res4", "res5"]
8
+ FPN:
9
+ IN_FEATURES: ["res2", "res3", "res4", "res5"]
10
+ ANCHOR_GENERATOR:
11
+ SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map
12
+ ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps)
13
+ RPN:
14
+ IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
15
+ PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level
16
+ PRE_NMS_TOPK_TEST: 1000 # Per FPN level
17
+ # Detectron1 uses 2000 proposals per-batch,
18
+ # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
19
+ # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
20
+ POST_NMS_TOPK_TRAIN: 1000
21
+ POST_NMS_TOPK_TEST: 1000
22
+
23
+ DENSEPOSE_ON: True
24
+ ROI_HEADS:
25
+ NAME: "DensePoseROIHeads"
26
+ IN_FEATURES: ["p2", "p3", "p4", "p5"]
27
+ NUM_CLASSES: 1
28
+ ROI_BOX_HEAD:
29
+ NAME: "FastRCNNConvFCHead"
30
+ NUM_FC: 2
31
+ POOLER_RESOLUTION: 7
32
+ POOLER_SAMPLING_RATIO: 2
33
+ POOLER_TYPE: "ROIAlign"
34
+ ROI_DENSEPOSE_HEAD:
35
+ NAME: "DensePoseV1ConvXHead"
36
+ POOLER_TYPE: "ROIAlign"
37
+ NUM_COARSE_SEGM_CHANNELS: 2
38
+ DATASETS:
39
+ TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
40
+ TEST: ("densepose_coco_2014_minival",)
41
+ SOLVER:
42
+ IMS_PER_BATCH: 16
43
+ BASE_LR: 0.01
44
+ STEPS: (60000, 80000)
45
+ MAX_ITER: 90000
46
+ WARMUP_FACTOR: 0.1
47
+ INPUT:
48
+ MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
models/CatVTON/DensePose/densepose_rcnn_R_50_FPN_s1x.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ _BASE_: "Base-DensePose-RCNN-FPN.yaml"
2
+ MODEL:
3
+ WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4
+ RESNETS:
5
+ DEPTH: 50
6
+ SOLVER:
7
+ MAX_ITER: 130000
8
+ STEPS: (100000, 120000)
models/CatVTON/DensePose/model_final_162be9.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8a7382001b16e453bad95ca9dbc68ae8f2b839b304cf90eaf5c27fbdb4dae91
3
+ size 255757821
models/CatVTON/SCHP/exp-schp-201908261155-lip.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24fa3254ceeb74c8435458994a64b522fb439a3635b7b86ff470457e0413da00
3
+ size 267449349
models/CatVTON/SCHP/exp-schp-201908301523-atr.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d7c91ce3b4e7133df56b599fc817b533e3439c5e8d282a59126d2fda339a2a
3
+ size 267445237
models/CatVTON/dresscode-16k-512/attention/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d65a6c64a9bc48e8009a0006d5272b9332c1077c9c2a74302cbac9f256e84cbf
3
+ size 198303368
models/CatVTON/mix-48k-1024/attention/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1fc093f1b6744623079e6f4e7313411f524e388c4b7467df1e0e7f577cba23a
3
+ size 198303368
models/CatVTON/sd-vae-ft-mse/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.4.2",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 128,
7
+ 256,
8
+ 512,
9
+ 512
10
+ ],
11
+ "down_block_types": [
12
+ "DownEncoderBlock2D",
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D"
16
+ ],
17
+ "in_channels": 3,
18
+ "latent_channels": 4,
19
+ "layers_per_block": 2,
20
+ "norm_num_groups": 32,
21
+ "out_channels": 3,
22
+ "sample_size": 256,
23
+ "up_block_types": [
24
+ "UpDecoderBlock2D",
25
+ "UpDecoderBlock2D",
26
+ "UpDecoderBlock2D",
27
+ "UpDecoderBlock2D"
28
+ ]
29
+ }
models/CatVTON/sd-vae-ft-mse/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
3
+ size 334643276
models/CatVTON/stable-diffusion-inpainting/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "DDIMScheduler",
3
+ "_diffusers_version": "0.6.0.dev0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "num_train_timesteps": 1000,
9
+ "set_alpha_to_one": false,
10
+ "steps_offset": 1,
11
+ "trained_betas": null,
12
+ "skip_prk_steps": true
13
+ }
models/CatVTON/stable-diffusion-inpainting/unet/config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.6.0.dev0",
4
+ "act_fn": "silu",
5
+ "attention_head_dim": 8,
6
+ "block_out_channels": [
7
+ 320,
8
+ 640,
9
+ 1280,
10
+ 1280
11
+ ],
12
+ "center_input_sample": false,
13
+ "cross_attention_dim": 768,
14
+ "down_block_types": [
15
+ "CrossAttnDownBlock2D",
16
+ "CrossAttnDownBlock2D",
17
+ "CrossAttnDownBlock2D",
18
+ "DownBlock2D"
19
+ ],
20
+ "downsample_padding": 1,
21
+ "flip_sin_to_cos": true,
22
+ "freq_shift": 0,
23
+ "in_channels": 9,
24
+ "layers_per_block": 2,
25
+ "mid_block_scale_factor": 1,
26
+ "norm_eps": 1e-05,
27
+ "norm_num_groups": 32,
28
+ "out_channels": 4,
29
+ "sample_size": 64,
30
+ "up_block_types": [
31
+ "UpBlock2D",
32
+ "CrossAttnUpBlock2D",
33
+ "CrossAttnUpBlock2D",
34
+ "CrossAttnUpBlock2D"
35
+ ]
36
+ }
models/CatVTON/stable-diffusion-inpainting/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24b788b4a777748377cc20364eea4ae113c8c42f4468c16bc8c02fdae5492af9
3
+ size 1719154104
models/CatVTON/vitonhd-16k-512/attention/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:915df7bf19a33bee36a28d5f9ceaef1e2267c47526f98ca9e4c49e90ae5f0fd0
3
+ size 198303368
models/bert-base-uncased/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "gradient_checkpointing": false,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 12,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "transformers_version": "4.6.0.dev0",
20
+ "type_vocab_size": 2,
21
+ "use_cache": true,
22
+ "vocab_size": 30522
23
+ }
models/bert-base-uncased/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d45e234eb4a928074dfd868cead0219ab85354cc53d20e772753c6bb9169d3
3
+ size 440449768
models/bert-base-uncased/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/bert-base-uncased/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "model_max_length": 512}
models/bert-base-uncased/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/grounding-dino/GroundingDINO_SwinB.cfg.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size = 1
2
+ modelname = "groundingdino"
3
+ backbone = "swin_B_384_22k"
4
+ position_embedding = "sine"
5
+ pe_temperatureH = 20
6
+ pe_temperatureW = 20
7
+ return_interm_indices = [1, 2, 3]
8
+ backbone_freeze_keywords = None
9
+ enc_layers = 6
10
+ dec_layers = 6
11
+ pre_norm = False
12
+ dim_feedforward = 2048
13
+ hidden_dim = 256
14
+ dropout = 0.0
15
+ nheads = 8
16
+ num_queries = 900
17
+ query_dim = 4
18
+ num_patterns = 0
19
+ num_feature_levels = 4
20
+ enc_n_points = 4
21
+ dec_n_points = 4
22
+ two_stage_type = "standard"
23
+ two_stage_bbox_embed_share = False
24
+ two_stage_class_embed_share = False
25
+ transformer_activation = "relu"
26
+ dec_pred_bbox_embed_share = True
27
+ dn_box_noise_scale = 1.0
28
+ dn_label_noise_ratio = 0.5
29
+ dn_label_coef = 1.0
30
+ dn_bbox_coef = 1.0
31
+ embed_init_tgt = True
32
+ dn_labelbook_size = 2000
33
+ max_text_len = 256
34
+ text_encoder_type = "bert-base-uncased"
35
+ use_text_enhancer = True
36
+ use_fusion_layer = True
37
+ use_checkpoint = True
38
+ use_transformer_ckpt = True
39
+ use_text_cross_attention = True
40
+ text_dropout = 0.0
41
+ fusion_dropout = 0.0
42
+ fusion_droppath = 0.1
43
+ sub_sentence_present = True
models/grounding-dino/GroundingDINO_SwinT_OGC.cfg.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ batch_size = 1
2
+ modelname = "groundingdino"
3
+ backbone = "swin_T_224_1k"
4
+ position_embedding = "sine"
5
+ pe_temperatureH = 20
6
+ pe_temperatureW = 20
7
+ return_interm_indices = [1, 2, 3]
8
+ backbone_freeze_keywords = None
9
+ enc_layers = 6
10
+ dec_layers = 6
11
+ pre_norm = False
12
+ dim_feedforward = 2048
13
+ hidden_dim = 256
14
+ dropout = 0.0
15
+ nheads = 8
16
+ num_queries = 900
17
+ query_dim = 4
18
+ num_patterns = 0
19
+ num_feature_levels = 4
20
+ enc_n_points = 4
21
+ dec_n_points = 4
22
+ two_stage_type = "standard"
23
+ two_stage_bbox_embed_share = False
24
+ two_stage_class_embed_share = False
25
+ transformer_activation = "relu"
26
+ dec_pred_bbox_embed_share = True
27
+ dn_box_noise_scale = 1.0
28
+ dn_label_noise_ratio = 0.5
29
+ dn_label_coef = 1.0
30
+ dn_bbox_coef = 1.0
31
+ embed_init_tgt = True
32
+ dn_labelbook_size = 2000
33
+ max_text_len = 256
34
+ text_encoder_type = "bert-base-uncased"
35
+ use_text_enhancer = True
36
+ use_fusion_layer = True
37
+ use_checkpoint = True
38
+ use_transformer_ckpt = True
39
+ use_text_cross_attention = True
40
+ text_dropout = 0.0
41
+ fusion_dropout = 0.0
42
+ fusion_droppath = 0.1
43
+ sub_sentence_present = True
models/grounding-dino/groundingdino_swinb_cogcoor.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46270f7a822e6906b655b729c90613e48929d0f2bb8b9b76fd10a856f3ac6ab7
3
+ size 938057991
models/grounding-dino/groundingdino_swint_ogc.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b3ca2563c77c69f651d7bd133e97139c186df06231157a64c507099c52bc799
3
+ size 693997677
models/sams/mobile_sam.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dbb90523a35330fedd7f1d3dfc66f995213d81b29a5ca8108dbcdd4e37d6c2f
3
+ size 40728226
models/sams/sam_hq_vit_b.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a9d662cd6f5a9c2dba6d40ab0058d88d287e4a18fd6fdc6ad5fb1a3fdeaa57
3
+ size 379335069
models/sams/sam_hq_vit_h.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7ac14a085326d9fa6199c8c698c4f0e7280afdbb974d2c4660ec60877b45e35
3
+ size 2570940653
models/sams/sam_hq_vit_l.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a6c385d62bf005ded91a54d5ec55c985cfc4103ef89c08d90f39f04934c343
3
+ size 1254865805
models/sams/sam_vit_b_01ec64.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912
3
+ size 375042383
models/sams/sam_vit_h_4b8939.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e
3
+ size 2564550879
models/sams/sam_vit_l_0b3195.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3adcc4315b642a4d2101128f611684e8734c41232a17c648ed1693702a49a622
3
+ size 1249524607