makisekurisu-jp committed on Aug 13

Commit

89748ed

•

1 Parent(s): 55c5057

Upload 30 files

Browse files

Files changed (30) hide show

catvton_workflow.json +328 -0
models/CatVTON/DensePose/Base-DensePose-RCNN-FPN.yaml +48 -0
models/CatVTON/DensePose/densepose_rcnn_R_50_FPN_s1x.yaml +8 -0
models/CatVTON/DensePose/model_final_162be9.pkl +3 -0
models/CatVTON/SCHP/exp-schp-201908261155-lip.pth +3 -0
models/CatVTON/SCHP/exp-schp-201908301523-atr.pth +3 -0
models/CatVTON/dresscode-16k-512/attention/model.safetensors +3 -0
models/CatVTON/mix-48k-1024/attention/model.safetensors +3 -0
models/CatVTON/sd-vae-ft-mse/config.json +29 -0
models/CatVTON/sd-vae-ft-mse/diffusion_pytorch_model.safetensors +3 -0
models/CatVTON/stable-diffusion-inpainting/scheduler/scheduler_config.json +13 -0
models/CatVTON/stable-diffusion-inpainting/unet/config.json +36 -0
models/CatVTON/stable-diffusion-inpainting/unet/diffusion_pytorch_model.safetensors +3 -0
models/CatVTON/vitonhd-16k-512/attention/model.safetensors +3 -0
models/bert-base-uncased/config.json +23 -0
models/bert-base-uncased/model.safetensors +3 -0
models/bert-base-uncased/tokenizer.json +0 -0
models/bert-base-uncased/tokenizer_config.json +1 -0
models/bert-base-uncased/vocab.txt +0 -0
models/grounding-dino/GroundingDINO_SwinB.cfg.py +43 -0
models/grounding-dino/GroundingDINO_SwinT_OGC.cfg.py +43 -0
models/grounding-dino/groundingdino_swinb_cogcoor.pth +3 -0
models/grounding-dino/groundingdino_swint_ogc.pth +3 -0
models/sams/mobile_sam.pt +3 -0
models/sams/sam_hq_vit_b.pth +3 -0
models/sams/sam_hq_vit_h.pth +3 -0
models/sams/sam_hq_vit_l.pth +3 -0
models/sams/sam_vit_b_01ec64.pth +3 -0
models/sams/sam_vit_h_4b8939.pth +3 -0
models/sams/sam_vit_l_0b3195.pth +3 -0

catvton_workflow.json ADDED Viewed

	@@ -0,0 +1,328 @@

+{
+  "last_node_id": 12,
+  "last_link_id": 15,
+  "nodes": [
+    {
+      "id": 12,
+      "type": "LayerMask: SegmentAnythingUltra V2",
+      "pos": [
+        703,
+        133
+      ],
+      "size": [
+        395,
+        366
+      ],
+      "flags": {},
+      "order": 2,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "link": 13,
+          "label": "图像"
+        }
+      ],
+      "outputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "links": null,
+          "shape": 3,
+          "label": "图像"
+        },
+        {
+          "name": "mask",
+          "type": "MASK",
+          "links": [
+            14,
+            15
+          ],
+          "shape": 3,
+          "slot_index": 1,
+          "label": "遮罩"
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "LayerMask: SegmentAnythingUltra V2"
+      },
+      "widgets_values": [
+        "sam_vit_h (2.56GB)",
+        "GroundingDINO_SwinT_OGC (694MB)",
+        0.3,
+        "VITMatte",
+        6,
+        6,
+        0.01,
+        0.99,
+        false,
+        "shirt",
+        "cuda",
+        2
+      ],
+      "locked": true
+    },
+    {
+      "id": 11,
+      "type": "LayerMask: MaskPreview",
+      "pos": [
+        1203,
+        133
+      ],
+      "size": [
+        295,
+        366
+      ],
+      "flags": {},
+      "order": 3,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "mask",
+          "type": "MASK",
+          "link": 14,
+          "label": "遮罩"
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "LayerMask: MaskPreview"
+      },
+      "locked": true
+    },
+    {
+      "id": 2,
+      "type": "LoadImage",
+      "pos": [
+        303,
+        133
+      ],
+      "size": [
+        295,
+        366
+      ],
+      "flags": {},
+      "order": 0,
+      "mode": 0,
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            1,
+            13
+          ],
+          "shape": 3,
+          "slot_index": 0,
+          "label": "图像"
+        },
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": null,
+          "shape": 3,
+          "label": "遮罩"
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "LoadImage"
+      },
+      "widgets_values": [
+        "girl.jpg",
+        "image"
+      ],
+      "locked": true
+    },
+    {
+      "id": 7,
+      "type": "LoadImage",
+      "pos": [
+        303,
+        533
+      ],
+      "size": [
+        295,
+        366
+      ],
+      "flags": {},
+      "order": 1,
+      "mode": 0,
+      "outputs": [
+        {
+          "name": "IMAGE",
+          "type": "IMAGE",
+          "links": [
+            7
+          ],
+          "shape": 3,
+          "label": "图像"
+        },
+        {
+          "name": "MASK",
+          "type": "MASK",
+          "links": null,
+          "shape": 3,
+          "label": "遮罩"
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "LoadImage"
+      },
+      "widgets_values": [
+        "panda_t_shirt.jpg",
+        "image"
+      ],
+      "locked": true
+    },
+    {
+      "id": 1,
+      "type": "CatVTONWrapper",
+      "pos": [
+        703,
+        533
+      ],
+      "size": [
+        395,
+        366
+      ],
+      "flags": {},
+      "order": 4,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "link": 1,
+          "slot_index": 0,
+          "label": "image"
+        },
+        {
+          "name": "mask",
+          "type": "MASK",
+          "link": 15,
+          "label": "mask"
+        },
+        {
+          "name": "refer_image",
+          "type": "IMAGE",
+          "link": 7,
+          "slot_index": 2,
+          "label": "refer_image"
+        }
+      ],
+      "outputs": [
+        {
+          "name": "image",
+          "type": "IMAGE",
+          "links": [
+            8
+          ],
+          "shape": 3,
+          "slot_index": 0,
+          "label": "image"
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "CatVTONWrapper"
+      },
+      "widgets_values": [
+        25,
+        "fp16",
+        571003793697217,
+        "randomize",
+        50,
+        3
+      ],
+      "locked": true
+    },
+    {
+      "id": 8,
+      "type": "PreviewImage",
+      "pos": [
+        1203,
+        533
+      ],
+      "size": [
+        295,
+        366
+      ],
+      "flags": {},
+      "order": 5,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "images",
+          "type": "IMAGE",
+          "link": 8,
+          "label": "图像"
+        }
+      ],
+      "properties": {
+        "Node name for S&R": "PreviewImage"
+      },
+      "locked": true
+    }
+  ],
+  "links": [
+    [
+      1,
+      2,
+      0,
+      1,
+      0,
+      "IMAGE"
+    ],
+    [
+      7,
+      7,
+      0,
+      1,
+      2,
+      "IMAGE"
+    ],
+    [
+      8,
+      1,
+      0,
+      8,
+      0,
+      "IMAGE"
+    ],
+    [
+      13,
+      2,
+      0,
+      12,
+      0,
+      "IMAGE"
+    ],
+    [
+      14,
+      12,
+      1,
+      11,
+      0,
+      "MASK"
+    ],
+    [
+      15,
+      12,
+      1,
+      1,
+      1,
+      "MASK"
+    ]
+  ],
+  "groups": [],
+  "config": {},
+  "extra": {
+    "ds": {
+      "scale": 0.5644739300537773,
+      "offset": [
+        485.35094109114993,
+        333.2609641529487
+      ]
+    }
+  },
+  "version": 0.4
+}

models/CatVTON/DensePose/Base-DensePose-RCNN-FPN.yaml ADDED Viewed

	@@ -0,0 +1,48 @@

+VERSION: 2
+MODEL:
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  BACKBONE:
+    NAME: "build_resnet_fpn_backbone"
+  RESNETS:
+    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
+  FPN:
+    IN_FEATURES: ["res2", "res3", "res4", "res5"]
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One size for each input feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all input feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000
+    POST_NMS_TOPK_TEST: 1000
+  DENSEPOSE_ON: True
+  ROI_HEADS:
+    NAME: "DensePoseROIHeads"
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+    NUM_CLASSES: 1
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+    POOLER_SAMPLING_RATIO: 2
+    POOLER_TYPE: "ROIAlign"
+  ROI_DENSEPOSE_HEAD:
+    NAME: "DensePoseV1ConvXHead"
+    POOLER_TYPE: "ROIAlign"
+    NUM_COARSE_SEGM_CHANNELS: 2
+DATASETS:
+  TRAIN: ("densepose_coco_2014_train", "densepose_coco_2014_valminusminival")
+  TEST: ("densepose_coco_2014_minival",)
+SOLVER:
+  IMS_PER_BATCH: 16
+  BASE_LR: 0.01
+  STEPS: (60000, 80000)
+  MAX_ITER: 90000
+  WARMUP_FACTOR: 0.1
+INPUT:
+  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)

models/CatVTON/DensePose/densepose_rcnn_R_50_FPN_s1x.yaml ADDED Viewed

	@@ -0,0 +1,8 @@

+_BASE_: "Base-DensePose-RCNN-FPN.yaml"
+MODEL:
+  WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
+  RESNETS:
+    DEPTH: 50
+SOLVER:
+  MAX_ITER: 130000
+  STEPS: (100000, 120000)

models/CatVTON/DensePose/model_final_162be9.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8a7382001b16e453bad95ca9dbc68ae8f2b839b304cf90eaf5c27fbdb4dae91
+size 255757821

models/CatVTON/SCHP/exp-schp-201908261155-lip.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24fa3254ceeb74c8435458994a64b522fb439a3635b7b86ff470457e0413da00
+size 267449349

models/CatVTON/SCHP/exp-schp-201908301523-atr.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9d7c91ce3b4e7133df56b599fc817b533e3439c5e8d282a59126d2fda339a2a
+size 267445237

models/CatVTON/dresscode-16k-512/attention/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d65a6c64a9bc48e8009a0006d5272b9332c1077c9c2a74302cbac9f256e84cbf
+size 198303368

models/CatVTON/mix-48k-1024/attention/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1fc093f1b6744623079e6f4e7313411f524e388c4b7467df1e0e7f577cba23a
+size 198303368

models/CatVTON/sd-vae-ft-mse/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.4.2",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 256,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

models/CatVTON/sd-vae-ft-mse/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
+size 334643276

models/CatVTON/stable-diffusion-inpainting/scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "_class_name": "DDIMScheduler",
+  "_diffusers_version": "0.6.0.dev0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "num_train_timesteps": 1000,
+  "set_alpha_to_one": false,
+  "steps_offset": 1,
+  "trained_betas": null,
+  "skip_prk_steps": true
+}

models/CatVTON/stable-diffusion-inpainting/unet/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.6.0.dev0",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 9,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ]
+}

models/CatVTON/stable-diffusion-inpainting/unet/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24b788b4a777748377cc20364eea4ae113c8c42f4468c16bc8c02fdae5492af9
+size 1719154104

models/CatVTON/vitonhd-16k-512/attention/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:915df7bf19a33bee36a28d5f9ceaef1e2267c47526f98ca9e4c49e90ae5f0fd0
+size 198303368

models/bert-base-uncased/config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "architectures": [
+    "BertForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "transformers_version": "4.6.0.dev0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

models/bert-base-uncased/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68d45e234eb4a928074dfd868cead0219ab85354cc53d20e772753c6bb9169d3
+size 440449768

models/bert-base-uncased/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

models/bert-base-uncased/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


+{"do_lower_case": true, "model_max_length": 512}

models/bert-base-uncased/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

models/grounding-dino/GroundingDINO_SwinB.cfg.py ADDED Viewed

	@@ -0,0 +1,43 @@

+batch_size = 1
+modelname = "groundingdino"
+backbone = "swin_B_384_22k"
+position_embedding = "sine"
+pe_temperatureH = 20
+pe_temperatureW = 20
+return_interm_indices = [1, 2, 3]
+backbone_freeze_keywords = None
+enc_layers = 6
+dec_layers = 6
+pre_norm = False
+dim_feedforward = 2048
+hidden_dim = 256
+dropout = 0.0
+nheads = 8
+num_queries = 900
+query_dim = 4
+num_patterns = 0
+num_feature_levels = 4
+enc_n_points = 4
+dec_n_points = 4
+two_stage_type = "standard"
+two_stage_bbox_embed_share = False
+two_stage_class_embed_share = False
+transformer_activation = "relu"
+dec_pred_bbox_embed_share = True
+dn_box_noise_scale = 1.0
+dn_label_noise_ratio = 0.5
+dn_label_coef = 1.0
+dn_bbox_coef = 1.0
+embed_init_tgt = True
+dn_labelbook_size = 2000
+max_text_len = 256
+text_encoder_type = "bert-base-uncased"
+use_text_enhancer = True
+use_fusion_layer = True
+use_checkpoint = True
+use_transformer_ckpt = True
+use_text_cross_attention = True
+text_dropout = 0.0
+fusion_dropout = 0.0
+fusion_droppath = 0.1
+sub_sentence_present = True

models/grounding-dino/GroundingDINO_SwinT_OGC.cfg.py ADDED Viewed

	@@ -0,0 +1,43 @@

+batch_size = 1
+modelname = "groundingdino"
+backbone = "swin_T_224_1k"
+position_embedding = "sine"
+pe_temperatureH = 20
+pe_temperatureW = 20
+return_interm_indices = [1, 2, 3]
+backbone_freeze_keywords = None
+enc_layers = 6
+dec_layers = 6
+pre_norm = False
+dim_feedforward = 2048
+hidden_dim = 256
+dropout = 0.0
+nheads = 8
+num_queries = 900
+query_dim = 4
+num_patterns = 0
+num_feature_levels = 4
+enc_n_points = 4
+dec_n_points = 4
+two_stage_type = "standard"
+two_stage_bbox_embed_share = False
+two_stage_class_embed_share = False
+transformer_activation = "relu"
+dec_pred_bbox_embed_share = True
+dn_box_noise_scale = 1.0
+dn_label_noise_ratio = 0.5
+dn_label_coef = 1.0
+dn_bbox_coef = 1.0
+embed_init_tgt = True
+dn_labelbook_size = 2000
+max_text_len = 256
+text_encoder_type = "bert-base-uncased"
+use_text_enhancer = True
+use_fusion_layer = True
+use_checkpoint = True
+use_transformer_ckpt = True
+use_text_cross_attention = True
+text_dropout = 0.0
+fusion_dropout = 0.0
+fusion_droppath = 0.1
+sub_sentence_present = True

models/grounding-dino/groundingdino_swinb_cogcoor.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46270f7a822e6906b655b729c90613e48929d0f2bb8b9b76fd10a856f3ac6ab7
+size 938057991

models/grounding-dino/groundingdino_swint_ogc.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b3ca2563c77c69f651d7bd133e97139c186df06231157a64c507099c52bc799
+size 693997677

models/sams/mobile_sam.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6dbb90523a35330fedd7f1d3dfc66f995213d81b29a5ca8108dbcdd4e37d6c2f
+size 40728226

models/sams/sam_hq_vit_b.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14a9d662cd6f5a9c2dba6d40ab0058d88d287e4a18fd6fdc6ad5fb1a3fdeaa57
+size 379335069

models/sams/sam_hq_vit_h.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7ac14a085326d9fa6199c8c698c4f0e7280afdbb974d2c4660ec60877b45e35
+size 2570940653

models/sams/sam_hq_vit_l.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a6c385d62bf005ded91a54d5ec55c985cfc4103ef89c08d90f39f04934c343
+size 1254865805

models/sams/sam_vit_b_01ec64.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ec2df62732614e57411cdcf32a23ffdf28910380d03139ee0f4fcbe91eb8c912
+size 375042383

models/sams/sam_vit_h_4b8939.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e
+size 2564550879

models/sams/sam_vit_l_0b3195.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3adcc4315b642a4d2101128f611684e8734c41232a17c648ed1693702a49a622
+size 1249524607