IDEA-Research
/

grounding-dino-base

@@ -2,20 +2,28 @@
   "activation_dropout": 0.0,
   "activation_function": "relu",
   "architectures": [
-    "GroundingDINOForObjectDetection"
   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": false,
-  "backbone": "swin",
   "backbone_config": {
     "depths": [
       2,
       2,
       18,
       2
     ],
     "embed_dim": 128,
     "image_size": 384,
     "model_type": "swin",
     "num_heads": [
       4,
@@ -23,6 +31,7 @@
       16,
       32
     ],
     "out_features": [
       "stage2",
       "stage3",
@@ -33,19 +42,20 @@
       3,
       4
     ],
     "window_size": 12
   },
-  "bbox_cost": 5,
-  "bbox_loss_coefficient": 5,
-  "class_cost": 1,
   "d_model": 256,
   "decoder_attention_heads": 8,
   "decoder_bbox_embed_share": true,
   "decoder_ffn_dim": 2048,
   "decoder_layers": 6,
   "decoder_n_points": 4,
-  "dice_loss_coefficient": 1,
-  "dilation": false,
   "disable_custom_kernels": false,
   "dropout": 0.1,
   "embedding_init_target": true,
@@ -53,35 +63,26 @@
   "encoder_ffn_dim": 2048,
   "encoder_layers": 6,
   "encoder_n_points": 4,
-  "eos_coefficient": 0.1,
   "focal_alpha": 0.25,
   "fusion_dropout": 0.0,
   "fusion_droppath": 0.1,
-  "giou_cost": 2,
-  "giou_loss_coefficient": 2,
   "is_encoder_decoder": true,
-  "mask_loss_coefficient": 1,
-  "max_position_embeddings": 1024,
   "max_text_len": 256,
   "model_type": "grounding-dino",
-  "num_channels": 3,
   "num_feature_levels": 4,
   "num_queries": 900,
   "position_embedding_type": "sine",
   "positional_embedding_temperature": 20,
   "query_dim": 4,
-  "sub_sentence_present": true,
   "text_backbone_config": {
     "model_type": "grounding-dino-text-prenet"
   },
   "text_enhancer_dropout": 0.0,
   "torch_dtype": "float32",
-  "transformers_version": "4.33.0.dev0",
   "two_stage": true,
-  "two_stage_bbox_embed_share": false,
-  "two_stage_class_embed_share": false,
-  "two_stage_num_proposals": 900,
-  "use_pretrained_backbone": true,
-  "use_timm_backbone": false,
-  "with_box_refine": true
 }

   "activation_dropout": 0.0,
   "activation_function": "relu",
   "architectures": [
+    "GroundingDinoForObjectDetection"
   ],
   "attention_dropout": 0.0,
   "auxiliary_loss": false,
   "backbone_config": {
+    "attention_probs_dropout_prob": 0.0,
     "depths": [
       2,
       2,
       18,
       2
     ],
+    "drop_path_rate": 0.1,
     "embed_dim": 128,
+    "encoder_stride": 32,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 1024,
     "image_size": 384,
+    "initializer_range": 0.02,
+    "layer_norm_eps": 1e-05,
+    "mlp_ratio": 4.0,
     "model_type": "swin",
     "num_heads": [
       4,
       16,
       32
     ],
+    "num_layers": 4,
     "out_features": [
       "stage2",
       "stage3",
       3,
       4
     ],
+    "patch_size": 4,
+    "qkv_bias": true,
+    "use_absolute_embeddings": false,
     "window_size": 12
   },
+  "bbox_cost": 5.0,
+  "bbox_loss_coefficient": 5.0,
+  "class_cost": 1.0,
   "d_model": 256,
   "decoder_attention_heads": 8,
   "decoder_bbox_embed_share": true,
   "decoder_ffn_dim": 2048,
   "decoder_layers": 6,
   "decoder_n_points": 4,
   "disable_custom_kernels": false,
   "dropout": 0.1,
   "embedding_init_target": true,
   "encoder_ffn_dim": 2048,
   "encoder_layers": 6,
   "encoder_n_points": 4,
   "focal_alpha": 0.25,
   "fusion_dropout": 0.0,
   "fusion_droppath": 0.1,
+  "giou_cost": 2.0,
+  "giou_loss_coefficient": 2.0,
+  "init_std": 0.02,
   "is_encoder_decoder": true,
   "max_text_len": 256,
   "model_type": "grounding-dino",
   "num_feature_levels": 4,
   "num_queries": 900,
   "position_embedding_type": "sine",
   "positional_embedding_temperature": 20,
   "query_dim": 4,
   "text_backbone_config": {
     "model_type": "grounding-dino-text-prenet"
   },
   "text_enhancer_dropout": 0.0,
   "torch_dtype": "float32",
+  "transformers_version": "4.36.0.dev0",
   "two_stage": true,
+  "two_stage_bbox_embed_share": false
 }

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78cf02dda8891982a76e42194e84d7173329c2ef116937fa65bf6723c74fa89d
+size 935754584