developer0hye committed on
Commit
3cdc86d
·
verified ·
1 Parent(s): 12f9950

Upload 40 files

Browse files
Files changed (40) hide show
  1. configs/coco.yml +82 -0
  2. configs/dataset/coco_detection.yml +41 -0
  3. configs/dataset/crowdhuman_detection.yml +41 -0
  4. configs/dataset/custom_detection.yml +41 -0
  5. configs/dataset/obj365_detection.yml +41 -0
  6. configs/dataset/voc_detection.yml +40 -0
  7. configs/dfine/crowdhuman/dfine_hgnetv2_l_ch.yml +44 -0
  8. configs/dfine/crowdhuman/dfine_hgnetv2_m_ch.yml +60 -0
  9. configs/dfine/crowdhuman/dfine_hgnetv2_n_ch.yml +82 -0
  10. configs/dfine/crowdhuman/dfine_hgnetv2_s_ch.yml +65 -0
  11. configs/dfine/crowdhuman/dfine_hgnetv2_x_ch.yml +55 -0
  12. configs/dfine/custom/dfine_hgnetv2_l_custom.yml +44 -0
  13. configs/dfine/custom/dfine_hgnetv2_m_custom.yml +60 -0
  14. configs/dfine/custom/dfine_hgnetv2_n_custom.yml +82 -0
  15. configs/dfine/custom/dfine_hgnetv2_s_custom.yml +65 -0
  16. configs/dfine/custom/dfine_hgnetv2_x_custom.yml +55 -0
  17. configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml +53 -0
  18. configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml +66 -0
  19. configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml +67 -0
  20. configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml +62 -0
  21. configs/dfine/dfine_hgnetv2_l_coco.yml +44 -0
  22. configs/dfine/dfine_hgnetv2_m_coco.yml +60 -0
  23. configs/dfine/dfine_hgnetv2_n_coco.yml +82 -0
  24. configs/dfine/dfine_hgnetv2_s_coco.yml +61 -0
  25. configs/dfine/dfine_hgnetv2_x_coco.yml +56 -0
  26. configs/dfine/include/dataloader.yml +39 -0
  27. configs/dfine/include/dfine_hgnetv2.yml +82 -0
  28. configs/dfine/include/optimizer.yml +36 -0
  29. configs/dfine/objects365/dfine_hgnetv2_l_obj2coco.yml +52 -0
  30. configs/dfine/objects365/dfine_hgnetv2_l_obj365.yml +49 -0
  31. configs/dfine/objects365/dfine_hgnetv2_m_obj2coco.yml +65 -0
  32. configs/dfine/objects365/dfine_hgnetv2_m_obj365.yml +62 -0
  33. configs/dfine/objects365/dfine_hgnetv2_n_obj2coco.yml +88 -0
  34. configs/dfine/objects365/dfine_hgnetv2_n_obj365.yml +84 -0
  35. configs/dfine/objects365/dfine_hgnetv2_s_obj2coco.yml +66 -0
  36. configs/dfine/objects365/dfine_hgnetv2_s_obj365.yml +63 -0
  37. configs/dfine/objects365/dfine_hgnetv2_x_obj2coco.yml +61 -0
  38. configs/dfine/objects365/dfine_hgnetv2_x_obj365.yml +58 -0
  39. configs/obj365.yml +367 -0
  40. configs/runtime.yml +24 -0
configs/coco.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Classes
2
+ names:
3
+ 0: person
4
+ 1: bicycle
5
+ 2: car
6
+ 3: motorcycle
7
+ 4: airplane
8
+ 5: bus
9
+ 6: train
10
+ 7: truck
11
+ 8: boat
12
+ 9: traffic light
13
+ 10: fire hydrant
14
+ 11: stop sign
15
+ 12: parking meter
16
+ 13: bench
17
+ 14: bird
18
+ 15: cat
19
+ 16: dog
20
+ 17: horse
21
+ 18: sheep
22
+ 19: cow
23
+ 20: elephant
24
+ 21: bear
25
+ 22: zebra
26
+ 23: giraffe
27
+ 24: backpack
28
+ 25: umbrella
29
+ 26: handbag
30
+ 27: tie
31
+ 28: suitcase
32
+ 29: frisbee
33
+ 30: skis
34
+ 31: snowboard
35
+ 32: sports ball
36
+ 33: kite
37
+ 34: baseball bat
38
+ 35: baseball glove
39
+ 36: skateboard
40
+ 37: surfboard
41
+ 38: tennis racket
42
+ 39: bottle
43
+ 40: wine glass
44
+ 41: cup
45
+ 42: fork
46
+ 43: knife
47
+ 44: spoon
48
+ 45: bowl
49
+ 46: banana
50
+ 47: apple
51
+ 48: sandwich
52
+ 49: orange
53
+ 50: broccoli
54
+ 51: carrot
55
+ 52: hot dog
56
+ 53: pizza
57
+ 54: donut
58
+ 55: cake
59
+ 56: chair
60
+ 57: couch
61
+ 58: potted plant
62
+ 59: bed
63
+ 60: dining table
64
+ 61: toilet
65
+ 62: tv
66
+ 63: laptop
67
+ 64: mouse
68
+ 65: remote
69
+ 66: keyboard
70
+ 67: cell phone
71
+ 68: microwave
72
+ 69: oven
73
+ 70: toaster
74
+ 71: sink
75
+ 72: refrigerator
76
+ 73: book
77
+ 74: clock
78
+ 75: vase
79
+ 76: scissors
80
+ 77: teddy bear
81
+ 78: hair drier
82
+ 79: toothbrush
configs/dataset/coco_detection.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: detection
2
+
3
+ evaluator:
4
+ type: CocoEvaluator
5
+ iou_types: ['bbox', ]
6
+
7
+ num_classes: 80
8
+ remap_mscoco_category: True
9
+
10
+ train_dataloader:
11
+ type: DataLoader
12
+ dataset:
13
+ type: CocoDetection
14
+ img_folder: /data/COCO2017/train2017/
15
+ ann_file: /data/COCO2017/annotations/instances_train2017.json
16
+ return_masks: False
17
+ transforms:
18
+ type: Compose
19
+ ops: ~
20
+ shuffle: True
21
+ num_workers: 4
22
+ drop_last: True
23
+ collate_fn:
24
+ type: BatchImageCollateFunction
25
+
26
+
27
+ val_dataloader:
28
+ type: DataLoader
29
+ dataset:
30
+ type: CocoDetection
31
+ img_folder: /data/COCO2017/val2017/
32
+ ann_file: /data/COCO2017/annotations/instances_val2017.json
33
+ return_masks: False
34
+ transforms:
35
+ type: Compose
36
+ ops: ~
37
+ shuffle: False
38
+ num_workers: 4
39
+ drop_last: False
40
+ collate_fn:
41
+ type: BatchImageCollateFunction
configs/dataset/crowdhuman_detection.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: detection
2
+
3
+ evaluator:
4
+ type: CocoEvaluator
5
+ iou_types: ['bbox', ]
6
+
7
+ num_classes: 1 # your dataset classes
8
+ remap_mscoco_category: False
9
+
10
+ train_dataloader:
11
+ type: DataLoader
12
+ dataset:
13
+ type: CocoDetection
14
+ img_folder: /data/CrowdHuman/coco/CrowdHuman_train
15
+ ann_file: /data/CrowdHuman/coco/Chuman-train.json
16
+ return_masks: False
17
+ transforms:
18
+ type: Compose
19
+ ops: ~
20
+ shuffle: True
21
+ num_workers: 4
22
+ drop_last: True
23
+ collate_fn:
24
+ type: BatchImageCollateFunction
25
+
26
+
27
+ val_dataloader:
28
+ type: DataLoader
29
+ dataset:
30
+ type: CocoDetection
31
+ img_folder: /data/CrowdHuman/coco/CrowdHuman_val
32
+ ann_file: /data/CrowdHuman/coco/Chuman-val.json
33
+ return_masks: False
34
+ transforms:
35
+ type: Compose
36
+ ops: ~
37
+ shuffle: False
38
+ num_workers: 4
39
+ drop_last: False
40
+ collate_fn:
41
+ type: BatchImageCollateFunction
configs/dataset/custom_detection.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: detection
2
+
3
+ evaluator:
4
+ type: CocoEvaluator
5
+ iou_types: ['bbox', ]
6
+
7
+ num_classes: 777 # placeholder: set this to the number of classes in your dataset
8
+ remap_mscoco_category: False
9
+
10
+ train_dataloader:
11
+ type: DataLoader
12
+ dataset:
13
+ type: CocoDetection
14
+ img_folder: /data/yourdataset/train
15
+ ann_file: /data/yourdataset/train/train.json
16
+ return_masks: False
17
+ transforms:
18
+ type: Compose
19
+ ops: ~
20
+ shuffle: True
21
+ num_workers: 4
22
+ drop_last: True
23
+ collate_fn:
24
+ type: BatchImageCollateFunction
25
+
26
+
27
+ val_dataloader:
28
+ type: DataLoader
29
+ dataset:
30
+ type: CocoDetection
31
+ img_folder: /data/yourdataset/val
32
+ ann_file: /data/yourdataset/val/val.json
33
+ return_masks: False
34
+ transforms:
35
+ type: Compose
36
+ ops: ~
37
+ shuffle: False
38
+ num_workers: 4
39
+ drop_last: False
40
+ collate_fn:
41
+ type: BatchImageCollateFunction
configs/dataset/obj365_detection.yml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: detection
2
+
3
+ evaluator:
4
+ type: CocoEvaluator
5
+ iou_types: ['bbox', ]
6
+
7
+ num_classes: 366
8
+ remap_mscoco_category: False
9
+
10
+ train_dataloader:
11
+ type: DataLoader
12
+ dataset:
13
+ type: CocoDetection
14
+ img_folder: /data/Objects365/data/train
15
+ ann_file: /data/Objects365/data/train/new_zhiyuan_objv2_train_resized.json
16
+ return_masks: False
17
+ transforms:
18
+ type: Compose
19
+ ops: ~
20
+ shuffle: True
21
+ num_workers: 4
22
+ drop_last: True
23
+ collate_fn:
24
+ type: BatchImageCollateFunction
25
+
26
+
27
+ val_dataloader:
28
+ type: DataLoader
29
+ dataset:
30
+ type: CocoDetection
31
+ img_folder: /data/Objects365/data/val/
32
+ ann_file: /data/Objects365/data/val/new_zhiyuan_objv2_val_resized.json
33
+ return_masks: False
34
+ transforms:
35
+ type: Compose
36
+ ops: ~
37
+ shuffle: False
38
+ num_workers: 4
39
+ drop_last: False
40
+ collate_fn:
41
+ type: BatchImageCollateFunction
configs/dataset/voc_detection.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: detection
2
+
3
+ evaluator:
4
+ type: CocoEvaluator
5
+ iou_types: ['bbox', ]
6
+
7
+ num_classes: 20
8
+
9
+ train_dataloader:
10
+ type: DataLoader
11
+ dataset:
12
+ type: VOCDetection
13
+ root: ./dataset/voc/
14
+ ann_file: trainval.txt
15
+ label_file: label_list.txt
16
+ transforms:
17
+ type: Compose
18
+ ops: ~
19
+ shuffle: True
20
+ num_workers: 4
21
+ drop_last: True
22
+ collate_fn:
23
+ type: BatchImageCollateFunction
24
+
25
+
26
+ val_dataloader:
27
+ type: DataLoader
28
+ dataset:
29
+ type: VOCDetection
30
+ root: ./dataset/voc/
31
+ ann_file: test.txt
32
+ label_file: label_list.txt
33
+ transforms:
34
+ type: Compose
35
+ ops: ~
36
+ shuffle: False
37
+ num_workers: 4
38
+ drop_last: False
39
+ collate_fn:
40
+ type: BatchImageCollateFunction
configs/dfine/crowdhuman/dfine_hgnetv2_l_ch.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/crowdhuman_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_l_crowdhuman
10
+
11
+
12
+ HGNetv2:
13
+ name: 'B4'
14
+ return_idx: [1, 2, 3]
15
+ freeze_stem_only: True
16
+ freeze_at: 0
17
+ freeze_norm: True
18
+
19
+ optimizer:
20
+ type: AdamW
21
+ params:
22
+ -
23
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
24
+ lr: 0.0000125
25
+ -
26
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
27
+ weight_decay: 0.
28
+
29
+ lr: 0.00025
30
+ betas: [0.9, 0.999]
31
+ weight_decay: 0.000125
32
+
33
+
34
+ # Increase to search for the optimal ema
35
+ epochs: 140
36
+ train_dataloader:
37
+ dataset:
38
+ transforms:
39
+ policy:
40
+ epoch: 120
41
+ collate_fn:
42
+ stop_epoch: 120
43
+ ema_restart_decay: 0.9999
44
+ base_size_repeat: 4
configs/dfine/crowdhuman/dfine_hgnetv2_m_ch.yml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/crowdhuman_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_m_crowdhuman
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B2'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 4 # 5 6
24
+ eval_idx: -1 # -2 -3
25
+
26
+ HybridEncoder:
27
+ in_channels: [384, 768, 1536]
28
+ hidden_dim: 256
29
+ depth_mult: 0.67
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.000025
37
+ -
38
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
39
+ lr: 0.000025
40
+ weight_decay: 0.
41
+ -
42
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
43
+ weight_decay: 0.
44
+
45
+ lr: 0.00025
46
+ betas: [0.9, 0.999]
47
+ weight_decay: 0.000125
48
+
49
+
50
+ # Increase to search for the optimal ema
51
+ epochs: 220
52
+ train_dataloader:
53
+ dataset:
54
+ transforms:
55
+ policy:
56
+ epoch: 200
57
+ collate_fn:
58
+ stop_epoch: 200
59
+ ema_restart_decay: 0.9999
60
+ base_size_repeat: 6
configs/dfine/crowdhuman/dfine_hgnetv2_n_ch.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/crowdhuman_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_n_crowdhuman
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+
23
+ HybridEncoder:
24
+ in_channels: [512, 1024]
25
+ feat_strides: [16, 32]
26
+
27
+ # intra
28
+ hidden_dim: 128
29
+ use_encoder_idx: [1]
30
+ dim_feedforward: 512
31
+
32
+ # cross
33
+ expansion: 0.34
34
+ depth_mult: 0.5
35
+
36
+
37
+ DFINETransformer:
38
+ feat_channels: [128, 128]
39
+ feat_strides: [16, 32]
40
+ hidden_dim: 128
41
+ dim_feedforward: 512
42
+ num_levels: 2
43
+
44
+ num_layers: 3
45
+ eval_idx: -1
46
+
47
+ num_points: [6, 6]
48
+
49
+ optimizer:
50
+ type: AdamW
51
+ params:
52
+ -
53
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
54
+ lr: 0.0004
55
+ -
56
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
57
+ lr: 0.0004
58
+ weight_decay: 0.
59
+ -
60
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
61
+ weight_decay: 0.
62
+
63
+ lr: 0.0008
64
+ betas: [0.9, 0.999]
65
+ weight_decay: 0.0001
66
+
67
+
68
+ # Increase to search for the optimal ema
69
+ epochs: 220
70
+ train_dataloader:
71
+ total_batch_size: 128
72
+ dataset:
73
+ transforms:
74
+ policy:
75
+ epoch: 200
76
+ collate_fn:
77
+ stop_epoch: 200
78
+ ema_restart_decay: 0.9999
79
+ base_size_repeat: ~
80
+
81
+ val_dataloader:
82
+ total_batch_size: 256
configs/dfine/crowdhuman/dfine_hgnetv2_s_ch.yml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/crowdhuman_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_s_crowdhuman
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 3 # 4 5 6
24
+ eval_idx: -1 # -2 -3 -4
25
+
26
+ HybridEncoder:
27
+ in_channels: [256, 512, 1024]
28
+ hidden_dim: 256
29
+ depth_mult: 0.34
30
+ expansion: 0.5
31
+
32
+ optimizer:
33
+ type: AdamW
34
+ params:
35
+ -
36
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
37
+ lr: 0.0002
38
+ -
39
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
40
+ lr: 0.0002
41
+ weight_decay: 0.
42
+ -
43
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44
+ weight_decay: 0.
45
+
46
+ lr: 0.0004
47
+ betas: [0.9, 0.999]
48
+ weight_decay: 0.0001
49
+
50
+
51
+ # Increase to search for the optimal ema
52
+ epochs: 220
53
+ train_dataloader:
54
+ total_batch_size: 64
55
+ dataset:
56
+ transforms:
57
+ policy:
58
+ epoch: 200
59
+ collate_fn:
60
+ stop_epoch: 200
61
+ ema_restart_decay: 0.9999
62
+ base_size_repeat: 20
63
+
64
+ val_dataloader:
65
+ total_batch_size: 128
configs/dfine/crowdhuman/dfine_hgnetv2_x_ch.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/crowdhuman_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_x_crowdhuman
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B5'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+
22
+ HybridEncoder:
23
+ hidden_dim: 384
24
+ dim_feedforward: 2048
25
+
26
+ DFINETransformer:
27
+ feat_channels: [384, 384, 384]
28
+ reg_scale: 8
29
+
30
+ optimizer:
31
+ type: AdamW
32
+ params:
33
+ -
34
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
35
+ lr: 0.0000025
36
+ -
37
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
38
+ weight_decay: 0.
39
+
40
+ lr: 0.00025
41
+ betas: [0.9, 0.999]
42
+ weight_decay: 0.000125
43
+
44
+
45
+ # Increase to search for the optimal ema
46
+ epochs: 140
47
+ train_dataloader:
48
+ dataset:
49
+ transforms:
50
+ policy:
51
+ epoch: 120
52
+ collate_fn:
53
+ stop_epoch: 120
54
+ ema_restart_decay: 0.9998
55
+ base_size_repeat: 3
configs/dfine/custom/dfine_hgnetv2_l_custom.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/custom_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_l_custom
10
+
11
+
12
+ HGNetv2:
13
+ name: 'B4'
14
+ return_idx: [1, 2, 3]
15
+ freeze_stem_only: True
16
+ freeze_at: 0
17
+ freeze_norm: True
18
+
19
+ optimizer:
20
+ type: AdamW
21
+ params:
22
+ -
23
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
24
+ lr: 0.0000125
25
+ -
26
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
27
+ weight_decay: 0.
28
+
29
+ lr: 0.00025
30
+ betas: [0.9, 0.999]
31
+ weight_decay: 0.000125
32
+
33
+
34
+ # Increase to search for the optimal ema
35
+ epochs: 80 # 72 + 2n
36
+ train_dataloader:
37
+ dataset:
38
+ transforms:
39
+ policy:
40
+ epoch: 72
41
+ collate_fn:
42
+ stop_epoch: 72
43
+ ema_restart_decay: 0.9999
44
+ base_size_repeat: 4
configs/dfine/custom/dfine_hgnetv2_m_custom.yml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/custom_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_m_custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B2'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 4 # 5 6
24
+ eval_idx: -1 # -2 -3
25
+
26
+ HybridEncoder:
27
+ in_channels: [384, 768, 1536]
28
+ hidden_dim: 256
29
+ depth_mult: 0.67
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.000025
37
+ -
38
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
39
+ lr: 0.000025
40
+ weight_decay: 0.
41
+ -
42
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
43
+ weight_decay: 0.
44
+
45
+ lr: 0.00025
46
+ betas: [0.9, 0.999]
47
+ weight_decay: 0.000125
48
+
49
+
50
+ # Increase to search for the optimal ema
51
+ epochs: 132 # 120 + 4n
52
+ train_dataloader:
53
+ dataset:
54
+ transforms:
55
+ policy:
56
+ epoch: 120
57
+ collate_fn:
58
+ stop_epoch: 120
59
+ ema_restart_decay: 0.9999
60
+ base_size_repeat: 6
configs/dfine/custom/dfine_hgnetv2_n_custom.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/custom_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_n_custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+
23
+ HybridEncoder:
24
+ in_channels: [512, 1024]
25
+ feat_strides: [16, 32]
26
+
27
+ # intra
28
+ hidden_dim: 128
29
+ use_encoder_idx: [1]
30
+ dim_feedforward: 512
31
+
32
+ # cross
33
+ expansion: 0.34
34
+ depth_mult: 0.5
35
+
36
+
37
+ DFINETransformer:
38
+ feat_channels: [128, 128]
39
+ feat_strides: [16, 32]
40
+ hidden_dim: 128
41
+ dim_feedforward: 512
42
+ num_levels: 2
43
+
44
+ num_layers: 3
45
+ eval_idx: -1
46
+
47
+ num_points: [6, 6]
48
+
49
+ optimizer:
50
+ type: AdamW
51
+ params:
52
+ -
53
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
54
+ lr: 0.0004
55
+ -
56
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
57
+ lr: 0.0004
58
+ weight_decay: 0.
59
+ -
60
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
61
+ weight_decay: 0.
62
+
63
+ lr: 0.0008
64
+ betas: [0.9, 0.999]
65
+ weight_decay: 0.0001
66
+
67
+
68
+ # Increase to search for the optimal ema
69
+ epochs: 220
70
+ train_dataloader:
71
+ total_batch_size: 128
72
+ dataset:
73
+ transforms:
74
+ policy:
75
+ epoch: 200
76
+ collate_fn:
77
+ stop_epoch: 200
78
+ ema_restart_decay: 0.9999
79
+ base_size_repeat: ~
80
+
81
+ val_dataloader:
82
+ total_batch_size: 256
configs/dfine/custom/dfine_hgnetv2_s_custom.yml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/custom_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_s_custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 3 # 4 5 6
24
+ eval_idx: -1 # -2 -3 -4
25
+
26
+ HybridEncoder:
27
+ in_channels: [256, 512, 1024]
28
+ hidden_dim: 256
29
+ depth_mult: 0.34
30
+ expansion: 0.5
31
+
32
+ optimizer:
33
+ type: AdamW
34
+ params:
35
+ -
36
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
37
+ lr: 0.0002
38
+ -
39
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
40
+ lr: 0.0002
41
+ weight_decay: 0.
42
+ -
43
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44
+ weight_decay: 0.
45
+
46
+ lr: 0.0004
47
+ betas: [0.9, 0.999]
48
+ weight_decay: 0.0001
49
+
50
+
51
+ # Increase to search for the optimal ema
52
+ epochs: 220
53
+ train_dataloader:
54
+ total_batch_size: 64
55
+ dataset:
56
+ transforms:
57
+ policy:
58
+ epoch: 200
59
+ collate_fn:
60
+ stop_epoch: 200
61
+ ema_restart_decay: 0.9999
62
+ base_size_repeat: 20
63
+
64
+ val_dataloader:
65
+ total_batch_size: 128
configs/dfine/custom/dfine_hgnetv2_x_custom.yml ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/custom_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_x_custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B5'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+
22
+ HybridEncoder:
23
+ hidden_dim: 384
24
+ dim_feedforward: 2048
25
+
26
+ DFINETransformer:
27
+ feat_channels: [384, 384, 384]
28
+ reg_scale: 8
29
+
30
+ optimizer:
31
+ type: AdamW
32
+ params:
33
+ -
34
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
35
+ lr: 0.0000025
36
+ -
37
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
38
+ weight_decay: 0.
39
+
40
+ lr: 0.00025
41
+ betas: [0.9, 0.999]
42
+ weight_decay: 0.000125
43
+
44
+
45
+ # Increase to search for the optimal ema
46
+ epochs: 80 # 72 + 2n
47
+ train_dataloader:
48
+ dataset:
49
+ transforms:
50
+ policy:
51
+ epoch: 72
52
+ collate_fn:
53
+ stop_epoch: 72
54
+ ema_restart_decay: 0.9998
55
+ base_size_repeat: 3
configs/dfine/custom/objects365/dfine_hgnetv2_l_obj2custom.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../../dataset/custom_detection.yml',
3
+ '../../../runtime.yml',
4
+ '../../include/dataloader.yml',
5
+ '../../include/optimizer.yml',
6
+ '../../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_l_obj2custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B4'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+ pretrained: False
22
+
23
+ optimizer:
24
+ type: AdamW
25
+ params:
26
+ -
27
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
28
+ lr: 0.0000125
29
+ -
30
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
31
+ weight_decay: 0.
32
+
33
+ lr: 0.00025
34
+ betas: [0.9, 0.999]
35
+ weight_decay: 0.000125
36
+
37
+
38
+ epochs: 36 # Early stop
39
+ train_dataloader:
40
+ dataset:
41
+ transforms:
42
+ policy:
43
+ epoch: 30
44
+ collate_fn:
45
+ stop_epoch: 30
46
+ ema_restart_decay: 0.9999
47
+ base_size_repeat: 4
48
+
49
+ ema:
50
+ warmups: 0
51
+
52
+ lr_warmup_scheduler:
53
+ warmup_duration: 0
configs/dfine/custom/objects365/dfine_hgnetv2_m_obj2custom.yml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../../dataset/custom_detection.yml',
3
+ '../../../runtime.yml',
4
+ '../../include/dataloader.yml',
5
+ '../../include/optimizer.yml',
6
+ '../../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_m_obj2custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B2'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+ pretrained: False
22
+
23
+ DFINETransformer:
24
+ num_layers: 4 # 5 6
25
+ eval_idx: -1 # -2 -3
26
+
27
+ HybridEncoder:
28
+ in_channels: [384, 768, 1536]
29
+ hidden_dim: 256
30
+ depth_mult: 0.67
31
+
32
+ optimizer:
33
+ type: AdamW
34
+ params:
35
+ -
36
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
37
+ lr: 0.000025
38
+ -
39
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
40
+ lr: 0.000025
41
+ weight_decay: 0.
42
+ -
43
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44
+ weight_decay: 0.
45
+
46
+ lr: 0.00025
47
+ betas: [0.9, 0.999]
48
+ weight_decay: 0.000125
49
+
50
+
51
+ epochs: 56 # Early stop
52
+ train_dataloader:
53
+ dataset:
54
+ transforms:
55
+ policy:
56
+ epoch: 48
57
+ collate_fn:
58
+ stop_epoch: 48
59
+ ema_restart_decay: 0.9999
60
+ base_size_repeat: 6
61
+
62
+ ema:
63
+ warmups: 0
64
+
65
+ lr_warmup_scheduler:
66
+ warmup_duration: 0
configs/dfine/custom/objects365/dfine_hgnetv2_s_obj2custom.yml ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../../dataset/custom_detection.yml',
3
+ '../../../runtime.yml',
4
+ '../../include/dataloader.yml',
5
+ '../../include/optimizer.yml',
6
+ '../../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_s_obj2custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+ pretrained: False
22
+
23
+ DFINETransformer:
24
+ num_layers: 3 # 4 5 6
25
+ eval_idx: -1 # -2 -3 -4
26
+
27
+ HybridEncoder:
28
+ in_channels: [256, 512, 1024]
29
+ hidden_dim: 256
30
+ depth_mult: 0.34
31
+ expansion: 0.5
32
+
33
+ optimizer:
34
+ type: AdamW
35
+ params:
36
+ -
37
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
38
+ lr: 0.000125
39
+ -
40
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
41
+ lr: 0.000125
42
+ weight_decay: 0.
43
+ -
44
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
45
+ weight_decay: 0.
46
+
47
+ lr: 0.00025
48
+ betas: [0.9, 0.999]
49
+ weight_decay: 0.000125
50
+
51
+
52
+ epochs: 64 # Early stop
53
+ train_dataloader:
54
+ dataset:
55
+ transforms:
56
+ policy:
57
+ epoch: 56
58
+ collate_fn:
59
+ stop_epoch: 56
60
+ ema_restart_decay: 0.9999
61
+ base_size_repeat: 10
62
+
63
+ ema:
64
+ warmups: 0
65
+
66
+ lr_warmup_scheduler:
67
+ warmup_duration: 0
configs/dfine/custom/objects365/dfine_hgnetv2_x_obj2custom.yml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../../dataset/custom_detection.yml',
3
+ '../../../runtime.yml',
4
+ '../../include/dataloader.yml',
5
+ '../../include/optimizer.yml',
6
+ '../../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_x_obj2custom
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B5'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+ pretrained: False
22
+
23
+ HybridEncoder:
24
+ # intra
25
+ hidden_dim: 384
26
+ dim_feedforward: 2048
27
+
28
+ DFINETransformer:
29
+ feat_channels: [384, 384, 384]
30
+ reg_scale: 8
31
+
32
+ optimizer:
33
+ type: AdamW
34
+ params:
35
+ -
36
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
37
+ lr: 0.0000025
38
+ -
39
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
40
+ weight_decay: 0.
41
+
42
+ lr: 0.00025
43
+ betas: [0.9, 0.999]
44
+ weight_decay: 0.000125
45
+
46
+
47
+ epochs: 36 # Early stop
48
+ train_dataloader:
49
+ dataset:
50
+ transforms:
51
+ policy:
52
+ epoch: 30
53
+ collate_fn:
54
+ stop_epoch: 30
55
+ ema_restart_decay: 0.9999
56
+ base_size_repeat: 3
57
+
58
+ ema:
59
+ warmups: 0
60
+
61
+ lr_warmup_scheduler:
62
+ warmup_duration: 0
configs/dfine/dfine_hgnetv2_l_coco.yml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../dataset/coco_detection.yml',
3
+ '../runtime.yml',
4
+ './include/dataloader.yml',
5
+ './include/optimizer.yml',
6
+ './include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_l_coco
10
+
11
+
12
+ HGNetv2:
13
+ name: 'B4'
14
+ return_idx: [1, 2, 3]
15
+ freeze_stem_only: True
16
+ freeze_at: 0
17
+ freeze_norm: True
18
+
19
+ optimizer:
20
+ type: AdamW
21
+ params:
22
+ -
23
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
24
+ lr: 0.0000125
25
+ -
26
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
27
+ weight_decay: 0.
28
+
29
+ lr: 0.00025
30
+ betas: [0.9, 0.999]
31
+ weight_decay: 0.000125
32
+
33
+
34
+ # Increase to search for the optimal ema
35
+ epochs: 80 # 72 + 2n
36
+ train_dataloader:
37
+ dataset:
38
+ transforms:
39
+ policy:
40
+ epoch: 72
41
+ collate_fn:
42
+ stop_epoch: 72
43
+ ema_restart_decay: 0.9999
44
+ base_size_repeat: 4
configs/dfine/dfine_hgnetv2_m_coco.yml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../dataset/coco_detection.yml',
3
+ '../runtime.yml',
4
+ './include/dataloader.yml',
5
+ './include/optimizer.yml',
6
+ './include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_m_coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B2'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 4 # 5 6
24
+ eval_idx: -1 # -2 -3
25
+
26
+ HybridEncoder:
27
+ in_channels: [384, 768, 1536]
28
+ hidden_dim: 256
29
+ depth_mult: 0.67
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.00002
37
+ -
38
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
39
+ lr: 0.00002
40
+ weight_decay: 0.
41
+ -
42
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
43
+ weight_decay: 0.
44
+
45
+ lr: 0.0002
46
+ betas: [0.9, 0.999]
47
+ weight_decay: 0.0001
48
+
49
+
50
+ # Increase to search for the optimal ema
51
+ epochs: 132 # 120 + 4n
52
+ train_dataloader:
53
+ dataset:
54
+ transforms:
55
+ policy:
56
+ epoch: 120
57
+ collate_fn:
58
+ stop_epoch: 120
59
+ ema_restart_decay: 0.9999
60
+ base_size_repeat: 6
configs/dfine/dfine_hgnetv2_n_coco.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../dataset/coco_detection.yml',
3
+ '../runtime.yml',
4
+ './include/dataloader.yml',
5
+ './include/optimizer.yml',
6
+ './include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_n_coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+
23
+ HybridEncoder:
24
+ in_channels: [512, 1024]
25
+ feat_strides: [16, 32]
26
+
27
+ # intra
28
+ hidden_dim: 128
29
+ use_encoder_idx: [1]
30
+ dim_feedforward: 512
31
+
32
+ # cross
33
+ expansion: 0.34
34
+ depth_mult: 0.5
35
+
36
+
37
+ DFINETransformer:
38
+ feat_channels: [128, 128]
39
+ feat_strides: [16, 32]
40
+ hidden_dim: 128
41
+ dim_feedforward: 512
42
+ num_levels: 2
43
+
44
+ num_layers: 3
45
+ eval_idx: -1
46
+
47
+ num_points: [6, 6]
48
+
49
+ optimizer:
50
+ type: AdamW
51
+ params:
52
+ -
53
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
54
+ lr: 0.0004
55
+ -
56
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
57
+ lr: 0.0004
58
+ weight_decay: 0.
59
+ -
60
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
61
+ weight_decay: 0.
62
+
63
+ lr: 0.0008
64
+ betas: [0.9, 0.999]
65
+ weight_decay: 0.0001
66
+
67
+
68
+ # Increase to search for the optimal ema
69
+ epochs: 160 # 148 + 4n
70
+ train_dataloader:
71
+ total_batch_size: 128
72
+ dataset:
73
+ transforms:
74
+ policy:
75
+ epoch: 148
76
+ collate_fn:
77
+ stop_epoch: 148
78
+ ema_restart_decay: 0.9999
79
+ base_size_repeat: ~
80
+
81
+ val_dataloader:
82
+ total_batch_size: 256
configs/dfine/dfine_hgnetv2_s_coco.yml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../dataset/coco_detection.yml',
3
+ '../runtime.yml',
4
+ './include/dataloader.yml',
5
+ './include/optimizer.yml',
6
+ './include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_s_coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 3 # 4 5 6
24
+ eval_idx: -1 # -2 -3 -4
25
+
26
+ HybridEncoder:
27
+ in_channels: [256, 512, 1024]
28
+ hidden_dim: 256
29
+ depth_mult: 0.34
30
+ expansion: 0.5
31
+
32
+ optimizer:
33
+ type: AdamW
34
+ params:
35
+ -
36
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
37
+ lr: 0.0001
38
+ -
39
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
40
+ lr: 0.0001
41
+ weight_decay: 0.
42
+ -
43
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44
+ weight_decay: 0.
45
+
46
+ lr: 0.0002
47
+ betas: [0.9, 0.999]
48
+ weight_decay: 0.0001
49
+
50
+
51
+ # Increase to search for the optimal ema
52
+ epochs: 132 # 120 + 4n
53
+ train_dataloader:
54
+ dataset:
55
+ transforms:
56
+ policy:
57
+ epoch: 120
58
+ collate_fn:
59
+ stop_epoch: 120
60
+ ema_restart_decay: 0.9999
61
+ base_size_repeat: 20
configs/dfine/dfine_hgnetv2_x_coco.yml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../dataset/coco_detection.yml',
3
+ '../runtime.yml',
4
+ './include/dataloader.yml',
5
+ './include/optimizer.yml',
6
+ './include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_x_coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B5'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+
22
+ HybridEncoder:
23
+ # intra
24
+ hidden_dim: 384
25
+ dim_feedforward: 2048
26
+
27
+ DFINETransformer:
28
+ feat_channels: [384, 384, 384]
29
+ reg_scale: 8
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.0000025
37
+ -
38
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
39
+ weight_decay: 0.
40
+
41
+ lr: 0.00025
42
+ betas: [0.9, 0.999]
43
+ weight_decay: 0.000125
44
+
45
+
46
+ # Increase to search for the optimal ema
47
+ epochs: 80 # 72 + 2n
48
+ train_dataloader:
49
+ dataset:
50
+ transforms:
51
+ policy:
52
+ epoch: 72
53
+ collate_fn:
54
+ stop_epoch: 72
55
+ ema_restart_decay: 0.9998
56
+ base_size_repeat: 3
configs/dfine/include/dataloader.yml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ train_dataloader:
3
+ dataset:
4
+ transforms:
5
+ ops:
6
+ - {type: RandomPhotometricDistort, p: 0.5}
7
+ - {type: RandomZoomOut, fill: 0}
8
+ - {type: RandomIoUCrop, p: 0.8}
9
+ - {type: SanitizeBoundingBoxes, min_size: 1}
10
+ - {type: RandomHorizontalFlip}
11
+ - {type: Resize, size: [640, 640], }
12
+ - {type: SanitizeBoundingBoxes, min_size: 1}
13
+ - {type: ConvertPILImage, dtype: 'float32', scale: True}
14
+ - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
15
+ policy:
16
+ name: stop_epoch
17
+ epoch: 72 # epoch in [72, ~) stop `ops`
18
+ ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop']
19
+
20
+ collate_fn:
21
+ type: BatchImageCollateFunction
22
+ base_size: 640
23
+ base_size_repeat: 3
24
+ stop_epoch: 72 # epoch in [72, ~) stop `multiscales`
25
+
26
+ shuffle: True
27
+ total_batch_size: 32 # total batch size equals 32 (4 * 8)
28
+ num_workers: 4
29
+
30
+
31
+ val_dataloader:
32
+ dataset:
33
+ transforms:
34
+ ops:
35
+ - {type: Resize, size: [640, 640], }
36
+ - {type: ConvertPILImage, dtype: 'float32', scale: True}
37
+ shuffle: False
38
+ total_batch_size: 64
39
+ num_workers: 4
configs/dfine/include/dfine_hgnetv2.yml ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task: detection
2
+
3
+ model: DFINE
4
+ criterion: DFINECriterion
5
+ postprocessor: DFINEPostProcessor
6
+
7
+ use_focal_loss: True
8
+ eval_spatial_size: [640, 640] # h w
9
+
10
+ DFINE:
11
+ backbone: HGNetv2
12
+ encoder: HybridEncoder
13
+ decoder: DFINETransformer
14
+
15
+ HGNetv2:
16
+ pretrained: True
17
+ local_model_dir: weight/hgnetv2/
18
+
19
+ HybridEncoder:
20
+ in_channels: [512, 1024, 2048]
21
+ feat_strides: [8, 16, 32]
22
+
23
+ # intra
24
+ hidden_dim: 256
25
+ use_encoder_idx: [2]
26
+ num_encoder_layers: 1
27
+ nhead: 8
28
+ dim_feedforward: 1024
29
+ dropout: 0.
30
+ enc_act: 'gelu'
31
+
32
+ # cross
33
+ expansion: 1.0
34
+ depth_mult: 1
35
+ act: 'silu'
36
+
37
+
38
+ DFINETransformer:
39
+ feat_channels: [256, 256, 256]
40
+ feat_strides: [8, 16, 32]
41
+ hidden_dim: 256
42
+ num_levels: 3
43
+
44
+ num_layers: 6
45
+ eval_idx: -1
46
+ num_queries: 300
47
+
48
+ num_denoising: 100
49
+ label_noise_ratio: 0.5
50
+ box_noise_scale: 1.0
51
+
52
+ # NEW
53
+ reg_max: 32
54
+ reg_scale: 4
55
+
56
+ # Auxiliary decoder layers dimension scaling
57
+ # "eg. If num_layers: 6 eval_idx: -4,
58
+ # then layer 3, 4, 5 are auxiliary decoder layers."
59
+ layer_scale: 1 # 2
60
+
61
+
62
+ num_points: [3, 6, 3] # [4, 4, 4] [3, 6, 3]
63
+ cross_attn_method: default # default, discrete
64
+ query_select_method: default # default, agnostic
65
+
66
+
67
+ DFINEPostProcessor:
68
+ num_top_queries: 300
69
+
70
+
71
+ DFINECriterion:
72
+ weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2, loss_fgl: 0.15, loss_ddf: 1.5}
73
+ losses: ['vfl', 'boxes', 'local']
74
+ alpha: 0.75
75
+ gamma: 2.0
76
+ reg_max: 32
77
+
78
+ matcher:
79
+ type: HungarianMatcher
80
+ weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
81
+ alpha: 0.25
82
+ gamma: 2.0
configs/dfine/include/optimizer.yml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ use_amp: True
2
+ use_ema: True
3
+ ema:
4
+ type: ModelEMA
5
+ decay: 0.9999
6
+ warmups: 1000
7
+ start: 0
8
+
9
+
10
+ epochs: 72
11
+ clip_max_norm: 0.1
12
+
13
+
14
+ optimizer:
15
+ type: AdamW
16
+ params:
17
+ -
18
+ params: '^(?=.*backbone)(?!.*norm).*$'
19
+ lr: 0.0000125
20
+ -
21
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
22
+ weight_decay: 0.
23
+
24
+ lr: 0.00025
25
+ betas: [0.9, 0.999]
26
+ weight_decay: 0.000125
27
+
28
+
29
+ lr_scheduler:
30
+ type: MultiStepLR
31
+ milestones: [500]
32
+ gamma: 0.1
33
+
34
+ lr_warmup_scheduler:
35
+ type: LinearWarmup
36
+ warmup_duration: 500
configs/dfine/objects365/dfine_hgnetv2_l_obj2coco.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/coco_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_l_obj2coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B4'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+
22
+ optimizer:
23
+ type: AdamW
24
+ params:
25
+ -
26
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
27
+ lr: 0.0000125
28
+ -
29
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
30
+ weight_decay: 0.
31
+
32
+ lr: 0.00025
33
+ betas: [0.9, 0.999]
34
+ weight_decay: 0.000125
35
+
36
+
37
+ epochs: 36 # Early stop
38
+ train_dataloader:
39
+ dataset:
40
+ transforms:
41
+ policy:
42
+ epoch: 30
43
+ collate_fn:
44
+ stop_epoch: 30
45
+ ema_restart_decay: 0.9999
46
+ base_size_repeat: 4
47
+
48
+ ema:
49
+ warmups: 0
50
+
51
+ lr_warmup_scheduler:
52
+ warmup_duration: 0
configs/dfine/objects365/dfine_hgnetv2_l_obj365.yml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/obj365_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_l_obj365
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B4'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+
22
+ optimizer:
23
+ type: AdamW
24
+ params:
25
+ -
26
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
27
+ lr: 0.0000125
28
+ -
29
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
30
+ weight_decay: 0.
31
+
32
+ lr: 0.00025
33
+ betas: [0.9, 0.999]
34
+ weight_decay: 0.000125
35
+ # weight_decay: 0.00005 # Faster convergence (optional)
36
+
37
+
38
+ epochs: 24 # Early stop
39
+ train_dataloader:
40
+ dataset:
41
+ transforms:
42
+ policy:
43
+ epoch: 500
44
+ collate_fn:
45
+ stop_epoch: 500
46
+ base_size_repeat: 4
47
+
48
+ checkpoint_freq: 1
49
+ print_freq: 1000
configs/dfine/objects365/dfine_hgnetv2_m_obj2coco.yml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/coco_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_m_obj2coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B2'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 4 # 5 6
24
+ eval_idx: -1 # -2 -3
25
+
26
+ HybridEncoder:
27
+ in_channels: [384, 768, 1536]
28
+ hidden_dim: 256
29
+ depth_mult: 0.67
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.000025
37
+ -
38
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
39
+ lr: 0.000025
40
+ weight_decay: 0.
41
+ -
42
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
43
+ weight_decay: 0.
44
+
45
+ lr: 0.00025
46
+ betas: [0.9, 0.999]
47
+ weight_decay: 0.000125
48
+
49
+
50
+ epochs: 56 # Early stop
51
+ train_dataloader:
52
+ dataset:
53
+ transforms:
54
+ policy:
55
+ epoch: 48
56
+ collate_fn:
57
+ stop_epoch: 48
58
+ ema_restart_decay: 0.9999
59
+ base_size_repeat: 6
60
+
61
+ ema:
62
+ warmups: 0
63
+
64
+ lr_warmup_scheduler:
65
+ warmup_duration: 0
configs/dfine/objects365/dfine_hgnetv2_m_obj365.yml ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/obj365_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_m_obj365
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B2'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 4 # 5 6
24
+ eval_idx: -1 # -2 -3
25
+
26
+ HybridEncoder:
27
+ in_channels: [384, 768, 1536]
28
+ hidden_dim: 256
29
+ depth_mult: 0.67
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.000025
37
+ -
38
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
39
+ lr: 0.000025
40
+ weight_decay: 0.
41
+ -
42
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
43
+ weight_decay: 0.
44
+
45
+ lr: 0.00025
46
+ betas: [0.9, 0.999]
47
+ weight_decay: 0.000125
48
+ # weight_decay: 0.00005 # Faster convergence (optional)
49
+
50
+
51
+ epochs: 36 # Early stop
52
+ train_dataloader:
53
+ dataset:
54
+ transforms:
55
+ policy:
56
+ epoch: 500
57
+ collate_fn:
58
+ stop_epoch: 500
59
+ base_size_repeat: 6
60
+
61
+ checkpoint_freq: 1
62
+ print_freq: 1000
configs/dfine/objects365/dfine_hgnetv2_n_obj2coco.yml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/coco_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_n_obj2coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+
23
+ HybridEncoder:
24
+ in_channels: [512, 1024]
25
+ feat_strides: [16, 32]
26
+
27
+ # intra
28
+ hidden_dim: 128
29
+ use_encoder_idx: [1]
30
+ dim_feedforward: 512
31
+
32
+ # cross
33
+ expansion: 0.34
34
+ depth_mult: 0.5
35
+
36
+
37
+ DFINETransformer:
38
+ feat_channels: [128, 128]
39
+ feat_strides: [16, 32]
40
+ hidden_dim: 128
41
+ dim_feedforward: 512
42
+ num_levels: 2
43
+
44
+ num_layers: 3
45
+ eval_idx: -1
46
+
47
+ num_points: [6, 6]
48
+
49
+ optimizer:
50
+ type: AdamW
51
+ params:
52
+ -
53
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
54
+ lr: 0.0004
55
+ -
56
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
57
+ lr: 0.0004
58
+ weight_decay: 0.
59
+ -
60
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
61
+ weight_decay: 0.
62
+
63
+ lr: 0.0008
64
+ betas: [0.9, 0.999]
65
+ weight_decay: 0.0001
66
+
67
+
68
+
69
+ epochs: 64 # Early stop
70
+ train_dataloader:
71
+ total_batch_size: 128
72
+ dataset:
73
+ transforms:
74
+ policy:
75
+ epoch: 56
76
+ collate_fn:
77
+ stop_epoch: 56
78
+ ema_restart_decay: 0.9999
79
+ base_size_repeat: ~
80
+
81
+ ema:
82
+ warmups: 0
83
+
84
+ lr_warmup_scheduler:
85
+ warmup_duration: 0
86
+
87
+ val_dataloader:
88
+ total_batch_size: 256
configs/dfine/objects365/dfine_hgnetv2_n_obj365.yml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/obj365_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_n_obj365
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+
23
+ HybridEncoder:
24
+ in_channels: [512, 1024]
25
+ feat_strides: [16, 32]
26
+
27
+ # intra
28
+ hidden_dim: 128
29
+ use_encoder_idx: [1]
30
+ dim_feedforward: 512
31
+
32
+ # cross
33
+ expansion: 0.34
34
+ depth_mult: 0.5
35
+
36
+
37
+ DFINETransformer:
38
+ feat_channels: [128, 128]
39
+ feat_strides: [16, 32]
40
+ hidden_dim: 128
41
+ dim_feedforward: 512
42
+ num_levels: 2
43
+
44
+ num_layers: 3
45
+ eval_idx: -1
46
+
47
+ num_points: [6, 6]
48
+
49
+ optimizer:
50
+ type: AdamW
51
+ params:
52
+ -
53
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
54
+ lr: 0.0004
55
+ -
56
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
57
+ lr: 0.0004
58
+ weight_decay: 0.
59
+ -
60
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
61
+ weight_decay: 0.
62
+
63
+ lr: 0.0008
64
+ betas: [0.9, 0.999]
65
+ weight_decay: 0.0001
66
+
67
+
68
+
69
+ epochs: 48 # Early stop
70
+ train_dataloader:
71
+ total_batch_size: 128
72
+ dataset:
73
+ transforms:
74
+ policy:
75
+ epoch: 500
76
+ collate_fn:
77
+ stop_epoch: 500
78
+ base_size_repeat: ~
79
+
80
+ checkpoint_freq: 1
81
+ print_freq: 500
82
+
83
+ val_dataloader:
84
+ total_batch_size: 256
configs/dfine/objects365/dfine_hgnetv2_s_obj2coco.yml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/coco_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_s_obj2coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 3 # 4 5 6
24
+ eval_idx: -1 # -2 -3 -4
25
+
26
+ HybridEncoder:
27
+ in_channels: [256, 512, 1024]
28
+ hidden_dim: 256
29
+ depth_mult: 0.34
30
+ expansion: 0.5
31
+
32
+ optimizer:
33
+ type: AdamW
34
+ params:
35
+ -
36
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
37
+ lr: 0.000125
38
+ -
39
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
40
+ lr: 0.000125
41
+ weight_decay: 0.
42
+ -
43
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44
+ weight_decay: 0.
45
+
46
+ lr: 0.00025
47
+ betas: [0.9, 0.999]
48
+ weight_decay: 0.000125
49
+
50
+
51
+ epochs: 64 # Early stop
52
+ train_dataloader:
53
+ dataset:
54
+ transforms:
55
+ policy:
56
+ epoch: 56
57
+ collate_fn:
58
+ stop_epoch: 56
59
+ ema_restart_decay: 0.9999
60
+ base_size_repeat: 10
61
+
62
+ ema:
63
+ warmups: 0
64
+
65
+ lr_warmup_scheduler:
66
+ warmup_duration: 0
configs/dfine/objects365/dfine_hgnetv2_s_obj365.yml ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/obj365_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_s_obj365
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B0'
17
+ return_idx: [1, 2, 3]
18
+ freeze_at: -1
19
+ freeze_norm: False
20
+ use_lab: True
21
+
22
+ DFINETransformer:
23
+ num_layers: 3 # 4 5 6
24
+ eval_idx: -1 # -2 -3 -4
25
+
26
+ HybridEncoder:
27
+ in_channels: [256, 512, 1024]
28
+ hidden_dim: 256
29
+ depth_mult: 0.34
30
+ expansion: 0.5
31
+
32
+ optimizer:
33
+ type: AdamW
34
+ params:
35
+ -
36
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
37
+ lr: 0.000125
38
+ -
39
+ params: '^(?=.*backbone)(?=.*norm|bn).*$'
40
+ lr: 0.000125
41
+ weight_decay: 0.
42
+ -
43
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44
+ weight_decay: 0.
45
+
46
+ lr: 0.00025
47
+ betas: [0.9, 0.999]
48
+ weight_decay: 0.000125
49
+ # weight_decay: 0.00005 # Faster convergence (optional)
50
+
51
+
52
+ epochs: 36 # Early stop
53
+ train_dataloader:
54
+ dataset:
55
+ transforms:
56
+ policy:
57
+ epoch: 500
58
+ collate_fn:
59
+ stop_epoch: 500
60
+ base_size_repeat: 20
61
+
62
+ checkpoint_freq: 1
63
+ print_freq: 1000
configs/dfine/objects365/dfine_hgnetv2_x_obj2coco.yml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/coco_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_x_obj2coco
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B5'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+
22
+ HybridEncoder:
23
+ # intra
24
+ hidden_dim: 384
25
+ dim_feedforward: 2048
26
+
27
+ DFINETransformer:
28
+ feat_channels: [384, 384, 384]
29
+ reg_scale: 8
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.0000025
37
+ -
38
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
39
+ weight_decay: 0.
40
+
41
+ lr: 0.00025
42
+ betas: [0.9, 0.999]
43
+ weight_decay: 0.000125
44
+
45
+
46
+ epochs: 36 # Early stop
47
+ train_dataloader:
48
+ dataset:
49
+ transforms:
50
+ policy:
51
+ epoch: 30
52
+ collate_fn:
53
+ stop_epoch: 30
54
+ ema_restart_decay: 0.9999
55
+ base_size_repeat: 3
56
+
57
+ ema:
58
+ warmups: 0
59
+
60
+ lr_warmup_scheduler:
61
+ warmup_duration: 0
configs/dfine/objects365/dfine_hgnetv2_x_obj365.yml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __include__: [
2
+ '../../dataset/obj365_detection.yml',
3
+ '../../runtime.yml',
4
+ '../include/dataloader.yml',
5
+ '../include/optimizer.yml',
6
+ '../include/dfine_hgnetv2.yml',
7
+ ]
8
+
9
+ output_dir: ./output/dfine_hgnetv2_x_obj365
10
+
11
+
12
+ DFINE:
13
+ backbone: HGNetv2
14
+
15
+ HGNetv2:
16
+ name: 'B5'
17
+ return_idx: [1, 2, 3]
18
+ freeze_stem_only: True
19
+ freeze_at: 0
20
+ freeze_norm: True
21
+
22
+ HybridEncoder:
23
+ # intra
24
+ hidden_dim: 384
25
+ dim_feedforward: 2048
26
+
27
+ DFINETransformer:
28
+ feat_channels: [384, 384, 384]
29
+ reg_scale: 8
30
+
31
+ optimizer:
32
+ type: AdamW
33
+ params:
34
+ -
35
+ params: '^(?=.*backbone)(?!.*norm|bn).*$'
36
+ lr: 0.0000025
37
+ -
38
+ params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
39
+ weight_decay: 0.
40
+
41
+ lr: 0.00025
42
+ betas: [0.9, 0.999]
43
+ weight_decay: 0.000125
44
+ # weight_decay: 0.00005 # Faster convergence (optional)
45
+
46
+
47
+ epochs: 24 # Early stop
48
+ train_dataloader:
49
+ dataset:
50
+ transforms:
51
+ policy:
52
+ epoch: 500
53
+ collate_fn:
54
+ stop_epoch: 500
55
+ base_size_repeat: 3
56
+
57
+ checkpoint_freq: 1
58
+ print_freq: 1000
configs/obj365.yml ADDED
@@ -0,0 +1,367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Classes
2
+ names:
3
+ 0: Person
4
+ 1: Sneakers
5
+ 2: Chair
6
+ 3: Other Shoes
7
+ 4: Hat
8
+ 5: Car
9
+ 6: Lamp
10
+ 7: Glasses
11
+ 8: Bottle
12
+ 9: Desk
13
+ 10: Cup
14
+ 11: Street Lights
15
+ 12: Cabinet/shelf
16
+ 13: Handbag/Satchel
17
+ 14: Bracelet
18
+ 15: Plate
19
+ 16: Picture/Frame
20
+ 17: Helmet
21
+ 18: Book
22
+ 19: Gloves
23
+ 20: Storage box
24
+ 21: Boat
25
+ 22: Leather Shoes
26
+ 23: Flower
27
+ 24: Bench
28
+ 25: Potted Plant
29
+ 26: Bowl/Basin
30
+ 27: Flag
31
+ 28: Pillow
32
+ 29: Boots
33
+ 30: Vase
34
+ 31: Microphone
35
+ 32: Necklace
36
+ 33: Ring
37
+ 34: SUV
38
+ 35: Wine Glass
39
+ 36: Belt
40
+ 37: Monitor/TV
41
+ 38: Backpack
42
+ 39: Umbrella
43
+ 40: Traffic Light
44
+ 41: Speaker
45
+ 42: Watch
46
+ 43: Tie
47
+ 44: Trash bin Can
48
+ 45: Slippers
49
+ 46: Bicycle
50
+ 47: Stool
51
+ 48: Barrel/bucket
52
+ 49: Van
53
+ 50: Couch
54
+ 51: Sandals
55
+ 52: Basket
56
+ 53: Drum
57
+ 54: Pen/Pencil
58
+ 55: Bus
59
+ 56: Wild Bird
60
+ 57: High Heels
61
+ 58: Motorcycle
62
+ 59: Guitar
63
+ 60: Carpet
64
+ 61: Cell Phone
65
+ 62: Bread
66
+ 63: Camera
67
+ 64: Canned
68
+ 65: Truck
69
+ 66: Traffic cone
70
+ 67: Cymbal
71
+ 68: Lifesaver
72
+ 69: Towel
73
+ 70: Stuffed Toy
74
+ 71: Candle
75
+ 72: Sailboat
76
+ 73: Laptop
77
+ 74: Awning
78
+ 75: Bed
79
+ 76: Faucet
80
+ 77: Tent
81
+ 78: Horse
82
+ 79: Mirror
83
+ 80: Power outlet
84
+ 81: Sink
85
+ 82: Apple
86
+ 83: Air Conditioner
87
+ 84: Knife
88
+ 85: Hockey Stick
89
+ 86: Paddle
90
+ 87: Pickup Truck
91
+ 88: Fork
92
+ 89: Traffic Sign
93
+ 90: Balloon
94
+ 91: Tripod
95
+ 92: Dog
96
+ 93: Spoon
97
+ 94: Clock
98
+ 95: Pot
99
+ 96: Cow
100
+ 97: Cake
101
+ 98: Dinning Table
102
+ 99: Sheep
103
+ 100: Hanger
104
+ 101: Blackboard/Whiteboard
105
+ 102: Napkin
106
+ 103: Other Fish
107
+ 104: Orange/Tangerine
108
+ 105: Toiletry
109
+ 106: Keyboard
110
+ 107: Tomato
111
+ 108: Lantern
112
+ 109: Machinery Vehicle
113
+ 110: Fan
114
+ 111: Green Vegetables
115
+ 112: Banana
116
+ 113: Baseball Glove
117
+ 114: Airplane
118
+ 115: Mouse
119
+ 116: Train
120
+ 117: Pumpkin
121
+ 118: Soccer
122
+ 119: Skiboard
123
+ 120: Luggage
124
+ 121: Nightstand
125
+ 122: Tea pot
126
+ 123: Telephone
127
+ 124: Trolley
128
+ 125: Head Phone
129
+ 126: Sports Car
130
+ 127: Stop Sign
131
+ 128: Dessert
132
+ 129: Scooter
133
+ 130: Stroller
134
+ 131: Crane
135
+ 132: Remote
136
+ 133: Refrigerator
137
+ 134: Oven
138
+ 135: Lemon
139
+ 136: Duck
140
+ 137: Baseball Bat
141
+ 138: Surveillance Camera
142
+ 139: Cat
143
+ 140: Jug
144
+ 141: Broccoli
145
+ 142: Piano
146
+ 143: Pizza
147
+ 144: Elephant
148
+ 145: Skateboard
149
+ 146: Surfboard
150
+ 147: Gun
151
+ 148: Skating and Skiing shoes
152
+ 149: Gas stove
153
+ 150: Donut
154
+ 151: Bow Tie
155
+ 152: Carrot
156
+ 153: Toilet
157
+ 154: Kite
158
+ 155: Strawberry
159
+ 156: Other Balls
160
+ 157: Shovel
161
+ 158: Pepper
162
+ 159: Computer Box
163
+ 160: Toilet Paper
164
+ 161: Cleaning Products
165
+ 162: Chopsticks
166
+ 163: Microwave
167
+ 164: Pigeon
168
+ 165: Baseball
169
+ 166: Cutting/chopping Board
170
+ 167: Coffee Table
171
+ 168: Side Table
172
+ 169: Scissors
173
+ 170: Marker
174
+ 171: Pie
175
+ 172: Ladder
176
+ 173: Snowboard
177
+ 174: Cookies
178
+ 175: Radiator
179
+ 176: Fire Hydrant
180
+ 177: Basketball
181
+ 178: Zebra
182
+ 179: Grape
183
+ 180: Giraffe
184
+ 181: Potato
185
+ 182: Sausage
186
+ 183: Tricycle
187
+ 184: Violin
188
+ 185: Egg
189
+ 186: Fire Extinguisher
190
+ 187: Candy
191
+ 188: Fire Truck
192
+ 189: Billiards
193
+ 190: Converter
194
+ 191: Bathtub
195
+ 192: Wheelchair
196
+ 193: Golf Club
197
+ 194: Briefcase
198
+ 195: Cucumber
199
+ 196: Cigar/Cigarette
200
+ 197: Paint Brush
201
+ 198: Pear
202
+ 199: Heavy Truck
203
+ 200: Hamburger
204
+ 201: Extractor
205
+ 202: Extension Cord
206
+ 203: Tong
207
+ 204: Tennis Racket
208
+ 205: Folder
209
+ 206: American Football
210
+ 207: earphone
211
+ 208: Mask
212
+ 209: Kettle
213
+ 210: Tennis
214
+ 211: Ship
215
+ 212: Swing
216
+ 213: Coffee Machine
217
+ 214: Slide
218
+ 215: Carriage
219
+ 216: Onion
220
+ 217: Green beans
221
+ 218: Projector
222
+ 219: Frisbee
223
+ 220: Washing Machine/Drying Machine
224
+ 221: Chicken
225
+ 222: Printer
226
+ 223: Watermelon
227
+ 224: Saxophone
228
+ 225: Tissue
229
+ 226: Toothbrush
230
+ 227: Ice cream
231
+ 228: Hot-air balloon
232
+ 229: Cello
233
+ 230: French Fries
234
+ 231: Scale
235
+ 232: Trophy
236
+ 233: Cabbage
237
+ 234: Hot dog
238
+ 235: Blender
239
+ 236: Peach
240
+ 237: Rice
241
+ 238: Wallet/Purse
242
+ 239: Volleyball
243
+ 240: Deer
244
+ 241: Goose
245
+ 242: Tape
246
+ 243: Tablet
247
+ 244: Cosmetics
248
+ 245: Trumpet
249
+ 246: Pineapple
250
+ 247: Golf Ball
251
+ 248: Ambulance
252
+ 249: Parking meter
253
+ 250: Mango
254
+ 251: Key
255
+ 252: Hurdle
256
+ 253: Fishing Rod
257
+ 254: Medal
258
+ 255: Flute
259
+ 256: Brush
260
+ 257: Penguin
261
+ 258: Megaphone
262
+ 259: Corn
263
+ 260: Lettuce
264
+ 261: Garlic
265
+ 262: Swan
266
+ 263: Helicopter
267
+ 264: Green Onion
268
+ 265: Sandwich
269
+ 266: Nuts
270
+ 267: Speed Limit Sign
271
+ 268: Induction Cooker
272
+ 269: Broom
273
+ 270: Trombone
274
+ 271: Plum
275
+ 272: Rickshaw
276
+ 273: Goldfish
277
+ 274: Kiwi fruit
278
+ 275: Router/modem
279
+ 276: Poker Card
280
+ 277: Toaster
281
+ 278: Shrimp
282
+ 279: Sushi
283
+ 280: Cheese
284
+ 281: Notepaper
285
+ 282: Cherry
286
+ 283: Pliers
287
+ 284: CD
288
+ 285: Pasta
289
+ 286: Hammer
290
+ 287: Cue
291
+ 288: Avocado
292
+ 289: Hamimelon
293
+ 290: Flask
294
+ 291: Mushroom
295
+ 292: Screwdriver
296
+ 293: Soap
297
+ 294: Recorder
298
+ 295: Bear
299
+ 296: Eggplant
300
+ 297: Board Eraser
301
+ 298: Coconut
302
+ 299: Tape Measure/Ruler
303
+ 300: Pig
304
+ 301: Showerhead
305
+ 302: Globe
306
+ 303: Chips
307
+ 304: Steak
308
+ 305: Crosswalk Sign
309
+ 306: Stapler
310
+ 307: Camel
311
+ 308: Formula 1
312
+ 309: Pomegranate
313
+ 310: Dishwasher
314
+ 311: Crab
315
+ 312: Hoverboard
316
+ 313: Meat ball
317
+ 314: Rice Cooker
318
+ 315: Tuba
319
+ 316: Calculator
320
+ 317: Papaya
321
+ 318: Antelope
322
+ 319: Parrot
323
+ 320: Seal
324
+ 321: Butterfly
325
+ 322: Dumbbell
326
+ 323: Donkey
327
+ 324: Lion
328
+ 325: Urinal
329
+ 326: Dolphin
330
+ 327: Electric Drill
331
+ 328: Hair Dryer
332
+ 329: Egg tart
333
+ 330: Jellyfish
334
+ 331: Treadmill
335
+ 332: Lighter
336
+ 333: Grapefruit
337
+ 334: Game board
338
+ 335: Mop
339
+ 336: Radish
340
+ 337: Baozi
341
+ 338: Target
342
+ 339: French
343
+ 340: Spring Rolls
344
+ 341: Monkey
345
+ 342: Rabbit
346
+ 343: Pencil Case
347
+ 344: Yak
348
+ 345: Red Cabbage
349
+ 346: Binoculars
350
+ 347: Asparagus
351
+ 348: Barbell
352
+ 349: Scallop
353
+ 350: Noddles
354
+ 351: Comb
355
+ 352: Dumpling
356
+ 353: Oyster
357
+ 354: Table Tennis paddle
358
+ 355: Cosmetics Brush/Eyeliner Pencil
359
+ 356: Chainsaw
360
+ 357: Eraser
361
+ 358: Lobster
362
+ 359: Durian
363
+ 360: Okra
364
+ 361: Lipstick
365
+ 362: Cosmetics Mirror
366
+ 363: Curling
367
+ 364: Table Tennis
configs/runtime.yml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ print_freq: 100
2
+ output_dir: './logs'
3
+ checkpoint_freq: 12
4
+
5
+
6
+ sync_bn: True
7
+ find_unused_parameters: False
8
+
9
+
10
+ use_amp: False
11
+ scaler:
12
+ type: GradScaler
13
+ enabled: True
14
+
15
+
16
+ use_ema: False
17
+ ema:
18
+ type: ModelEMA
19
+ decay: 0.9999
20
+ warmups: 1000
21
+
22
+ use_wandb: False
23
+ project_name: D-FINE # for wandb
24
+ exp_name: baseline # wandb experiment name