diff --git a/aagenerator.py b/aagenerator.py
index a9704472559ec0caefb96814d9db0eaf126491b1..b870d5998f040cf372e45f9db00260bc283370d8 100644
--- a/aagenerator.py
+++ b/aagenerator.py
@@ -126,10 +126,14 @@ def parse_args():
################################### newly added args ###################################
parser.add_argument("--ref_path", type=str, default="/data_laion/alvin/Dataset/evaluation/debug/42361.png")
parser.add_argument("--prompt", type=str, default="A person riding skis down a snow covered slope.")
+ # parser.add_argument("--t2mn_path", type=str,
+ # default="/data_laion/alvin/sd4human/ckpts/a-ranstart-body-sdv20-v-nd-flaw-avg-copy1-glc-resume288k-512-ft1024/checkpoint-388000")
parser.add_argument("--t2mn_path", type=str,
- default="/data_laion/alvin/sd4human/ckpts/a-ranstart-body-sdv20-v-nd-flaw-avg-copy1-glc-resume288k-512-ft1024/checkpoint-388000")
+ default="./ckpts/checkpoint-388000")
+ # parser.add_argument("--controlnet_model_name_or_path", type=str,
+ # default="/data_laion/alvin/sd4human/ckpts/ctrl-sdxl10-eps-glc-composer-bmn-sum-1024/checkpoint-91000")
parser.add_argument("--controlnet_model_name_or_path", type=str,
- default="/data_laion/alvin/sd4human/ckpts/ctrl-sdxl10-eps-glc-composer-bmn-sum-1024/checkpoint-91000")
+ default="./ckpts/checkpoint-91000")
parser.add_argument('--step_num1', default=50, type=int)
parser.add_argument('--step_num2', default=50, type=int)
parser.add_argument('--size', default=2048, type=int)
@@ -717,8 +721,8 @@ class Generator:
)
self.body_inferencer = MMPoseInferencer(
- pose2d='/fsx_laion/alvin/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py',
- pose2d_weights='/fsx_laion/alvin/pretrain/ViTPose/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth',
+ pose2d='./mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py',
+ pose2d_weights='./pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth',
scope="mmpose"
# det_model='/fsx_laion/alvin/mmpose/demo/mmdetection_cfg/faster_rcnn_r50_fpn_coco.py',
# det_weights="/fsx_laion/alvin/pretrain/ViTPose/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth"
diff --git a/ckpts/checkpoint-388000/optimizer.bin b/ckpts/checkpoint-388000/optimizer.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5ed8af55f367fdf7ce99b84ccf562442505655b5
--- /dev/null
+++ b/ckpts/checkpoint-388000/optimizer.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf8f25e968c1a2a363da273d9ca7ee2d2d4d3d7b84a82a80c4764152795cfa21
+size 7440199099
diff --git a/ckpts/checkpoint-388000/random_states_0.pkl b/ckpts/checkpoint-388000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f8f572d2aaf426947ae85c7aecc4e3b655c0e2c0
--- /dev/null
+++ b/ckpts/checkpoint-388000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:054b28928bcd597ce093db874082bede1c8313c722b6ce54d8360549e68737a9
+size 21795
diff --git a/ckpts/checkpoint-388000/scheduler.bin b/ckpts/checkpoint-388000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b607da0606003b7e06064712566a964c77885af6
--- /dev/null
+++ b/ckpts/checkpoint-388000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c92acef4f084138c2708bb2f7fc53d9ad31be999ccf6cb61dc4f614bc370baf
+size 563
diff --git a/ckpts/checkpoint-388000/unet/config.json b/ckpts/checkpoint-388000/unet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..c9e9ba2ffbd24c97f56d93b7c4fc92d0a1b0befc
--- /dev/null
+++ b/ckpts/checkpoint-388000/unet/config.json
@@ -0,0 +1,76 @@
+{
+ "_class_name": "UNet2DConditionModel",
+ "_diffusers_version": "0.19.0.dev0",
+ "_name_or_path": "/fsx_laion/alvin/sd4human/a-ranstart-body-sdv20-v-nd-flaw-avg-copy1-glc-resume282k-512/checkpoint-288000",
+ "act_fn": "silu",
+ "addition_embed_type": "time",
+ "addition_embed_type_num_heads": 64,
+ "addition_time_embed_dim": 256,
+ "attention_head_dim": [
+ 5,
+ 10,
+ 20,
+ 20
+ ],
+ "block_out_channels": [
+ 320,
+ 640,
+ 1280,
+ 1280
+ ],
+ "branch_num": 2,
+ "center_input_sample": false,
+ "class_embed_type": null,
+ "class_embeddings_concat": false,
+ "conv_in_kernel": 3,
+ "conv_out_kernel": 3,
+ "copy_first_n_block": 1,
+ "copy_last_n_block": 1,
+ "cross_attention_dim": 1024,
+ "cross_attention_norm": null,
+ "down_block_types": [
+ "CrossAttnDownBlock2D",
+ "CrossAttnDownBlock2D",
+ "CrossAttnDownBlock2D",
+ "DownBlock2D"
+ ],
+ "downsample_padding": 1,
+ "dual_cross_attention": false,
+ "encoder_hid_dim": null,
+ "encoder_hid_dim_type": null,
+ "flip_sin_to_cos": true,
+ "freq_shift": 0,
+ "fusion": "avg",
+ "in_channels": 8,
+ "layers_per_block": 2,
+ "mid_block_only_cross_attention": null,
+ "mid_block_scale_factor": 1,
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
+ "norm_eps": 1e-05,
+ "norm_num_groups": 32,
+ "num_attention_heads": null,
+ "num_class_embeds": null,
+ "off_wa": true,
+ "only_cross_attention": false,
+ "out_channels": 4,
+ "projection_class_embeddings_input_dim": null,
+ "resnet_out_scale_factor": 1.0,
+ "resnet_skip_time_act": false,
+ "resnet_time_scale_shift": "default",
+ "sample_size": 64,
+ "size_cond": true,
+ "time_cond_proj_dim": null,
+ "time_embedding_act_fn": null,
+ "time_embedding_dim": null,
+ "time_embedding_type": "positional",
+ "timestep_post_act": null,
+ "transformer_layers_per_block": 1,
+ "up_block_types": [
+ "UpBlock2D",
+ "CrossAttnUpBlock2D",
+ "CrossAttnUpBlock2D",
+ "CrossAttnUpBlock2D"
+ ],
+ "upcast_attention": false,
+ "use_linear_projection": true
+}
diff --git a/ckpts/checkpoint-388000/unet/diffusion_pytorch_model.bin b/ckpts/checkpoint-388000/unet/diffusion_pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..567ea1ac7a9cf46d04bcbcef3c71b727acb7b60f
--- /dev/null
+++ b/ckpts/checkpoint-388000/unet/diffusion_pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2a87ea43737713b524ec399a3b92f97e267829757c2286aeb1145d00d1d4c3d
+size 3720102563
diff --git a/ckpts/checkpoint-388000/unet_ema/config.json b/ckpts/checkpoint-388000/unet_ema/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..61b1fddc4755377ebacfee39101ac74baeb26454
--- /dev/null
+++ b/ckpts/checkpoint-388000/unet_ema/config.json
@@ -0,0 +1,83 @@
+{
+ "_class_name": "UNet2DConditionModel",
+ "_diffusers_version": "0.19.0.dev0",
+ "_name_or_path": "/home/suraj_huggingface_co/.cache/huggingface/diffusers/models--fusing--stable-diffusion-v2/snapshots/3282d2bdc378f4afd43edbbb90803779a5249116/unet",
+ "act_fn": "silu",
+ "addition_embed_type": "time",
+ "addition_embed_type_num_heads": 64,
+ "addition_time_embed_dim": 256,
+ "attention_head_dim": [
+ 5,
+ 10,
+ 20,
+ 20
+ ],
+ "block_out_channels": [
+ 320,
+ 640,
+ 1280,
+ 1280
+ ],
+ "branch_num": 2,
+ "center_input_sample": false,
+ "class_embed_type": null,
+ "class_embeddings_concat": false,
+ "conv_in_kernel": 3,
+ "conv_out_kernel": 3,
+ "copy_first_n_block": 1,
+ "copy_last_n_block": 1,
+ "cross_attention_dim": 1024,
+ "cross_attention_norm": null,
+ "decay": 0.9999,
+ "down_block_types": [
+ "CrossAttnDownBlock2D",
+ "CrossAttnDownBlock2D",
+ "CrossAttnDownBlock2D",
+ "DownBlock2D"
+ ],
+ "downsample_padding": 1,
+ "dual_cross_attention": false,
+ "encoder_hid_dim": null,
+ "encoder_hid_dim_type": null,
+ "flip_sin_to_cos": true,
+ "freq_shift": 0,
+ "fusion": "avg",
+ "in_channels": 8,
+ "inv_gamma": 1.0,
+ "layers_per_block": 2,
+ "mid_block_only_cross_attention": null,
+ "mid_block_scale_factor": 1,
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
+ "min_decay": 0.0,
+ "norm_eps": 1e-05,
+ "norm_num_groups": 32,
+ "num_attention_heads": null,
+ "num_class_embeds": null,
+ "off_wa": true,
+ "only_cross_attention": false,
+ "optimization_step": 100000,
+ "out_channels": 4,
+ "power": 0.6666666666666666,
+ "projection_class_embeddings_input_dim": null,
+ "resnet_out_scale_factor": 1.0,
+ "resnet_skip_time_act": false,
+ "resnet_time_scale_shift": "default",
+ "sample_size": 64,
+ "size_cond": true,
+ "time_cond_proj_dim": null,
+ "time_embedding_act_fn": null,
+ "time_embedding_dim": null,
+ "time_embedding_type": "positional",
+ "timestep_post_act": null,
+ "transformer_layers_per_block": 1,
+ "up_block_types": [
+ "UpBlock2D",
+ "CrossAttnUpBlock2D",
+ "CrossAttnUpBlock2D",
+ "CrossAttnUpBlock2D"
+ ],
+ "upcast_attention": false,
+ "update_after_step": 0,
+ "use_ema_warmup": false,
+ "use_linear_projection": true
+}
diff --git a/ckpts/checkpoint-388000/unet_ema/diffusion_pytorch_model.bin b/ckpts/checkpoint-388000/unet_ema/diffusion_pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f8f92dff32a412680e43102be510a76f529d3be3
--- /dev/null
+++ b/ckpts/checkpoint-388000/unet_ema/diffusion_pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb2d5bc8f8bd2cf1487f9cdbb955ba78e3fbb91570ea137710be142d2e11f55c
+size 3720087523
diff --git a/ckpts/checkpoint-91000/controlnet/config.json b/ckpts/checkpoint-91000/controlnet/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..23b82c48615d2ec27fcdbf831e88293273a7fa5e
--- /dev/null
+++ b/ckpts/checkpoint-91000/controlnet/config.json
@@ -0,0 +1,60 @@
+{
+ "_class_name": "ControlNetModel",
+ "_diffusers_version": "0.19.0.dev0",
+ "_name_or_path": "ctrl-sdxl10-eps-glc-composer-bmn-sum-1024/checkpoint-9000",
+ "act_fn": "silu",
+ "addition_embed_type": "text_time",
+ "addition_embed_type_num_heads": 64,
+ "addition_time_embed_dim": 256,
+ "attention_head_dim": [
+ 5,
+ 10,
+ 20
+ ],
+ "block_out_channels": [
+ 320,
+ 640,
+ 1280
+ ],
+ "class_embed_type": null,
+ "cond_num": 3,
+ "conditioning_channels": 3,
+ "conditioning_embedding_out_channels": [
+ 16,
+ 32,
+ 96,
+ 256
+ ],
+ "controlnet_conditioning_channel_order": "rgb",
+ "cross_attention_dim": 2048,
+ "down_block_types": [
+ "DownBlock2D",
+ "CrossAttnDownBlock2D",
+ "CrossAttnDownBlock2D"
+ ],
+ "downsample_padding": 1,
+ "encoder_hid_dim": null,
+ "encoder_hid_dim_type": null,
+ "flip_sin_to_cos": true,
+ "freq_shift": 0,
+ "fusion": "sum",
+ "global_pool_conditions": false,
+ "in_channels": 4,
+ "layers_per_block": 2,
+ "mid_block_scale_factor": 1,
+ "norm_eps": 1e-05,
+ "norm_num_groups": 32,
+ "normalize_to_0_1": true,
+ "num_attention_heads": null,
+ "num_class_embeds": null,
+ "only_cross_attention": false,
+ "projection_class_embeddings_input_dim": 2816,
+ "resnet_time_scale_shift": "default",
+ "transformer_layers_per_block": [
+ 1,
+ 2,
+ 10
+ ],
+ "upcast_attention": null,
+ "use_linear_projection": true
+}
diff --git a/ckpts/checkpoint-91000/controlnet/diffusion_pytorch_model.bin b/ckpts/checkpoint-91000/controlnet/diffusion_pytorch_model.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4d6ad8cac3e55fec349fb9703b087fdc0fd7137f
--- /dev/null
+++ b/ckpts/checkpoint-91000/controlnet/diffusion_pytorch_model.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f823a8a0f5bf7568d10450ea2b8c3423758f024ec6fed7a267f36a58eaef5015
+size 5013143113
diff --git a/ckpts/checkpoint-91000/optimizer.bin b/ckpts/checkpoint-91000/optimizer.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3b93d998e26b956abbe79f0a7734a666033ee23
--- /dev/null
+++ b/ckpts/checkpoint-91000/optimizer.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b18e068e83f582f440b7391fd5dab22acb9e1fa400f6014130d42edab42d4888
+size 10026266209
diff --git a/ckpts/checkpoint-91000/random_states_0.pkl b/ckpts/checkpoint-91000/random_states_0.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..8dc9b08ed3d813934eb9551b28bfe2afb4948e7c
--- /dev/null
+++ b/ckpts/checkpoint-91000/random_states_0.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b5e8ac6a3e6c740570fd852f0fbcd8783d0f8a9dc14fdf3a0b2d81c5432fc14
+size 21795
diff --git a/ckpts/checkpoint-91000/scheduler.bin b/ckpts/checkpoint-91000/scheduler.bin
new file mode 100644
index 0000000000000000000000000000000000000000..00b1b2881b9090b277a66d93add63c9d06ba584e
--- /dev/null
+++ b/ckpts/checkpoint-91000/scheduler.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d37caf0a25b77ffa731e73953cef4e9361e94796681e3c7506f2a0ec3b44538
+size 563
diff --git a/mmpose/configs/_base_/datasets/300w.py b/mmpose/configs/_base_/datasets/300w.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c3728da1d1555c3526ccbfca182385961e8b667
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/300w.py
@@ -0,0 +1,134 @@
+dataset_info = dict(
+ dataset_name='300w',
+ paper_info=dict(
+ author='Sagonas, Christos and Antonakos, Epameinondas '
+ 'and Tzimiropoulos, Georgios and Zafeiriou, Stefanos '
+ 'and Pantic, Maja',
+ title='300 faces in-the-wild challenge: '
+ 'Database and results',
+ container='Image and vision computing',
+ year='2016',
+ homepage='https://ibug.doc.ic.ac.uk/resources/300-W/',
+ ),
+ keypoint_info={
+ 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-16'),
+ 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-15'),
+ 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-14'),
+ 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-13'),
+ 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-12'),
+ 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-11'),
+ 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-10'),
+ 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-9'),
+ 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap=''),
+ 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-7'),
+ 10:
+ dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-6'),
+ 11:
+ dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-5'),
+ 12:
+ dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-4'),
+ 13:
+ dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-3'),
+ 14:
+ dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-2'),
+ 15:
+ dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-1'),
+ 16:
+ dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap='kpt-0'),
+ 17:
+ dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-26'),
+ 18:
+ dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-25'),
+ 19:
+ dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-24'),
+ 20:
+ dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap='kpt-23'),
+ 21:
+ dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap='kpt-22'),
+ 22:
+ dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-21'),
+ 23:
+ dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-20'),
+ 24:
+ dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap='kpt-19'),
+ 25:
+ dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap='kpt-18'),
+ 26:
+ dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap='kpt-17'),
+ 27: dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap=''),
+ 28: dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap=''),
+ 29: dict(name='kpt-29', id=29, color=[255, 0, 0], type='', swap=''),
+ 30: dict(name='kpt-30', id=30, color=[255, 0, 0], type='', swap=''),
+ 31:
+ dict(name='kpt-31', id=31, color=[255, 0, 0], type='', swap='kpt-35'),
+ 32:
+ dict(name='kpt-32', id=32, color=[255, 0, 0], type='', swap='kpt-34'),
+ 33: dict(name='kpt-33', id=33, color=[255, 0, 0], type='', swap=''),
+ 34:
+ dict(name='kpt-34', id=34, color=[255, 0, 0], type='', swap='kpt-32'),
+ 35:
+ dict(name='kpt-35', id=35, color=[255, 0, 0], type='', swap='kpt-31'),
+ 36:
+ dict(name='kpt-36', id=36, color=[255, 0, 0], type='', swap='kpt-45'),
+ 37:
+ dict(name='kpt-37', id=37, color=[255, 0, 0], type='', swap='kpt-44'),
+ 38:
+ dict(name='kpt-38', id=38, color=[255, 0, 0], type='', swap='kpt-43'),
+ 39:
+ dict(name='kpt-39', id=39, color=[255, 0, 0], type='', swap='kpt-42'),
+ 40:
+ dict(name='kpt-40', id=40, color=[255, 0, 0], type='', swap='kpt-47'),
+ 41: dict(
+ name='kpt-41', id=41, color=[255, 0, 0], type='', swap='kpt-46'),
+ 42: dict(
+ name='kpt-42', id=42, color=[255, 0, 0], type='', swap='kpt-39'),
+ 43: dict(
+ name='kpt-43', id=43, color=[255, 0, 0], type='', swap='kpt-38'),
+ 44: dict(
+ name='kpt-44', id=44, color=[255, 0, 0], type='', swap='kpt-37'),
+ 45: dict(
+ name='kpt-45', id=45, color=[255, 0, 0], type='', swap='kpt-36'),
+ 46: dict(
+ name='kpt-46', id=46, color=[255, 0, 0], type='', swap='kpt-41'),
+ 47: dict(
+ name='kpt-47', id=47, color=[255, 0, 0], type='', swap='kpt-40'),
+ 48: dict(
+ name='kpt-48', id=48, color=[255, 0, 0], type='', swap='kpt-54'),
+ 49: dict(
+ name='kpt-49', id=49, color=[255, 0, 0], type='', swap='kpt-53'),
+ 50: dict(
+ name='kpt-50', id=50, color=[255, 0, 0], type='', swap='kpt-52'),
+ 51: dict(name='kpt-51', id=51, color=[255, 0, 0], type='', swap=''),
+ 52: dict(
+ name='kpt-52', id=52, color=[255, 0, 0], type='', swap='kpt-50'),
+ 53: dict(
+ name='kpt-53', id=53, color=[255, 0, 0], type='', swap='kpt-49'),
+ 54: dict(
+ name='kpt-54', id=54, color=[255, 0, 0], type='', swap='kpt-48'),
+ 55: dict(
+ name='kpt-55', id=55, color=[255, 0, 0], type='', swap='kpt-59'),
+ 56: dict(
+ name='kpt-56', id=56, color=[255, 0, 0], type='', swap='kpt-58'),
+ 57: dict(name='kpt-57', id=57, color=[255, 0, 0], type='', swap=''),
+ 58: dict(
+ name='kpt-58', id=58, color=[255, 0, 0], type='', swap='kpt-56'),
+ 59: dict(
+ name='kpt-59', id=59, color=[255, 0, 0], type='', swap='kpt-55'),
+ 60: dict(
+ name='kpt-60', id=60, color=[255, 0, 0], type='', swap='kpt-64'),
+ 61: dict(
+ name='kpt-61', id=61, color=[255, 0, 0], type='', swap='kpt-63'),
+ 62: dict(name='kpt-62', id=62, color=[255, 0, 0], type='', swap=''),
+ 63: dict(
+ name='kpt-63', id=63, color=[255, 0, 0], type='', swap='kpt-61'),
+ 64: dict(
+ name='kpt-64', id=64, color=[255, 0, 0], type='', swap='kpt-60'),
+ 65: dict(
+ name='kpt-65', id=65, color=[255, 0, 0], type='', swap='kpt-67'),
+ 66: dict(name='kpt-66', id=66, color=[255, 0, 0], type='', swap=''),
+ 67: dict(
+ name='kpt-67', id=67, color=[255, 0, 0], type='', swap='kpt-65'),
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 68,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/aflw.py b/mmpose/configs/_base_/datasets/aflw.py
new file mode 100644
index 0000000000000000000000000000000000000000..cf5e10964da700415f3613ca43a0755f5015d8f0
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/aflw.py
@@ -0,0 +1,44 @@
+dataset_info = dict(
+ dataset_name='aflw',
+ paper_info=dict(
+ author='Koestinger, Martin and Wohlhart, Paul and '
+ 'Roth, Peter M and Bischof, Horst',
+ title='Annotated facial landmarks in the wild: '
+ 'A large-scale, real-world database for facial '
+ 'landmark localization',
+ container='2011 IEEE international conference on computer '
+ 'vision workshops (ICCV workshops)',
+ year='2011',
+ homepage='https://www.tugraz.at/institute/icg/research/'
+ 'team-bischof/lrs/downloads/aflw/',
+ ),
+ keypoint_info={
+ 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-5'),
+ 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-4'),
+ 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-3'),
+ 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-2'),
+ 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-1'),
+ 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-0'),
+ 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-11'),
+ 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-10'),
+ 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-9'),
+ 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-8'),
+ 10:
+ dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-7'),
+ 11:
+ dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-6'),
+ 12:
+ dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-14'),
+ 13: dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap=''),
+ 14:
+ dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-12'),
+ 15:
+ dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-17'),
+ 16: dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap=''),
+ 17:
+ dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-15'),
+ 18: dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 19,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/aic.py b/mmpose/configs/_base_/datasets/aic.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ecdbe3f0afeb19dbb7aed42653ce5efd85cfda3
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/aic.py
@@ -0,0 +1,140 @@
+dataset_info = dict(
+ dataset_name='aic',
+ paper_info=dict(
+ author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+ 'Li, Yixin and Yan, Baoming and Liang, Rui and '
+ 'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+ 'Fu, Yanwei and others',
+ title='Ai challenger: A large-scale dataset for going '
+ 'deeper in image understanding',
+ container='arXiv',
+ year='2017',
+ homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_shoulder',
+ id=0,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 1:
+ dict(
+ name='right_elbow',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 2:
+ dict(
+ name='right_wrist',
+ id=2,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 3:
+ dict(
+ name='left_shoulder',
+ id=3,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 4:
+ dict(
+ name='left_elbow',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 5:
+ dict(
+ name='left_wrist',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 6:
+ dict(
+ name='right_hip',
+ id=6,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 7:
+ dict(
+ name='right_knee',
+ id=7,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 8:
+ dict(
+ name='right_ankle',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 9:
+ dict(
+ name='left_hip',
+ id=9,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 10:
+ dict(
+ name='left_knee',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 11:
+ dict(
+ name='left_ankle',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 12:
+ dict(
+ name='head_top',
+ id=12,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 13:
+ dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_wrist', 'right_elbow'), id=0, color=[255, 128, 0]),
+ 1: dict(
+ link=('right_elbow', 'right_shoulder'), id=1, color=[255, 128, 0]),
+ 2: dict(link=('right_shoulder', 'neck'), id=2, color=[51, 153, 255]),
+ 3: dict(link=('neck', 'left_shoulder'), id=3, color=[51, 153, 255]),
+ 4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6: dict(link=('right_ankle', 'right_knee'), id=6, color=[255, 128, 0]),
+ 7: dict(link=('right_knee', 'right_hip'), id=7, color=[255, 128, 0]),
+ 8: dict(link=('right_hip', 'left_hip'), id=8, color=[51, 153, 255]),
+ 9: dict(link=('left_hip', 'left_knee'), id=9, color=[0, 255, 0]),
+ 10: dict(link=('left_knee', 'left_ankle'), id=10, color=[0, 255, 0]),
+ 11: dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
+ 12: dict(
+ link=('right_shoulder', 'right_hip'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('left_shoulder', 'left_hip'), id=13, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.
+ ],
+
+ # 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
+ # 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
+ # delta = 2 x sigma
+ sigmas=[
+ 0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
+ 0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
+ 0.01291456, 0.01236173
+ ])
diff --git a/mmpose/configs/_base_/datasets/animalpose.py b/mmpose/configs/_base_/datasets/animalpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..d5bb62d951b71da25e679bd755fe566216dc3f6f
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/animalpose.py
@@ -0,0 +1,166 @@
+dataset_info = dict(
+ dataset_name='animalpose',
+ paper_info=dict(
+ author='Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and '
+ 'Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing',
+ title='Cross-Domain Adaptation for Animal Pose Estimation',
+ container='The IEEE International Conference on '
+ 'Computer Vision (ICCV)',
+ year='2019',
+ homepage='https://sites.google.com/view/animal-pose/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+ 1:
+ dict(
+ name='R_Eye',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Eye'),
+ 2:
+ dict(
+ name='L_EarBase',
+ id=2,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_EarBase'),
+ 3:
+ dict(
+ name='R_EarBase',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_EarBase'),
+ 4:
+ dict(name='Nose', id=4, color=[51, 153, 255], type='upper', swap=''),
+ 5:
+ dict(name='Throat', id=5, color=[51, 153, 255], type='upper', swap=''),
+ 6:
+ dict(
+ name='TailBase', id=6, color=[51, 153, 255], type='lower',
+ swap=''),
+ 7:
+ dict(
+ name='Withers', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(
+ name='L_F_Elbow',
+ id=8,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Elbow'),
+ 9:
+ dict(
+ name='R_F_Elbow',
+ id=9,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_F_Elbow'),
+ 10:
+ dict(
+ name='L_B_Elbow',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Elbow'),
+ 11:
+ dict(
+ name='R_B_Elbow',
+ id=11,
+ color=[255, 128, 0],
+ type='lower',
+ swap='L_B_Elbow'),
+ 12:
+ dict(
+ name='L_F_Knee',
+ id=12,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Knee'),
+ 13:
+ dict(
+ name='R_F_Knee',
+ id=13,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_F_Knee'),
+ 14:
+ dict(
+ name='L_B_Knee',
+ id=14,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Knee'),
+ 15:
+ dict(
+ name='R_B_Knee',
+ id=15,
+ color=[255, 128, 0],
+ type='lower',
+ swap='L_B_Knee'),
+ 16:
+ dict(
+ name='L_F_Paw',
+ id=16,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Paw'),
+ 17:
+ dict(
+ name='R_F_Paw',
+ id=17,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_F_Paw'),
+ 18:
+ dict(
+ name='L_B_Paw',
+ id=18,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Paw'),
+ 19:
+ dict(
+ name='R_B_Paw',
+ id=19,
+ color=[255, 128, 0],
+ type='lower',
+ swap='L_B_Paw')
+ },
+ skeleton_info={
+ 0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[51, 153, 255]),
+ 1: dict(link=('L_Eye', 'L_EarBase'), id=1, color=[0, 255, 0]),
+ 2: dict(link=('R_Eye', 'R_EarBase'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('L_Eye', 'Nose'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('R_Eye', 'Nose'), id=4, color=[255, 128, 0]),
+ 5: dict(link=('Nose', 'Throat'), id=5, color=[51, 153, 255]),
+ 6: dict(link=('Throat', 'Withers'), id=6, color=[51, 153, 255]),
+ 7: dict(link=('TailBase', 'Withers'), id=7, color=[51, 153, 255]),
+ 8: dict(link=('Throat', 'L_F_Elbow'), id=8, color=[0, 255, 0]),
+ 9: dict(link=('L_F_Elbow', 'L_F_Knee'), id=9, color=[0, 255, 0]),
+ 10: dict(link=('L_F_Knee', 'L_F_Paw'), id=10, color=[0, 255, 0]),
+ 11: dict(link=('Throat', 'R_F_Elbow'), id=11, color=[255, 128, 0]),
+ 12: dict(link=('R_F_Elbow', 'R_F_Knee'), id=12, color=[255, 128, 0]),
+ 13: dict(link=('R_F_Knee', 'R_F_Paw'), id=13, color=[255, 128, 0]),
+ 14: dict(link=('TailBase', 'L_B_Elbow'), id=14, color=[0, 255, 0]),
+ 15: dict(link=('L_B_Elbow', 'L_B_Knee'), id=15, color=[0, 255, 0]),
+ 16: dict(link=('L_B_Knee', 'L_B_Paw'), id=16, color=[0, 255, 0]),
+ 17: dict(link=('TailBase', 'R_B_Elbow'), id=17, color=[255, 128, 0]),
+ 18: dict(link=('R_B_Elbow', 'R_B_Knee'), id=18, color=[255, 128, 0]),
+ 19: dict(link=('R_B_Knee', 'R_B_Paw'), id=19, color=[255, 128, 0])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2,
+ 1.5, 1.5, 1.5, 1.5
+ ],
+
+ # Note: The original paper did not provide enough information about
+ # the sigmas. We modified from 'https://github.com/cocodataset/'
+ # 'cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L523'
+ sigmas=[
+ 0.025, 0.025, 0.026, 0.035, 0.035, 0.10, 0.10, 0.10, 0.107, 0.107,
+ 0.107, 0.107, 0.087, 0.087, 0.087, 0.087, 0.089, 0.089, 0.089, 0.089
+ ])
diff --git a/mmpose/configs/_base_/datasets/ap10k.py b/mmpose/configs/_base_/datasets/ap10k.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0df579acbb8cf0de1ef62412ba865ee8710f0aa
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/ap10k.py
@@ -0,0 +1,142 @@
+dataset_info = dict(
+ dataset_name='ap10k',
+ paper_info=dict(
+ author='Yu, Hang and Xu, Yufei and Zhang, Jing and '
+ 'Zhao, Wei and Guan, Ziyu and Tao, Dacheng',
+ title='AP-10K: A Benchmark for Animal Pose Estimation in the Wild',
+ container='35th Conference on Neural Information Processing Systems '
+ '(NeurIPS 2021) Track on Datasets and Bench-marks.',
+ year='2021',
+ homepage='https://github.com/AlexTheBad/AP-10K',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+ 1:
+ dict(
+ name='R_Eye',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Eye'),
+ 2:
+ dict(name='Nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+ 3:
+ dict(name='Neck', id=3, color=[51, 153, 255], type='upper', swap=''),
+ 4:
+ dict(
+ name='Root of tail',
+ id=4,
+ color=[51, 153, 255],
+ type='lower',
+ swap=''),
+ 5:
+ dict(
+ name='L_Shoulder',
+ id=5,
+ color=[51, 153, 255],
+ type='upper',
+ swap='R_Shoulder'),
+ 6:
+ dict(
+ name='L_Elbow',
+ id=6,
+ color=[51, 153, 255],
+ type='upper',
+ swap='R_Elbow'),
+ 7:
+ dict(
+ name='L_F_Paw',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='R_F_Paw'),
+ 8:
+ dict(
+ name='R_Shoulder',
+ id=8,
+ color=[0, 255, 0],
+ type='upper',
+ swap='L_Shoulder'),
+ 9:
+ dict(
+ name='R_Elbow',
+ id=9,
+ color=[255, 128, 0],
+ type='upper',
+ swap='L_Elbow'),
+ 10:
+ dict(
+ name='R_F_Paw',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_F_Paw'),
+ 11:
+ dict(
+ name='L_Hip',
+ id=11,
+ color=[255, 128, 0],
+ type='lower',
+ swap='R_Hip'),
+ 12:
+ dict(
+ name='L_Knee',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='R_Knee'),
+ 13:
+ dict(
+ name='L_B_Paw',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='R_B_Paw'),
+ 14:
+ dict(
+ name='R_Hip', id=14, color=[0, 255, 0], type='lower',
+ swap='L_Hip'),
+ 15:
+ dict(
+ name='R_Knee',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_Knee'),
+ 16:
+ dict(
+ name='R_B_Paw',
+ id=16,
+ color=[0, 255, 0],
+ type='lower',
+ swap='L_B_Paw'),
+ },
+ skeleton_info={
+ 0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[0, 0, 255]),
+ 1: dict(link=('L_Eye', 'Nose'), id=1, color=[0, 0, 255]),
+ 2: dict(link=('R_Eye', 'Nose'), id=2, color=[0, 0, 255]),
+ 3: dict(link=('Nose', 'Neck'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('Neck', 'Root of tail'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('Neck', 'L_Shoulder'), id=5, color=[0, 255, 255]),
+ 6: dict(link=('L_Shoulder', 'L_Elbow'), id=6, color=[0, 255, 255]),
+ 7: dict(link=('L_Elbow', 'L_F_Paw'), id=7, color=[0, 255, 255]),
+ 8: dict(link=('Neck', 'R_Shoulder'), id=8, color=[6, 156, 250]),
+ 9: dict(link=('R_Shoulder', 'R_Elbow'), id=9, color=[6, 156, 250]),
+ 10: dict(link=('R_Elbow', 'R_F_Paw'), id=10, color=[6, 156, 250]),
+ 11: dict(link=('Root of tail', 'L_Hip'), id=11, color=[0, 255, 255]),
+ 12: dict(link=('L_Hip', 'L_Knee'), id=12, color=[0, 255, 255]),
+ 13: dict(link=('L_Knee', 'L_B_Paw'), id=13, color=[0, 255, 255]),
+ 14: dict(link=('Root of tail', 'R_Hip'), id=14, color=[6, 156, 250]),
+ 15: dict(link=('R_Hip', 'R_Knee'), id=15, color=[6, 156, 250]),
+ 16: dict(link=('R_Knee', 'R_B_Paw'), id=16, color=[6, 156, 250]),
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.025, 0.025, 0.026, 0.035, 0.035, 0.079, 0.072, 0.062, 0.079, 0.072,
+ 0.062, 0.107, 0.087, 0.089, 0.107, 0.087, 0.089
+ ])
diff --git a/mmpose/configs/_base_/datasets/atrw.py b/mmpose/configs/_base_/datasets/atrw.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ec71c8c508a0340139371a651ca2dd56eeae3cf
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/atrw.py
@@ -0,0 +1,144 @@
+dataset_info = dict(  # ATRW pose metadata: 15 keypoints (ids 0-14), 14 skeleton links (ids 0-13)
+ dataset_name='atrw',
+ paper_info=dict(  # citation details for the dataset
+ author='Li, Shuyuan and Li, Jianguo and Tang, Hanlin '
+ 'and Qian, Rui and Lin, Weiyao',
+ title='ATRW: A Benchmark for Amur Tiger '
+ 'Re-identification in the Wild',
+ container='Proceedings of the 28th ACM '
+ 'International Conference on Multimedia',
+ year='2020',
+ homepage='https://cvwc2019.github.io/challenge.html',
+ ),
+ keypoint_info={  # key == 'id'; 'swap' names the left/right mirror keypoint ('' when there is none)
+ 0:
+ dict(
+ name='left_ear',
+ id=0,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 1:
+ dict(
+ name='right_ear',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 2:
+ dict(name='nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+ 3:
+ dict(
+ name='right_shoulder',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 4:
+ dict(
+ name='right_front_paw',
+ id=4,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_front_paw'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='left_front_paw',
+ id=6,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_front_paw'),
+ 7:
+ dict(
+ name='right_hip',
+ id=7,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 8:
+ dict(
+ name='right_knee',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 9:
+ dict(
+ name='right_back_paw',
+ id=9,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_back_paw'),
+ 10:
+ dict(
+ name='left_hip',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 11:
+ dict(
+ name='left_knee',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 12:
+ dict(
+ name='left_back_paw',
+ id=12,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_back_paw'),
+ 13:
+ dict(name='tail', id=13, color=[51, 153, 255], type='lower', swap=''),
+ 14:
+ dict(
+ name='center', id=14, color=[51, 153, 255], type='lower', swap=''),
+ },
+ skeleton_info={  # key == 'id'; each 'link' joins two keypoints by their 'name' above
+ 0:
+ dict(link=('left_ear', 'nose'), id=0, color=[51, 153, 255]),
+ 1:
+ dict(link=('right_ear', 'nose'), id=1, color=[51, 153, 255]),
+ 2:
+ dict(link=('nose', 'center'), id=2, color=[51, 153, 255]),
+ 3:
+ dict(
+ link=('left_shoulder', 'left_front_paw'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('left_shoulder', 'center'), id=4, color=[0, 255, 0]),
+ 5:
+ dict(
+ link=('right_shoulder', 'right_front_paw'),
+ id=5,
+ color=[255, 128, 0]),
+ 6:
+ dict(link=('right_shoulder', 'center'), id=6, color=[255, 128, 0]),
+ 7:
+ dict(link=('tail', 'center'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('right_back_paw', 'right_knee'), id=8, color=[255, 128, 0]),
+ 9:
+ dict(link=('right_knee', 'right_hip'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('right_hip', 'tail'), id=10, color=[255, 128, 0]),
+ 11:
+ dict(link=('left_back_paw', 'left_knee'), id=11, color=[0, 255, 0]),
+ 12:
+ dict(link=('left_knee', 'left_hip'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('left_hip', 'tail'), id=13, color=[0, 255, 0]),
+ },
+ joint_weights=[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],  # one value per keypoint, in id order (presumably loss weights - TODO confirm)
+ sigmas=[  # one value per keypoint, in id order (presumably OKS sigmas - TODO confirm)
+ 0.0277, 0.0823, 0.0831, 0.0202, 0.0716, 0.0263, 0.0646, 0.0302, 0.0440,
+ 0.0316, 0.0333, 0.0547, 0.0263, 0.0683, 0.0539
+ ])
diff --git a/mmpose/configs/_base_/datasets/campus.py b/mmpose/configs/_base_/datasets/campus.py
new file mode 100644
index 0000000000000000000000000000000000000000..334316e9c25282508767158d3fae30578ab3949d
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/campus.py
@@ -0,0 +1,151 @@
+dataset_info = dict(  # Campus pose metadata: 14 keypoints (ids 0-13), 14 skeleton links (ids 0-13)
+ dataset_name='campus',
+ paper_info=dict(  # citation details for the dataset
+ author='Belagiannis, Vasileios and Amin, Sikandar and Andriluka, '
+ 'Mykhaylo and Schiele, Bernt and Navab, Nassir and Ilic, Slobodan',
+ title='3D Pictorial Structures for Multiple Human Pose Estimation',
+ container='IEEE Computer Society Conference on Computer Vision and '
+ 'Pattern Recognition (CVPR)',
+ year='2014',
+ homepage='http://campar.in.tum.de/Chair/MultiHumanPose',
+ ),
+ keypoint_info={  # key == 'id'; 'swap' names the left/right mirror keypoint ('' when there is none)
+ 0:
+ dict(
+ name='right_ankle',
+ id=0,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 1:
+ dict(
+ name='right_knee',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 2:
+ dict(
+ name='right_hip',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 3:
+ dict(
+ name='left_hip',
+ id=3,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 4:
+ dict(
+ name='left_knee',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 5:
+ dict(
+ name='left_ankle',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 6:
+ dict(
+ name='right_wrist',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 7:
+ dict(
+ name='right_elbow',
+ id=7,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 8:
+ dict(
+ name='right_shoulder',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 9:
+ dict(
+ name='left_shoulder',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 10:
+ dict(
+ name='left_elbow',
+ id=10,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 11:
+ dict(
+ name='left_wrist',
+ id=11,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 12:
+ dict(
+ name='bottom_head',
+ id=12,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 13:
+ dict(
+ name='top_head',
+ id=13,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ },
+ skeleton_info={  # key == 'id'; each 'link' joins two keypoints by their 'name' above
+ 0:
+ dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('left_hip', 'left_knee'), id=2, color=[0, 255, 0]),
+ 3:
+ dict(link=('left_knee', 'left_ankle'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('right_hip', 'left_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('right_wrist', 'right_elbow'), id=5, color=[255, 128, 0]),
+ 6:
+ dict(
+ link=('right_elbow', 'right_shoulder'), id=6, color=[255, 128, 0]),
+ 7:
+ dict(link=('left_shoulder', 'left_elbow'), id=7, color=[0, 255, 0]),
+ 8:
+ dict(link=('left_elbow', 'left_wrist'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(link=('right_hip', 'right_shoulder'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_hip', 'left_shoulder'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(
+ link=('right_shoulder', 'bottom_head'), id=11, color=[255, 128,
+ 0]),
+ 12:
+ dict(link=('left_shoulder', 'bottom_head'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('bottom_head', 'top_head'), id=13, color=[51, 153, 255]),
+ },
+ joint_weights=[  # one value per keypoint, in id order (presumably loss weights - TODO confirm)
+ 1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.0, 1.0
+ ],
+ sigmas=[  # one value per keypoint, in id order (presumably OKS sigmas - TODO confirm)
+ 0.089, 0.087, 0.107, 0.107, 0.087, 0.089, 0.062, 0.072, 0.079, 0.079,
+ 0.072, 0.062, 0.026, 0.026
+ ])
diff --git a/mmpose/configs/_base_/datasets/coco.py b/mmpose/configs/_base_/datasets/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..865a95bc02fedd318f32d2e7aa8397147d78fdb5
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco.py
@@ -0,0 +1,181 @@
+dataset_info = dict(  # COCO pose metadata: 17 keypoints (ids 0-16), 19 skeleton links (ids 0-18)
+ dataset_name='coco',
+ paper_info=dict(  # citation details for the dataset
+ author='Lin, Tsung-Yi and Maire, Michael and '
+ 'Belongie, Serge and Hays, James and '
+ 'Perona, Pietro and Ramanan, Deva and '
+ r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+ title='Microsoft coco: Common objects in context',
+ container='European conference on computer vision',
+ year='2014',
+ homepage='http://cocodataset.org/',
+ ),
+ keypoint_info={  # key == 'id'; 'swap' names the left/right mirror keypoint ('' when there is none)
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={  # key == 'id'; each 'link' joins two keypoints by their 'name' above
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[  # one value per keypoint, in id order (presumably loss weights - TODO confirm)
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[  # one value per keypoint, in id order (presumably OKS sigmas - TODO confirm)
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/mmpose/configs/_base_/datasets/coco_aic.py b/mmpose/configs/_base_/datasets/coco_aic.py
new file mode 100644
index 0000000000000000000000000000000000000000..a084247468dac1b766cbcf756b750aa3d3680b9d
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_aic.py
@@ -0,0 +1,205 @@
+dataset_info = dict(  # COCO + AI Challenger pose metadata: 19 keypoints (ids 0-18), 20 skeleton links (ids 0-19)
+ dataset_name='coco',  # NOTE(review): same name as the plain COCO config although this file is coco_aic.py - confirm intended
+ paper_info=[  # citation details, one dict per source dataset
+ dict(
+ author='Lin, Tsung-Yi and Maire, Michael and '
+ 'Belongie, Serge and Hays, James and '
+ 'Perona, Pietro and Ramanan, Deva and '
+ r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
+ title='Microsoft coco: Common objects in context',
+ container='European conference on computer vision',
+ year='2014',
+ homepage='http://cocodataset.org/',
+ ),
+ dict(
+ author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+ 'Li, Yixin and Yan, Baoming and Liang, Rui and '
+ 'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+ 'Fu, Yanwei and others',
+ title='Ai challenger: A large-scale dataset for going '
+ 'deeper in image understanding',
+ container='arXiv',
+ year='2017',
+ homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+ ),
+ ],
+ keypoint_info={  # key == 'id'; 'swap' names the left/right mirror keypoint ('' when there is none)
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 17:
+ dict(
+ name='head_top',
+ id=17,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 18:
+ dict(name='neck', id=18, color=[51, 153, 255], type='upper', swap='')
+ },
+ skeleton_info={  # key == 'id'; each 'link' joins two keypoints by their 'name' above
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]),
+ 19:
+ dict(link=('head_top', 'neck'), id=19, color=[51, 153, 255]),  # id equals its key, so it no longer collides with link 11
+ },
+ joint_weights=[  # one value per keypoint, in id order: 19 entries, same length as sigmas (presumably loss weights - TODO confirm)
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5, 1.5, 1.  # final entry (neck, id 18) assumed to be a neutral 1. - TODO confirm intended weight
+ ],
+ sigmas=[  # one value per keypoint, in id order (presumably OKS sigmas - TODO confirm)
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.026, 0.026
+ ])
diff --git a/mmpose/configs/_base_/datasets/coco_openpose.py b/mmpose/configs/_base_/datasets/coco_openpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..9aedd9f0e42e41d92893b139eeab59c41f38d814
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_openpose.py
@@ -0,0 +1,157 @@
+dataset_info = dict(  # OpenPose-ordered COCO pose metadata: 18 keypoints (ids 0-17), 17 skeleton links (ids 0-16)
+ dataset_name='coco_openpose',
+ paper_info=dict(  # citation details for the keypoint convention
+ author='Zhe, Cao and Tomas, Simon and '
+ 'Shih-En, Wei and Yaser, Sheikh',
+ title='OpenPose: Realtime Multi-Person 2D Pose '
+ 'Estimation using Part Affinity Fields',
+ container='IEEE Transactions on Pattern Analysis '
+ 'and Machine Intelligence',
+ year='2019',
+ homepage='https://github.com/CMU-Perceptual-Computing-Lab/openpose/',
+ ),
+ keypoint_info={  # key == 'id'; 'swap' names the left/right mirror keypoint ('' when there is none)
+ 0:
+ dict(name='nose', id=0, color=[255, 0, 85], type='upper', swap=''),
+ 1:
+ dict(name='neck', id=1, color=[255, 0, 0], type='upper', swap=''),
+ 2:
+ dict(
+ name='right_shoulder',
+ id=2,
+ color=[255, 85, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 3:
+ dict(
+ name='right_elbow',
+ id=3,
+ color=[255, 170, 0],
+ type='upper',
+ swap='left_elbow'),
+ 4:
+ dict(
+ name='right_wrist',
+ id=4,
+ color=[255, 255, 0],
+ type='upper',
+ swap='left_wrist'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[170, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='left_elbow',
+ id=6,
+ color=[85, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 7:
+ dict(
+ name='left_wrist',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 8:
+ dict(
+ name='right_hip',
+ id=8,
+ color=[255, 0, 170],
+ type='lower',
+ swap='left_hip'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[255, 0, 255],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='right_ankle',
+ id=10,
+ color=[170, 0, 255],
+ type='lower',
+ swap='left_ankle'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[85, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='left_knee',
+ id=12,
+ color=[0, 0, 255],
+ type='lower',
+ swap='right_knee'),
+ 13:
+ dict(
+ name='left_ankle',
+ id=13,
+ color=[0, 85, 255],
+ type='lower',
+ swap='right_ankle'),
+ 14:
+ dict(
+ name='right_eye',
+ id=14,
+ color=[0, 255, 170],
+ type='upper',
+ swap='left_eye'),
+ 15:
+ dict(
+ name='left_eye',
+ id=15,
+ color=[0, 255, 255],
+ type='upper',
+ swap='right_eye'),
+ 16:
+ dict(
+ name='right_ear',
+ id=16,
+ color=[0, 170, 255],
+ type='upper',
+ swap='left_ear'),
+ 17:
+ dict(
+ name='left_ear',
+ id=17,
+ color=[0, 170, 255],
+ type='upper',
+ swap='right_ear'),
+ },
+ skeleton_info={  # key == 'id'; each 'link' joins two keypoints by their 'name' above
+ 0: dict(link=('neck', 'right_shoulder'), id=0, color=[255, 0, 85]),
+ 1: dict(link=('neck', 'left_shoulder'), id=1, color=[255, 0, 0]),
+ 2:
+ dict(link=('right_shoulder', 'right_elbow'), id=2, color=[255, 85, 0]),
+ 3:
+ dict(link=('right_elbow', 'right_wrist'), id=3, color=[255, 170, 0]),
+ 4:
+ dict(link=('left_shoulder', 'left_elbow'), id=4, color=[255, 255, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[170, 255, 0]),
+ 6: dict(link=('neck', 'right_hip'), id=6, color=[85, 255, 0]),
+ 7: dict(link=('right_hip', 'right_knee'), id=7, color=[0, 255, 0]),
+ 8: dict(link=('right_knee', 'right_ankle'), id=8, color=[0, 255, 85]),
+ 9: dict(link=('neck', 'left_hip'), id=9, color=[0, 255, 170]),
+ 10: dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 225]),
+ 11: dict(link=('left_knee', 'left_ankle'), id=11, color=[0, 170, 255]),
+ 12: dict(link=('neck', 'nose'), id=12, color=[0, 85, 255]),
+ 13: dict(link=('nose', 'right_eye'), id=13, color=[0, 0, 255]),
+ 14: dict(link=('right_eye', 'right_ear'), id=14, color=[255, 0, 170]),
+ 15: dict(link=('nose', 'left_eye'), id=15, color=[170, 0, 255]),
+ 16: dict(link=('left_eye', 'left_ear'), id=16, color=[255, 0, 255]),
+ },
+ joint_weights=[1.] * 18,  # uniform weight, one entry per keypoint
+ sigmas=[  # 18 values, one per keypoint; NOTE(review): the first 17 equal coco.py's sigmas although the keypoint order differs here - verify
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.082
+ ])
diff --git a/mmpose/configs/_base_/datasets/coco_wholebody.py b/mmpose/configs/_base_/datasets/coco_wholebody.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef9b707017a24a1a133bb28566d212c618fee694
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_wholebody.py
@@ -0,0 +1,1154 @@
+dataset_info = dict(
+ dataset_name='coco_wholebody',
+ paper_info=dict(
+ author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+ 'Wang, Can and Liu, Wentao and '
+ 'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+ title='Whole-Body Human Pose Estimation in the Wild',
+ container='Proceedings of the European '
+ 'Conference on Computer Vision (ECCV)',
+ year='2020',
+ homepage='https://github.com/jin-s13/COCO-WholeBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 17:
+ dict(
+ name='left_big_toe',
+ id=17,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_big_toe'),
+ 18:
+ dict(
+ name='left_small_toe',
+ id=18,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_small_toe'),
+ 19:
+ dict(
+ name='left_heel',
+ id=19,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_heel'),
+ 20:
+ dict(
+ name='right_big_toe',
+ id=20,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_big_toe'),
+ 21:
+ dict(
+ name='right_small_toe',
+ id=21,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_small_toe'),
+ 22:
+ dict(
+ name='right_heel',
+ id=22,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_heel'),
+ 23:
+ dict(
+ name='face-0',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='face-16'),
+ 24:
+ dict(
+ name='face-1',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='face-15'),
+ 25:
+ dict(
+ name='face-2',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='face-14'),
+ 26:
+ dict(
+ name='face-3',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='face-13'),
+ 27:
+ dict(
+ name='face-4',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='face-12'),
+ 28:
+ dict(
+ name='face-5',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='face-11'),
+ 29:
+ dict(
+ name='face-6',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='face-10'),
+ 30:
+ dict(
+ name='face-7',
+ id=30,
+ color=[255, 255, 255],
+ type='',
+ swap='face-9'),
+ 31:
+ dict(name='face-8', id=31, color=[255, 255, 255], type='', swap=''),
+ 32:
+ dict(
+ name='face-9',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='face-7'),
+ 33:
+ dict(
+ name='face-10',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='face-6'),
+ 34:
+ dict(
+ name='face-11',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='face-5'),
+ 35:
+ dict(
+ name='face-12',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='face-4'),
+ 36:
+ dict(
+ name='face-13',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='face-3'),
+ 37:
+ dict(
+ name='face-14',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='face-2'),
+ 38:
+ dict(
+ name='face-15',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='face-1'),
+ 39:
+ dict(
+ name='face-16',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='face-0'),
+ 40:
+ dict(
+ name='face-17',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='face-26'),
+ 41:
+ dict(
+ name='face-18',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='face-25'),
+ 42:
+ dict(
+ name='face-19',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='face-24'),
+ 43:
+ dict(
+ name='face-20',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='face-23'),
+ 44:
+ dict(
+ name='face-21',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='face-22'),
+ 45:
+ dict(
+ name='face-22',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='face-21'),
+ 46:
+ dict(
+ name='face-23',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='face-20'),
+ 47:
+ dict(
+ name='face-24',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='face-19'),
+ 48:
+ dict(
+ name='face-25',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='face-18'),
+ 49:
+ dict(
+ name='face-26',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='face-17'),
+ 50:
+ dict(name='face-27', id=50, color=[255, 255, 255], type='', swap=''),
+ 51:
+ dict(name='face-28', id=51, color=[255, 255, 255], type='', swap=''),
+ 52:
+ dict(name='face-29', id=52, color=[255, 255, 255], type='', swap=''),
+ 53:
+ dict(name='face-30', id=53, color=[255, 255, 255], type='', swap=''),
+ 54:
+ dict(
+ name='face-31',
+ id=54,
+ color=[255, 255, 255],
+ type='',
+ swap='face-35'),
+ 55:
+ dict(
+ name='face-32',
+ id=55,
+ color=[255, 255, 255],
+ type='',
+ swap='face-34'),
+ 56:
+ dict(name='face-33', id=56, color=[255, 255, 255], type='', swap=''),
+ 57:
+ dict(
+ name='face-34',
+ id=57,
+ color=[255, 255, 255],
+ type='',
+ swap='face-32'),
+ 58:
+ dict(
+ name='face-35',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='face-31'),
+ 59:
+ dict(
+ name='face-36',
+ id=59,
+ color=[255, 255, 255],
+ type='',
+ swap='face-45'),
+ 60:
+ dict(
+ name='face-37',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='face-44'),
+ 61:
+ dict(
+ name='face-38',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='face-43'),
+ 62:
+ dict(
+ name='face-39',
+ id=62,
+ color=[255, 255, 255],
+ type='',
+ swap='face-42'),
+ 63:
+ dict(
+ name='face-40',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='face-47'),
+ 64:
+ dict(
+ name='face-41',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='face-46'),
+ 65:
+ dict(
+ name='face-42',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='face-39'),
+ 66:
+ dict(
+ name='face-43',
+ id=66,
+ color=[255, 255, 255],
+ type='',
+ swap='face-38'),
+ 67:
+ dict(
+ name='face-44',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='face-37'),
+ 68:
+ dict(
+ name='face-45',
+ id=68,
+ color=[255, 255, 255],
+ type='',
+ swap='face-36'),
+ 69:
+ dict(
+ name='face-46',
+ id=69,
+ color=[255, 255, 255],
+ type='',
+ swap='face-41'),
+ 70:
+ dict(
+ name='face-47',
+ id=70,
+ color=[255, 255, 255],
+ type='',
+ swap='face-40'),
+ 71:
+ dict(
+ name='face-48',
+ id=71,
+ color=[255, 255, 255],
+ type='',
+ swap='face-54'),
+ 72:
+ dict(
+ name='face-49',
+ id=72,
+ color=[255, 255, 255],
+ type='',
+ swap='face-53'),
+ 73:
+ dict(
+ name='face-50',
+ id=73,
+ color=[255, 255, 255],
+ type='',
+ swap='face-52'),
+ 74:
+ dict(name='face-51', id=74, color=[255, 255, 255], type='', swap=''),
+ 75:
+ dict(
+ name='face-52',
+ id=75,
+ color=[255, 255, 255],
+ type='',
+ swap='face-50'),
+ 76:
+ dict(
+ name='face-53',
+ id=76,
+ color=[255, 255, 255],
+ type='',
+ swap='face-49'),
+ 77:
+ dict(
+ name='face-54',
+ id=77,
+ color=[255, 255, 255],
+ type='',
+ swap='face-48'),
+ 78:
+ dict(
+ name='face-55',
+ id=78,
+ color=[255, 255, 255],
+ type='',
+ swap='face-59'),
+ 79:
+ dict(
+ name='face-56',
+ id=79,
+ color=[255, 255, 255],
+ type='',
+ swap='face-58'),
+ 80:
+ dict(name='face-57', id=80, color=[255, 255, 255], type='', swap=''),
+ 81:
+ dict(
+ name='face-58',
+ id=81,
+ color=[255, 255, 255],
+ type='',
+ swap='face-56'),
+ 82:
+ dict(
+ name='face-59',
+ id=82,
+ color=[255, 255, 255],
+ type='',
+ swap='face-55'),
+ 83:
+ dict(
+ name='face-60',
+ id=83,
+ color=[255, 255, 255],
+ type='',
+ swap='face-64'),
+ 84:
+ dict(
+ name='face-61',
+ id=84,
+ color=[255, 255, 255],
+ type='',
+ swap='face-63'),
+ 85:
+ dict(name='face-62', id=85, color=[255, 255, 255], type='', swap=''),
+ 86:
+ dict(
+ name='face-63',
+ id=86,
+ color=[255, 255, 255],
+ type='',
+ swap='face-61'),
+ 87:
+ dict(
+ name='face-64',
+ id=87,
+ color=[255, 255, 255],
+ type='',
+ swap='face-60'),
+ 88:
+ dict(
+ name='face-65',
+ id=88,
+ color=[255, 255, 255],
+ type='',
+ swap='face-67'),
+ 89:
+ dict(name='face-66', id=89, color=[255, 255, 255], type='', swap=''),
+ 90:
+ dict(
+ name='face-67',
+ id=90,
+ color=[255, 255, 255],
+ type='',
+ swap='face-65'),
+ 91:
+ dict(
+ name='left_hand_root',
+ id=91,
+ color=[255, 255, 255],
+ type='',
+ swap='right_hand_root'),
+ 92:
+ dict(
+ name='left_thumb1',
+ id=92,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb1'),
+ 93:
+ dict(
+ name='left_thumb2',
+ id=93,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb2'),
+ 94:
+ dict(
+ name='left_thumb3',
+ id=94,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb3'),
+ 95:
+ dict(
+ name='left_thumb4',
+ id=95,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb4'),
+ 96:
+ dict(
+ name='left_forefinger1',
+ id=96,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger1'),
+ 97:
+ dict(
+ name='left_forefinger2',
+ id=97,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger2'),
+ 98:
+ dict(
+ name='left_forefinger3',
+ id=98,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger3'),
+ 99:
+ dict(
+ name='left_forefinger4',
+ id=99,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger4'),
+ 100:
+ dict(
+ name='left_middle_finger1',
+ id=100,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger1'),
+ 101:
+ dict(
+ name='left_middle_finger2',
+ id=101,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger2'),
+ 102:
+ dict(
+ name='left_middle_finger3',
+ id=102,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger3'),
+ 103:
+ dict(
+ name='left_middle_finger4',
+ id=103,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger4'),
+ 104:
+ dict(
+ name='left_ring_finger1',
+ id=104,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger1'),
+ 105:
+ dict(
+ name='left_ring_finger2',
+ id=105,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger2'),
+ 106:
+ dict(
+ name='left_ring_finger3',
+ id=106,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger3'),
+ 107:
+ dict(
+ name='left_ring_finger4',
+ id=107,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger4'),
+ 108:
+ dict(
+ name='left_pinky_finger1',
+ id=108,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger1'),
+ 109:
+ dict(
+ name='left_pinky_finger2',
+ id=109,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger2'),
+ 110:
+ dict(
+ name='left_pinky_finger3',
+ id=110,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger3'),
+ 111:
+ dict(
+ name='left_pinky_finger4',
+ id=111,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger4'),
+ 112:
+ dict(
+ name='right_hand_root',
+ id=112,
+ color=[255, 255, 255],
+ type='',
+ swap='left_hand_root'),
+ 113:
+ dict(
+ name='right_thumb1',
+ id=113,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb1'),
+ 114:
+ dict(
+ name='right_thumb2',
+ id=114,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb2'),
+ 115:
+ dict(
+ name='right_thumb3',
+ id=115,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb3'),
+ 116:
+ dict(
+ name='right_thumb4',
+ id=116,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb4'),
+ 117:
+ dict(
+ name='right_forefinger1',
+ id=117,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger1'),
+ 118:
+ dict(
+ name='right_forefinger2',
+ id=118,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger2'),
+ 119:
+ dict(
+ name='right_forefinger3',
+ id=119,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger3'),
+ 120:
+ dict(
+ name='right_forefinger4',
+ id=120,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger4'),
+ 121:
+ dict(
+ name='right_middle_finger1',
+ id=121,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger1'),
+ 122:
+ dict(
+ name='right_middle_finger2',
+ id=122,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger2'),
+ 123:
+ dict(
+ name='right_middle_finger3',
+ id=123,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger3'),
+ 124:
+ dict(
+ name='right_middle_finger4',
+ id=124,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger4'),
+ 125:
+ dict(
+ name='right_ring_finger1',
+ id=125,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger1'),
+ 126:
+ dict(
+ name='right_ring_finger2',
+ id=126,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger2'),
+ 127:
+ dict(
+ name='right_ring_finger3',
+ id=127,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger3'),
+ 128:
+ dict(
+ name='right_ring_finger4',
+ id=128,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger4'),
+ 129:
+ dict(
+ name='right_pinky_finger1',
+ id=129,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger1'),
+ 130:
+ dict(
+ name='right_pinky_finger2',
+ id=130,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger2'),
+ 131:
+ dict(
+ name='right_pinky_finger3',
+ id=131,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger3'),
+ 132:
+ dict(
+ name='right_pinky_finger4',
+ id=132,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger4')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255]),
+ 19:
+ dict(link=('left_ankle', 'left_big_toe'), id=19, color=[0, 255, 0]),
+ 20:
+ dict(link=('left_ankle', 'left_small_toe'), id=20, color=[0, 255, 0]),
+ 21:
+ dict(link=('left_ankle', 'left_heel'), id=21, color=[0, 255, 0]),
+ 22:
+ dict(
+ link=('right_ankle', 'right_big_toe'), id=22, color=[255, 128, 0]),
+ 23:
+ dict(
+ link=('right_ankle', 'right_small_toe'),
+ id=23,
+ color=[255, 128, 0]),
+ 24:
+ dict(link=('right_ankle', 'right_heel'), id=24, color=[255, 128, 0]),
+ 25:
+ dict(
+ link=('left_hand_root', 'left_thumb1'), id=25, color=[255, 128,
+ 0]),
+ 26:
+ dict(link=('left_thumb1', 'left_thumb2'), id=26, color=[255, 128, 0]),
+ 27:
+ dict(link=('left_thumb2', 'left_thumb3'), id=27, color=[255, 128, 0]),
+ 28:
+ dict(link=('left_thumb3', 'left_thumb4'), id=28, color=[255, 128, 0]),
+ 29:
+ dict(
+ link=('left_hand_root', 'left_forefinger1'),
+ id=29,
+ color=[255, 153, 255]),
+ 30:
+ dict(
+ link=('left_forefinger1', 'left_forefinger2'),
+ id=30,
+ color=[255, 153, 255]),
+ 31:
+ dict(
+ link=('left_forefinger2', 'left_forefinger3'),
+ id=31,
+ color=[255, 153, 255]),
+ 32:
+ dict(
+ link=('left_forefinger3', 'left_forefinger4'),
+ id=32,
+ color=[255, 153, 255]),
+ 33:
+ dict(
+ link=('left_hand_root', 'left_middle_finger1'),
+ id=33,
+ color=[102, 178, 255]),
+ 34:
+ dict(
+ link=('left_middle_finger1', 'left_middle_finger2'),
+ id=34,
+ color=[102, 178, 255]),
+ 35:
+ dict(
+ link=('left_middle_finger2', 'left_middle_finger3'),
+ id=35,
+ color=[102, 178, 255]),
+ 36:
+ dict(
+ link=('left_middle_finger3', 'left_middle_finger4'),
+ id=36,
+ color=[102, 178, 255]),
+ 37:
+ dict(
+ link=('left_hand_root', 'left_ring_finger1'),
+ id=37,
+ color=[255, 51, 51]),
+ 38:
+ dict(
+ link=('left_ring_finger1', 'left_ring_finger2'),
+ id=38,
+ color=[255, 51, 51]),
+ 39:
+ dict(
+ link=('left_ring_finger2', 'left_ring_finger3'),
+ id=39,
+ color=[255, 51, 51]),
+ 40:
+ dict(
+ link=('left_ring_finger3', 'left_ring_finger4'),
+ id=40,
+ color=[255, 51, 51]),
+ 41:
+ dict(
+ link=('left_hand_root', 'left_pinky_finger1'),
+ id=41,
+ color=[0, 255, 0]),
+ 42:
+ dict(
+ link=('left_pinky_finger1', 'left_pinky_finger2'),
+ id=42,
+ color=[0, 255, 0]),
+ 43:
+ dict(
+ link=('left_pinky_finger2', 'left_pinky_finger3'),
+ id=43,
+ color=[0, 255, 0]),
+ 44:
+ dict(
+ link=('left_pinky_finger3', 'left_pinky_finger4'),
+ id=44,
+ color=[0, 255, 0]),
+ 45:
+ dict(
+ link=('right_hand_root', 'right_thumb1'),
+ id=45,
+ color=[255, 128, 0]),
+ 46:
+ dict(
+ link=('right_thumb1', 'right_thumb2'), id=46, color=[255, 128, 0]),
+ 47:
+ dict(
+ link=('right_thumb2', 'right_thumb3'), id=47, color=[255, 128, 0]),
+ 48:
+ dict(
+ link=('right_thumb3', 'right_thumb4'), id=48, color=[255, 128, 0]),
+ 49:
+ dict(
+ link=('right_hand_root', 'right_forefinger1'),
+ id=49,
+ color=[255, 153, 255]),
+ 50:
+ dict(
+ link=('right_forefinger1', 'right_forefinger2'),
+ id=50,
+ color=[255, 153, 255]),
+ 51:
+ dict(
+ link=('right_forefinger2', 'right_forefinger3'),
+ id=51,
+ color=[255, 153, 255]),
+ 52:
+ dict(
+ link=('right_forefinger3', 'right_forefinger4'),
+ id=52,
+ color=[255, 153, 255]),
+ 53:
+ dict(
+ link=('right_hand_root', 'right_middle_finger1'),
+ id=53,
+ color=[102, 178, 255]),
+ 54:
+ dict(
+ link=('right_middle_finger1', 'right_middle_finger2'),
+ id=54,
+ color=[102, 178, 255]),
+ 55:
+ dict(
+ link=('right_middle_finger2', 'right_middle_finger3'),
+ id=55,
+ color=[102, 178, 255]),
+ 56:
+ dict(
+ link=('right_middle_finger3', 'right_middle_finger4'),
+ id=56,
+ color=[102, 178, 255]),
+ 57:
+ dict(
+ link=('right_hand_root', 'right_ring_finger1'),
+ id=57,
+ color=[255, 51, 51]),
+ 58:
+ dict(
+ link=('right_ring_finger1', 'right_ring_finger2'),
+ id=58,
+ color=[255, 51, 51]),
+ 59:
+ dict(
+ link=('right_ring_finger2', 'right_ring_finger3'),
+ id=59,
+ color=[255, 51, 51]),
+ 60:
+ dict(
+ link=('right_ring_finger3', 'right_ring_finger4'),
+ id=60,
+ color=[255, 51, 51]),
+ 61:
+ dict(
+ link=('right_hand_root', 'right_pinky_finger1'),
+ id=61,
+ color=[0, 255, 0]),
+ 62:
+ dict(
+ link=('right_pinky_finger1', 'right_pinky_finger2'),
+ id=62,
+ color=[0, 255, 0]),
+ 63:
+ dict(
+ link=('right_pinky_finger2', 'right_pinky_finger3'),
+ id=63,
+ color=[0, 255, 0]),
+ 64:
+ dict(
+ link=('right_pinky_finger3', 'right_pinky_finger4'),
+ id=64,
+ color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 133,
+ # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
+ # 'evaluation/myeval_wholebody.py#L175'
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.068, 0.066, 0.066,
+ 0.092, 0.094, 0.094, 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031,
+ 0.025, 0.020, 0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045,
+ 0.013, 0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
+ 0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017,
+ 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010,
+ 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009,
+ 0.009, 0.009, 0.007, 0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01,
+ 0.008, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
+ 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019,
+ 0.022, 0.031, 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024,
+ 0.035, 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
+ 0.019, 0.022, 0.031
+ ])
diff --git a/mmpose/configs/_base_/datasets/coco_wholebody_face.py b/mmpose/configs/_base_/datasets/coco_wholebody_face.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3fe1e5b336d8ddd668d47123f5c0ceeff580914
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_wholebody_face.py
@@ -0,0 +1,154 @@
+dataset_info = dict(
+ dataset_name='coco_wholebody_face',
+ paper_info=dict(
+ author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+ 'Wang, Can and Liu, Wentao and '
+ 'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+ title='Whole-Body Human Pose Estimation in the Wild',
+ container='Proceedings of the European '
+ 'Conference on Computer Vision (ECCV)',
+ year='2020',
+ homepage='https://github.com/jin-s13/COCO-WholeBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='face-0', id=0, color=[255, 0, 0], type='', swap='face-16'),
+ 1:
+ dict(name='face-1', id=1, color=[255, 0, 0], type='', swap='face-15'),
+ 2:
+ dict(name='face-2', id=2, color=[255, 0, 0], type='', swap='face-14'),
+ 3:
+ dict(name='face-3', id=3, color=[255, 0, 0], type='', swap='face-13'),
+ 4:
+ dict(name='face-4', id=4, color=[255, 0, 0], type='', swap='face-12'),
+ 5:
+ dict(name='face-5', id=5, color=[255, 0, 0], type='', swap='face-11'),
+ 6:
+ dict(name='face-6', id=6, color=[255, 0, 0], type='', swap='face-10'),
+ 7:
+ dict(name='face-7', id=7, color=[255, 0, 0], type='', swap='face-9'),
+ 8: dict(name='face-8', id=8, color=[255, 0, 0], type='', swap=''),
+ 9:
+ dict(name='face-9', id=9, color=[255, 0, 0], type='', swap='face-7'),
+ 10:
+ dict(name='face-10', id=10, color=[255, 0, 0], type='', swap='face-6'),
+ 11:
+ dict(name='face-11', id=11, color=[255, 0, 0], type='', swap='face-5'),
+ 12:
+ dict(name='face-12', id=12, color=[255, 0, 0], type='', swap='face-4'),
+ 13:
+ dict(name='face-13', id=13, color=[255, 0, 0], type='', swap='face-3'),
+ 14:
+ dict(name='face-14', id=14, color=[255, 0, 0], type='', swap='face-2'),
+ 15:
+ dict(name='face-15', id=15, color=[255, 0, 0], type='', swap='face-1'),
+ 16:
+ dict(name='face-16', id=16, color=[255, 0, 0], type='', swap='face-0'),
+ 17: dict(
+ name='face-17', id=17, color=[255, 0, 0], type='', swap='face-26'),
+ 18: dict(
+ name='face-18', id=18, color=[255, 0, 0], type='', swap='face-25'),
+ 19: dict(
+ name='face-19', id=19, color=[255, 0, 0], type='', swap='face-24'),
+ 20: dict(
+ name='face-20', id=20, color=[255, 0, 0], type='', swap='face-23'),
+ 21: dict(
+ name='face-21', id=21, color=[255, 0, 0], type='', swap='face-22'),
+ 22: dict(
+ name='face-22', id=22, color=[255, 0, 0], type='', swap='face-21'),
+ 23: dict(
+ name='face-23', id=23, color=[255, 0, 0], type='', swap='face-20'),
+ 24: dict(
+ name='face-24', id=24, color=[255, 0, 0], type='', swap='face-19'),
+ 25: dict(
+ name='face-25', id=25, color=[255, 0, 0], type='', swap='face-18'),
+ 26: dict(
+ name='face-26', id=26, color=[255, 0, 0], type='', swap='face-17'),
+ 27: dict(name='face-27', id=27, color=[255, 0, 0], type='', swap=''),
+ 28: dict(name='face-28', id=28, color=[255, 0, 0], type='', swap=''),
+ 29: dict(name='face-29', id=29, color=[255, 0, 0], type='', swap=''),
+ 30: dict(name='face-30', id=30, color=[255, 0, 0], type='', swap=''),
+ 31: dict(
+ name='face-31', id=31, color=[255, 0, 0], type='', swap='face-35'),
+ 32: dict(
+ name='face-32', id=32, color=[255, 0, 0], type='', swap='face-34'),
+ 33: dict(name='face-33', id=33, color=[255, 0, 0], type='', swap=''),
+ 34: dict(
+ name='face-34', id=34, color=[255, 0, 0], type='', swap='face-32'),
+ 35: dict(
+ name='face-35', id=35, color=[255, 0, 0], type='', swap='face-31'),
+ 36: dict(
+ name='face-36', id=36, color=[255, 0, 0], type='', swap='face-45'),
+ 37: dict(
+ name='face-37', id=37, color=[255, 0, 0], type='', swap='face-44'),
+ 38: dict(
+ name='face-38', id=38, color=[255, 0, 0], type='', swap='face-43'),
+ 39: dict(
+ name='face-39', id=39, color=[255, 0, 0], type='', swap='face-42'),
+ 40: dict(
+ name='face-40', id=40, color=[255, 0, 0], type='', swap='face-47'),
+ 41: dict(
+ name='face-41', id=41, color=[255, 0, 0], type='', swap='face-46'),
+ 42: dict(
+ name='face-42', id=42, color=[255, 0, 0], type='', swap='face-39'),
+ 43: dict(
+ name='face-43', id=43, color=[255, 0, 0], type='', swap='face-38'),
+ 44: dict(
+ name='face-44', id=44, color=[255, 0, 0], type='', swap='face-37'),
+ 45: dict(
+ name='face-45', id=45, color=[255, 0, 0], type='', swap='face-36'),
+ 46: dict(
+ name='face-46', id=46, color=[255, 0, 0], type='', swap='face-41'),
+ 47: dict(
+ name='face-47', id=47, color=[255, 0, 0], type='', swap='face-40'),
+ 48: dict(
+ name='face-48', id=48, color=[255, 0, 0], type='', swap='face-54'),
+ 49: dict(
+ name='face-49', id=49, color=[255, 0, 0], type='', swap='face-53'),
+ 50: dict(
+ name='face-50', id=50, color=[255, 0, 0], type='', swap='face-52'),
+ 51: dict(name='face-51', id=51, color=[255, 0, 0], type='', swap=''),
+ 52: dict(
+ name='face-52', id=52, color=[255, 0, 0], type='', swap='face-50'),
+ 53: dict(
+ name='face-53', id=53, color=[255, 0, 0], type='', swap='face-49'),
+ 54: dict(
+ name='face-54', id=54, color=[255, 0, 0], type='', swap='face-48'),
+ 55: dict(
+ name='face-55', id=55, color=[255, 0, 0], type='', swap='face-59'),
+ 56: dict(
+ name='face-56', id=56, color=[255, 0, 0], type='', swap='face-58'),
+ 57: dict(name='face-57', id=57, color=[255, 0, 0], type='', swap=''),
+ 58: dict(
+ name='face-58', id=58, color=[255, 0, 0], type='', swap='face-56'),
+ 59: dict(
+ name='face-59', id=59, color=[255, 0, 0], type='', swap='face-55'),
+ 60: dict(
+ name='face-60', id=60, color=[255, 0, 0], type='', swap='face-64'),
+ 61: dict(
+ name='face-61', id=61, color=[255, 0, 0], type='', swap='face-63'),
+ 62: dict(name='face-62', id=62, color=[255, 0, 0], type='', swap=''),
+ 63: dict(
+ name='face-63', id=63, color=[255, 0, 0], type='', swap='face-61'),
+ 64: dict(
+ name='face-64', id=64, color=[255, 0, 0], type='', swap='face-60'),
+ 65: dict(
+ name='face-65', id=65, color=[255, 0, 0], type='', swap='face-67'),
+ 66: dict(name='face-66', id=66, color=[255, 0, 0], type='', swap=''),
+ 67: dict(
+ name='face-67', id=67, color=[255, 0, 0], type='', swap='face-65')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 68,
+
+ # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
+ # 'evaluation/myeval_wholebody.py#L177'
+ sigmas=[
+ 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020, 0.023,
+ 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013, 0.012, 0.011,
+ 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015, 0.009, 0.007, 0.007,
+ 0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017, 0.011, 0.009, 0.011,
+ 0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010, 0.034, 0.008, 0.008,
+ 0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009, 0.009, 0.009, 0.007,
+ 0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01, 0.008
+ ])
diff --git a/mmpose/configs/_base_/datasets/coco_wholebody_hand.py b/mmpose/configs/_base_/datasets/coco_wholebody_hand.py
new file mode 100644
index 0000000000000000000000000000000000000000..1910b2ced5a8b31cd6f83911e41cae9f1a580222
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/coco_wholebody_hand.py
@@ -0,0 +1,147 @@
+dataset_info = dict(
+ dataset_name='coco_wholebody_hand',
+ paper_info=dict(
+ author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
+ 'Wang, Can and Liu, Wentao and '
+ 'Qian, Chen and Ouyang, Wanli and Luo, Ping',
+ title='Whole-Body Human Pose Estimation in the Wild',
+ container='Proceedings of the European '
+ 'Conference on Computer Vision (ECCV)',
+ year='2020',
+ homepage='https://github.com/jin-s13/COCO-WholeBody/',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[
+ 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035, 0.018,
+ 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019, 0.022,
+ 0.031
+ ])
diff --git a/mmpose/configs/_base_/datasets/cofw.py b/mmpose/configs/_base_/datasets/cofw.py
new file mode 100644
index 0000000000000000000000000000000000000000..d528bf2f2f7e63adbff3ed56e18bca8b02165e42
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/cofw.py
@@ -0,0 +1,57 @@
+dataset_info = dict(
+ dataset_name='cofw',
+ paper_info=dict(
+ author='Burgos-Artizzu, Xavier P and Perona, '
+ r'Pietro and Doll{\'a}r, Piotr',
+ title='Robust face landmark estimation under occlusion',
+ container='Proceedings of the IEEE international '
+ 'conference on computer vision',
+ year='2013',
+ homepage='http://www.vision.caltech.edu/xpburgos/ICCV13/',
+ ),
+ keypoint_info={
+ 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-1'),
+ 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-0'),
+ 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-3'),
+ 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-2'),
+ 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-6'),
+ 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-7'),
+ 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-4'),
+ 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-5'),
+ 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-9'),
+ 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-8'),
+ 10:
+ dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-11'),
+ 11:
+ dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-10'),
+ 12:
+ dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-14'),
+ 13:
+ dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-15'),
+ 14:
+ dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-12'),
+ 15:
+ dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-13'),
+ 16:
+ dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap='kpt-17'),
+ 17:
+ dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-16'),
+ 18:
+ dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-19'),
+ 19:
+ dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-18'),
+ 20: dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap=''),
+ 21: dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap=''),
+ 22:
+ dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-23'),
+ 23:
+ dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-22'),
+ 24: dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap=''),
+ 25: dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap=''),
+ 26: dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap=''),
+ 27: dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap=''),
+ 28: dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap='')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 29,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/crowdpose.py b/mmpose/configs/_base_/datasets/crowdpose.py
new file mode 100644
index 0000000000000000000000000000000000000000..45086531a601870716eed15a32c5413c0e24b7ae
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/crowdpose.py
@@ -0,0 +1,147 @@
+dataset_info = dict(
+ dataset_name='crowdpose',
+ paper_info=dict(
+ author='Li, Jiefeng and Wang, Can and Zhu, Hao and '
+ 'Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu',
+ title='CrowdPose: Efficient Crowded Scenes Pose Estimation '
+ 'and A New Benchmark',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2019',
+ homepage='https://github.com/Jeff-sjtu/CrowdPose',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='left_shoulder',
+ id=0,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_shoulder'),
+ 1:
+ dict(
+ name='right_shoulder',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_shoulder'),
+ 2:
+ dict(
+ name='left_elbow',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_elbow'),
+ 3:
+ dict(
+ name='right_elbow',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_elbow'),
+ 4:
+ dict(
+ name='left_wrist',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_wrist'),
+ 5:
+ dict(
+ name='right_wrist',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='left_wrist'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='right_hip',
+ id=7,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_hip'),
+ 8:
+ dict(
+ name='left_knee',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_knee'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='left_ankle',
+ id=10,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_ankle'),
+ 11:
+ dict(
+ name='right_ankle',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_ankle'),
+ 12:
+ dict(
+ name='top_head', id=12, color=[255, 128, 0], type='upper',
+ swap=''),
+ 13:
+ dict(name='neck', id=13, color=[0, 255, 0], type='upper', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('top_head', 'neck'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('right_shoulder', 'neck'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('left_shoulder', 'neck'), id=14, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 0.2, 0.2, 0.2, 1.3, 1.5, 0.2, 1.3, 1.5, 0.2, 0.2, 0.5, 0.2, 0.2, 0.5
+ ],
+ sigmas=[
+ 0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087,
+ 0.089, 0.089, 0.079, 0.079
+ ])
diff --git a/mmpose/configs/_base_/datasets/deepfashion2.py b/mmpose/configs/_base_/datasets/deepfashion2.py
new file mode 100644
index 0000000000000000000000000000000000000000..f65d1bb591fab8f06a79b5d595478a282acd8b3e
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/deepfashion2.py
@@ -0,0 +1,2660 @@
+colors = dict(
+ sss=[255, 128, 0], # short_sleeved_shirt
+ lss=[255, 0, 128], # long_sleeved_shirt
+ sso=[128, 0, 255], # short_sleeved_outwear
+ lso=[0, 128, 255], # long_sleeved_outwear
+ vest=[0, 128, 128], # vest
+ sling=[0, 0, 128], # sling
+ shorts=[128, 128, 128], # shorts
+ trousers=[128, 0, 128], # trousers
+ skirt=[64, 128, 128], # skirt
+ ssd=[64, 64, 128], # short_sleeved_dress
+ lsd=[128, 64, 0], # long_sleeved_dress
+ vd=[128, 64, 255], # vest_dress
+ sd=[128, 64, 0], # sling_dress
+)
+dataset_info = dict(
+ dataset_name='deepfashion2',
+ paper_info=dict(
+ author='Yuying Ge and Ruimao Zhang and Lingyun Wu '
+ 'and Xiaogang Wang and Xiaoou Tang and Ping Luo',
+ title='DeepFashion2: A Versatile Benchmark for '
+ 'Detection, Pose Estimation, Segmentation and '
+ 'Re-Identification of Clothing Images',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2019',
+ homepage='https://github.com/switchablenorms/DeepFashion2',
+ ),
+ keypoint_info={
+ # short_sleeved_shirt
+ 0:
+ dict(name='sss_kpt1', id=0, color=colors['sss'], type='', swap=''),
+ 1:
+ dict(
+ name='sss_kpt2',
+ id=1,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt6'),
+ 2:
+ dict(
+ name='sss_kpt3',
+ id=2,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt5'),
+ 3:
+ dict(name='sss_kpt4', id=3, color=colors['sss'], type='', swap=''),
+ 4:
+ dict(
+ name='sss_kpt5',
+ id=4,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt3'),
+ 5:
+ dict(
+ name='sss_kpt6',
+ id=5,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt2'),
+ 6:
+ dict(
+ name='sss_kpt7',
+ id=6,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt25'),
+ 7:
+ dict(
+ name='sss_kpt8',
+ id=7,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt24'),
+ 8:
+ dict(
+ name='sss_kpt9',
+ id=8,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt23'),
+ 9:
+ dict(
+ name='sss_kpt10',
+ id=9,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt22'),
+ 10:
+ dict(
+ name='sss_kpt11',
+ id=10,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt21'),
+ 11:
+ dict(
+ name='sss_kpt12',
+ id=11,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt20'),
+ 12:
+ dict(
+ name='sss_kpt13',
+ id=12,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt19'),
+ 13:
+ dict(
+ name='sss_kpt14',
+ id=13,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt18'),
+ 14:
+ dict(
+ name='sss_kpt15',
+ id=14,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt17'),
+ 15:
+ dict(name='sss_kpt16', id=15, color=colors['sss'], type='', swap=''),
+ 16:
+ dict(
+ name='sss_kpt17',
+ id=16,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt15'),
+ 17:
+ dict(
+ name='sss_kpt18',
+ id=17,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt14'),
+ 18:
+ dict(
+ name='sss_kpt19',
+ id=18,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt13'),
+ 19:
+ dict(
+ name='sss_kpt20',
+ id=19,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt12'),
+ 20:
+ dict(
+ name='sss_kpt21',
+ id=20,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt11'),
+ 21:
+ dict(
+ name='sss_kpt22',
+ id=21,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt10'),
+ 22:
+ dict(
+ name='sss_kpt23',
+ id=22,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt9'),
+ 23:
+ dict(
+ name='sss_kpt24',
+ id=23,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt8'),
+ 24:
+ dict(
+ name='sss_kpt25',
+ id=24,
+ color=colors['sss'],
+ type='',
+ swap='sss_kpt7'),
+ # long_sleeved_shirt
+ 25:
+ dict(name='lss_kpt1', id=25, color=colors['lss'], type='', swap=''),
+ 26:
+ dict(
+ name='lss_kpt2',
+ id=26,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt6'),
+ 27:
+ dict(
+ name='lss_kpt3',
+ id=27,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt5'),
+ 28:
+ dict(name='lss_kpt4', id=28, color=colors['lss'], type='', swap=''),
+ 29:
+ dict(
+ name='lss_kpt5',
+ id=29,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt3'),
+ 30:
+ dict(
+ name='lss_kpt6',
+ id=30,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt2'),
+ 31:
+ dict(
+ name='lss_kpt7',
+ id=31,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt33'),
+ 32:
+ dict(
+ name='lss_kpt8',
+ id=32,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt32'),
+ 33:
+ dict(
+ name='lss_kpt9',
+ id=33,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt31'),
+ 34:
+ dict(
+ name='lss_kpt10',
+ id=34,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt30'),
+ 35:
+ dict(
+ name='lss_kpt11',
+ id=35,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt29'),
+ 36:
+ dict(
+ name='lss_kpt12',
+ id=36,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt28'),
+ 37:
+ dict(
+ name='lss_kpt13',
+ id=37,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt27'),
+ 38:
+ dict(
+ name='lss_kpt14',
+ id=38,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt26'),
+ 39:
+ dict(
+ name='lss_kpt15',
+ id=39,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt25'),
+ 40:
+ dict(
+ name='lss_kpt16',
+ id=40,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt24'),
+ 41:
+ dict(
+ name='lss_kpt17',
+ id=41,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt23'),
+ 42:
+ dict(
+ name='lss_kpt18',
+ id=42,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt22'),
+ 43:
+ dict(
+ name='lss_kpt19',
+ id=43,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt21'),
+ 44:
+ dict(name='lss_kpt20', id=44, color=colors['lss'], type='', swap=''),
+ 45:
+ dict(
+ name='lss_kpt21',
+ id=45,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt19'),
+ 46:
+ dict(
+ name='lss_kpt22',
+ id=46,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt18'),
+ 47:
+ dict(
+ name='lss_kpt23',
+ id=47,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt17'),
+ 48:
+ dict(
+ name='lss_kpt24',
+ id=48,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt16'),
+ 49:
+ dict(
+ name='lss_kpt25',
+ id=49,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt15'),
+ 50:
+ dict(
+ name='lss_kpt26',
+ id=50,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt14'),
+ 51:
+ dict(
+ name='lss_kpt27',
+ id=51,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt13'),
+ 52:
+ dict(
+ name='lss_kpt28',
+ id=52,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt12'),
+ 53:
+ dict(
+ name='lss_kpt29',
+ id=53,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt11'),
+ 54:
+ dict(
+ name='lss_kpt30',
+ id=54,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt10'),
+ 55:
+ dict(
+ name='lss_kpt31',
+ id=55,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt9'),
+ 56:
+ dict(
+ name='lss_kpt32',
+ id=56,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt8'),
+ 57:
+ dict(
+ name='lss_kpt33',
+ id=57,
+ color=colors['lss'],
+ type='',
+ swap='lss_kpt7'),
+ # short_sleeved_outwear
+ 58:
+ dict(name='sso_kpt1', id=58, color=colors['sso'], type='', swap=''),
+ 59:
+ dict(
+ name='sso_kpt2',
+ id=59,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt26'),
+ 60:
+ dict(
+ name='sso_kpt3',
+ id=60,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt5'),
+ 61:
+ dict(
+ name='sso_kpt4',
+ id=61,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt6'),
+ 62:
+ dict(
+ name='sso_kpt5',
+ id=62,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt3'),
+ 63:
+ dict(
+ name='sso_kpt6',
+ id=63,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt4'),
+ 64:
+ dict(
+ name='sso_kpt7',
+ id=64,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt25'),
+ 65:
+ dict(
+ name='sso_kpt8',
+ id=65,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt24'),
+ 66:
+ dict(
+ name='sso_kpt9',
+ id=66,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt23'),
+ 67:
+ dict(
+ name='sso_kpt10',
+ id=67,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt22'),
+ 68:
+ dict(
+ name='sso_kpt11',
+ id=68,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt21'),
+ 69:
+ dict(
+ name='sso_kpt12',
+ id=69,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt20'),
+ 70:
+ dict(
+ name='sso_kpt13',
+ id=70,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt19'),
+ 71:
+ dict(
+ name='sso_kpt14',
+ id=71,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt18'),
+ 72:
+ dict(
+ name='sso_kpt15',
+ id=72,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt17'),
+ 73:
+ dict(
+ name='sso_kpt16',
+ id=73,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt29'),
+ 74:
+ dict(
+ name='sso_kpt17',
+ id=74,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt15'),
+ 75:
+ dict(
+ name='sso_kpt18',
+ id=75,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt14'),
+ 76:
+ dict(
+ name='sso_kpt19',
+ id=76,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt13'),
+ 77:
+ dict(
+ name='sso_kpt20',
+ id=77,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt12'),
+ 78:
+ dict(
+ name='sso_kpt21',
+ id=78,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt11'),
+ 79:
+ dict(
+ name='sso_kpt22',
+ id=79,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt10'),
+ 80:
+ dict(
+ name='sso_kpt23',
+ id=80,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt9'),
+ 81:
+ dict(
+ name='sso_kpt24',
+ id=81,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt8'),
+ 82:
+ dict(
+ name='sso_kpt25',
+ id=82,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt7'),
+ 83:
+ dict(
+ name='sso_kpt26',
+ id=83,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt2'),
+ 84:
+ dict(
+ name='sso_kpt27',
+ id=84,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt30'),
+ 85:
+ dict(
+ name='sso_kpt28',
+ id=85,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt31'),
+ 86:
+ dict(
+ name='sso_kpt29',
+ id=86,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt16'),
+ 87:
+ dict(
+ name='sso_kpt30',
+ id=87,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt27'),
+ 88:
+ dict(
+ name='sso_kpt31',
+ id=88,
+ color=colors['sso'],
+ type='',
+ swap='sso_kpt28'),
+ # long_sleeved_outwear
+ 89:
+ dict(name='lso_kpt1', id=89, color=colors['lso'], type='', swap=''),
+ 90:
+ dict(
+ name='lso_kpt2',
+ id=90,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt6'),
+ 91:
+ dict(
+ name='lso_kpt3',
+ id=91,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt5'),
+ 92:
+ dict(
+ name='lso_kpt4',
+ id=92,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt34'),
+ 93:
+ dict(
+ name='lso_kpt5',
+ id=93,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt3'),
+ 94:
+ dict(
+ name='lso_kpt6',
+ id=94,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt2'),
+ 95:
+ dict(
+ name='lso_kpt7',
+ id=95,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt33'),
+ 96:
+ dict(
+ name='lso_kpt8',
+ id=96,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt32'),
+ 97:
+ dict(
+ name='lso_kpt9',
+ id=97,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt31'),
+ 98:
+ dict(
+ name='lso_kpt10',
+ id=98,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt30'),
+ 99:
+ dict(
+ name='lso_kpt11',
+ id=99,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt29'),
+ 100:
+ dict(
+ name='lso_kpt12',
+ id=100,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt28'),
+ 101:
+ dict(
+ name='lso_kpt13',
+ id=101,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt27'),
+ 102:
+ dict(
+ name='lso_kpt14',
+ id=102,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt26'),
+ 103:
+ dict(
+ name='lso_kpt15',
+ id=103,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt25'),
+ 104:
+ dict(
+ name='lso_kpt16',
+ id=104,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt24'),
+ 105:
+ dict(
+ name='lso_kpt17',
+ id=105,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt23'),
+ 106:
+ dict(
+ name='lso_kpt18',
+ id=106,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt22'),
+ 107:
+ dict(
+ name='lso_kpt19',
+ id=107,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt21'),
+ 108:
+ dict(
+ name='lso_kpt20',
+ id=108,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt37'),
+ 109:
+ dict(
+ name='lso_kpt21',
+ id=109,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt19'),
+ 110:
+ dict(
+ name='lso_kpt22',
+ id=110,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt18'),
+ 111:
+ dict(
+ name='lso_kpt23',
+ id=111,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt17'),
+ 112:
+ dict(
+ name='lso_kpt24',
+ id=112,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt16'),
+ 113:
+ dict(
+ name='lso_kpt25',
+ id=113,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt15'),
+ 114:
+ dict(
+ name='lso_kpt26',
+ id=114,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt14'),
+ 115:
+ dict(
+ name='lso_kpt27',
+ id=115,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt13'),
+ 116:
+ dict(
+ name='lso_kpt28',
+ id=116,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt12'),
+ 117:
+ dict(
+ name='lso_kpt29',
+ id=117,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt11'),
+ 118:
+ dict(
+ name='lso_kpt30',
+ id=118,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt10'),
+ 119:
+ dict(
+ name='lso_kpt31',
+ id=119,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt9'),
+ 120:
+ dict(
+ name='lso_kpt32',
+ id=120,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt8'),
+ 121:
+ dict(
+ name='lso_kpt33',
+ id=121,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt7'),
+ 122:
+ dict(
+ name='lso_kpt34',
+ id=122,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt4'),
+ 123:
+ dict(
+ name='lso_kpt35',
+ id=123,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt38'),
+ 124:
+ dict(
+ name='lso_kpt36',
+ id=124,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt39'),
+ 125:
+ dict(
+ name='lso_kpt37',
+ id=125,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt20'),
+ 126:
+ dict(
+ name='lso_kpt38',
+ id=126,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt35'),
+ 127:
+ dict(
+ name='lso_kpt39',
+ id=127,
+ color=colors['lso'],
+ type='',
+ swap='lso_kpt36'),
+ # vest
+ 128:
+ dict(name='vest_kpt1', id=128, color=colors['vest'], type='', swap=''),
+ 129:
+ dict(
+ name='vest_kpt2',
+ id=129,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt6'),
+ 130:
+ dict(
+ name='vest_kpt3',
+ id=130,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt5'),
+ 131:
+ dict(name='vest_kpt4', id=131, color=colors['vest'], type='', swap=''),
+ 132:
+ dict(
+ name='vest_kpt5',
+ id=132,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt3'),
+ 133:
+ dict(
+ name='vest_kpt6',
+ id=133,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt2'),
+ 134:
+ dict(
+ name='vest_kpt7',
+ id=134,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt15'),
+ 135:
+ dict(
+ name='vest_kpt8',
+ id=135,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt14'),
+ 136:
+ dict(
+ name='vest_kpt9',
+ id=136,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt13'),
+ 137:
+ dict(
+ name='vest_kpt10',
+ id=137,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt12'),
+ 138:
+ dict(
+ name='vest_kpt11', id=138, color=colors['vest'], type='', swap=''),
+ 139:
+ dict(
+ name='vest_kpt12',
+ id=139,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt10'),
+ 140:
+ dict(
+ name='vest_kpt13', id=140, color=colors['vest'], type='', swap=''),
+ 141:
+ dict(
+ name='vest_kpt14',
+ id=141,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt8'),
+ 142:
+ dict(
+ name='vest_kpt15',
+ id=142,
+ color=colors['vest'],
+ type='',
+ swap='vest_kpt7'),
+ # sling
+ 143:
+ dict(
+ name='sling_kpt1', id=143, color=colors['sling'], type='',
+ swap=''),
+ 144:
+ dict(
+ name='sling_kpt2',
+ id=144,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt6'),
+ 145:
+ dict(
+ name='sling_kpt3',
+ id=145,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt5'),
+ 146:
+ dict(
+ name='sling_kpt4', id=146, color=colors['sling'], type='',
+ swap=''),
+ 147:
+ dict(
+ name='sling_kpt5',
+ id=147,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt3'),
+ 148:
+ dict(
+ name='sling_kpt6',
+ id=148,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt2'),
+ 149:
+ dict(
+ name='sling_kpt7',
+ id=149,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt15'),
+ 150:
+ dict(
+ name='sling_kpt8',
+ id=150,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt14'),
+ 151:
+ dict(
+ name='sling_kpt9',
+ id=151,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt13'),
+ 152:
+ dict(
+ name='sling_kpt10',
+ id=152,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt12'),
+ 153:
+ dict(
+ name='sling_kpt11',
+ id=153,
+ color=colors['sling'],
+ type='',
+ swap=''),
+ 154:
+ dict(
+ name='sling_kpt12',
+ id=154,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt10'),
+ 155:
+ dict(
+ name='sling_kpt13',
+ id=155,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt9'),
+ 156:
+ dict(
+ name='sling_kpt14',
+ id=156,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt8'),
+ 157:
+ dict(
+ name='sling_kpt15',
+ id=157,
+ color=colors['sling'],
+ type='',
+ swap='sling_kpt7'),
+ # shorts
+ 158:
+ dict(
+ name='shorts_kpt1',
+ id=158,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt3'),
+ 159:
+ dict(
+ name='shorts_kpt2',
+ id=159,
+ color=colors['shorts'],
+ type='',
+ swap=''),
+ 160:
+ dict(
+ name='shorts_kpt3',
+ id=160,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt1'),
+ 161:
+ dict(
+ name='shorts_kpt4',
+ id=161,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt10'),
+ 162:
+ dict(
+ name='shorts_kpt5',
+ id=162,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt9'),
+ 163:
+ dict(
+ name='shorts_kpt6',
+ id=163,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt8'),
+ 164:
+ dict(
+ name='shorts_kpt7',
+ id=164,
+ color=colors['shorts'],
+ type='',
+ swap=''),
+ 165:
+ dict(
+ name='shorts_kpt8',
+ id=165,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt6'),
+ 166:
+ dict(
+ name='shorts_kpt9',
+ id=166,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt5'),
+ 167:
+ dict(
+ name='shorts_kpt10',
+ id=167,
+ color=colors['shorts'],
+ type='',
+ swap='shorts_kpt4'),
+ # trousers
+ 168:
+ dict(
+ name='trousers_kpt1',
+ id=168,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt3'),
+ 169:
+ dict(
+ name='trousers_kpt2',
+ id=169,
+ color=colors['trousers'],
+ type='',
+ swap=''),
+ 170:
+ dict(
+ name='trousers_kpt3',
+ id=170,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt1'),
+ 171:
+ dict(
+ name='trousers_kpt4',
+ id=171,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt14'),
+ 172:
+ dict(
+ name='trousers_kpt5',
+ id=172,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt13'),
+ 173:
+ dict(
+ name='trousers_kpt6',
+ id=173,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt12'),
+ 174:
+ dict(
+ name='trousers_kpt7',
+ id=174,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt11'),
+ 175:
+ dict(
+ name='trousers_kpt8',
+ id=175,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt10'),
+ 176:
+ dict(
+ name='trousers_kpt9',
+ id=176,
+ color=colors['trousers'],
+ type='',
+ swap=''),
+ 177:
+ dict(
+ name='trousers_kpt10',
+ id=177,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt8'),
+ 178:
+ dict(
+ name='trousers_kpt11',
+ id=178,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt7'),
+ 179:
+ dict(
+ name='trousers_kpt12',
+ id=179,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt6'),
+ 180:
+ dict(
+ name='trousers_kpt13',
+ id=180,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt5'),
+ 181:
+ dict(
+ name='trousers_kpt14',
+ id=181,
+ color=colors['trousers'],
+ type='',
+ swap='trousers_kpt4'),
+ # skirt
+ 182:
+ dict(
+ name='skirt_kpt1',
+ id=182,
+ color=colors['skirt'],
+ type='',
+ swap='skirt_kpt3'),
+ 183:
+ dict(
+ name='skirt_kpt2', id=183, color=colors['skirt'], type='',
+ swap=''),
+ 184:
+ dict(
+ name='skirt_kpt3',
+ id=184,
+ color=colors['skirt'],
+ type='',
+ swap='skirt_kpt1'),
+ 185:
+ dict(
+ name='skirt_kpt4',
+ id=185,
+ color=colors['skirt'],
+ type='',
+ swap='skirt_kpt8'),
+ 186:
+ dict(
+ name='skirt_kpt5',
+ id=186,
+ color=colors['skirt'],
+ type='',
+ swap='skirt_kpt7'),
+ 187:
+ dict(
+ name='skirt_kpt6', id=187, color=colors['skirt'], type='',
+ swap=''),
+ 188:
+ dict(
+ name='skirt_kpt7',
+ id=188,
+ color=colors['skirt'],
+ type='',
+ swap='skirt_kpt5'),
+ 189:
+ dict(
+ name='skirt_kpt8',
+ id=189,
+ color=colors['skirt'],
+ type='',
+ swap='skirt_kpt4'),
+ # short_sleeved_dress
+ 190:
+ dict(name='ssd_kpt1', id=190, color=colors['ssd'], type='', swap=''),
+ 191:
+ dict(
+ name='ssd_kpt2',
+ id=191,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt6'),
+ 192:
+ dict(
+ name='ssd_kpt3',
+ id=192,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt5'),
+ 193:
+ dict(name='ssd_kpt4', id=193, color=colors['ssd'], type='', swap=''),
+ 194:
+ dict(
+ name='ssd_kpt5',
+ id=194,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt3'),
+ 195:
+ dict(
+ name='ssd_kpt6',
+ id=195,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt2'),
+ 196:
+ dict(
+ name='ssd_kpt7',
+ id=196,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt29'),
+ 197:
+ dict(
+ name='ssd_kpt8',
+ id=197,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt28'),
+ 198:
+ dict(
+ name='ssd_kpt9',
+ id=198,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt27'),
+ 199:
+ dict(
+ name='ssd_kpt10',
+ id=199,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt26'),
+ 200:
+ dict(
+ name='ssd_kpt11',
+ id=200,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt25'),
+ 201:
+ dict(
+ name='ssd_kpt12',
+ id=201,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt24'),
+ 202:
+ dict(
+ name='ssd_kpt13',
+ id=202,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt23'),
+ 203:
+ dict(
+ name='ssd_kpt14',
+ id=203,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt22'),
+ 204:
+ dict(
+ name='ssd_kpt15',
+ id=204,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt21'),
+ 205:
+ dict(
+ name='ssd_kpt16',
+ id=205,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt20'),
+ 206:
+ dict(
+ name='ssd_kpt17',
+ id=206,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt19'),
+ 207:
+ dict(name='ssd_kpt18', id=207, color=colors['ssd'], type='', swap=''),
+ 208:
+ dict(
+ name='ssd_kpt19',
+ id=208,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt17'),
+ 209:
+ dict(
+ name='ssd_kpt20',
+ id=209,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt16'),
+ 210:
+ dict(
+ name='ssd_kpt21',
+ id=210,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt15'),
+ 211:
+ dict(
+ name='ssd_kpt22',
+ id=211,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt14'),
+ 212:
+ dict(
+ name='ssd_kpt23',
+ id=212,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt13'),
+ 213:
+ dict(
+ name='ssd_kpt24',
+ id=213,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt12'),
+ 214:
+ dict(
+ name='ssd_kpt25',
+ id=214,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt11'),
+ 215:
+ dict(
+ name='ssd_kpt26',
+ id=215,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt10'),
+ 216:
+ dict(
+ name='ssd_kpt27',
+ id=216,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt9'),
+ 217:
+ dict(
+ name='ssd_kpt28',
+ id=217,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt8'),
+ 218:
+ dict(
+ name='ssd_kpt29',
+ id=218,
+ color=colors['ssd'],
+ type='',
+ swap='ssd_kpt7'),
+ # long_sleeved_dress
+ 219:
+ dict(name='lsd_kpt1', id=219, color=colors['lsd'], type='', swap=''),
+ 220:
+ dict(
+ name='lsd_kpt2',
+ id=220,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt6'),
+ 221:
+ dict(
+ name='lsd_kpt3',
+ id=221,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt5'),
+ 222:
+ dict(name='lsd_kpt4', id=222, color=colors['lsd'], type='', swap=''),
+ 223:
+ dict(
+ name='lsd_kpt5',
+ id=223,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt3'),
+ 224:
+ dict(
+ name='lsd_kpt6',
+ id=224,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt2'),
+ 225:
+ dict(
+ name='lsd_kpt7',
+ id=225,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt37'),
+ 226:
+ dict(
+ name='lsd_kpt8',
+ id=226,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt36'),
+ 227:
+ dict(
+ name='lsd_kpt9',
+ id=227,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt35'),
+ 228:
+ dict(
+ name='lsd_kpt10',
+ id=228,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt34'),
+ 229:
+ dict(
+ name='lsd_kpt11',
+ id=229,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt33'),
+ 230:
+ dict(
+ name='lsd_kpt12',
+ id=230,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt32'),
+ 231:
+ dict(
+ name='lsd_kpt13',
+ id=231,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt31'),
+ 232:
+ dict(
+ name='lsd_kpt14',
+ id=232,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt30'),
+ 233:
+ dict(
+ name='lsd_kpt15',
+ id=233,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt29'),
+ 234:
+ dict(
+ name='lsd_kpt16',
+ id=234,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt28'),
+ 235:
+ dict(
+ name='lsd_kpt17',
+ id=235,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt27'),
+ 236:
+ dict(
+ name='lsd_kpt18',
+ id=236,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt26'),
+ 237:
+ dict(
+ name='lsd_kpt19',
+ id=237,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt25'),
+ 238:
+ dict(
+ name='lsd_kpt20',
+ id=238,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt24'),
+ 239:
+ dict(
+ name='lsd_kpt21',
+ id=239,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt23'),
+ 240:
+ dict(name='lsd_kpt22', id=240, color=colors['lsd'], type='', swap=''),
+ 241:
+ dict(
+ name='lsd_kpt23',
+ id=241,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt21'),
+ 242:
+ dict(
+ name='lsd_kpt24',
+ id=242,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt20'),
+ 243:
+ dict(
+ name='lsd_kpt25',
+ id=243,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt19'),
+ 244:
+ dict(
+ name='lsd_kpt26',
+ id=244,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt18'),
+ 245:
+ dict(
+ name='lsd_kpt27',
+ id=245,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt17'),
+ 246:
+ dict(
+ name='lsd_kpt28',
+ id=246,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt16'),
+ 247:
+ dict(
+ name='lsd_kpt29',
+ id=247,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt15'),
+ 248:
+ dict(
+ name='lsd_kpt30',
+ id=248,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt14'),
+ 249:
+ dict(
+ name='lsd_kpt31',
+ id=249,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt13'),
+ 250:
+ dict(
+ name='lsd_kpt32',
+ id=250,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt12'),
+ 251:
+ dict(
+ name='lsd_kpt33',
+ id=251,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt11'),
+ 252:
+ dict(
+ name='lsd_kpt34',
+ id=252,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt10'),
+ 253:
+ dict(
+ name='lsd_kpt35',
+ id=253,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt9'),
+ 254:
+ dict(
+ name='lsd_kpt36',
+ id=254,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt8'),
+ 255:
+ dict(
+ name='lsd_kpt37',
+ id=255,
+ color=colors['lsd'],
+ type='',
+ swap='lsd_kpt7'),
+ # vest_dress
+ 256:
+ dict(name='vd_kpt1', id=256, color=colors['vd'], type='', swap=''),
+ 257:
+ dict(
+ name='vd_kpt2',
+ id=257,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt6'),
+ 258:
+ dict(
+ name='vd_kpt3',
+ id=258,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt5'),
+ 259:
+ dict(name='vd_kpt4', id=259, color=colors['vd'], type='', swap=''),
+ 260:
+ dict(
+ name='vd_kpt5',
+ id=260,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt3'),
+ 261:
+ dict(
+ name='vd_kpt6',
+ id=261,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt2'),
+ 262:
+ dict(
+ name='vd_kpt7',
+ id=262,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt19'),
+ 263:
+ dict(
+ name='vd_kpt8',
+ id=263,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt18'),
+ 264:
+ dict(
+ name='vd_kpt9',
+ id=264,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt17'),
+ 265:
+ dict(
+ name='vd_kpt10',
+ id=265,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt16'),
+ 266:
+ dict(
+ name='vd_kpt11',
+ id=266,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt15'),
+ 267:
+ dict(
+ name='vd_kpt12',
+ id=267,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt14'),
+ 268:
+ dict(name='vd_kpt13', id=268, color=colors['vd'], type='', swap=''),
+ 269:
+ dict(
+ name='vd_kpt14',
+ id=269,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt12'),
+ 270:
+ dict(
+ name='vd_kpt15',
+ id=270,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt11'),
+ 271:
+ dict(
+ name='vd_kpt16',
+ id=271,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt10'),
+ 272:
+ dict(
+ name='vd_kpt17',
+ id=272,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt9'),
+ 273:
+ dict(
+ name='vd_kpt18',
+ id=273,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt8'),
+ 274:
+ dict(
+ name='vd_kpt19',
+ id=274,
+ color=colors['vd'],
+ type='',
+ swap='vd_kpt7'),
+ # sling_dress
+ 275:
+ dict(name='sd_kpt1', id=275, color=colors['sd'], type='', swap=''),
+ 276:
+ dict(
+ name='sd_kpt2',
+ id=276,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt6'),
+ 277:
+ dict(
+ name='sd_kpt3',
+ id=277,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt5'),
+ 278:
+ dict(name='sd_kpt4', id=278, color=colors['sd'], type='', swap=''),
+ 279:
+ dict(
+ name='sd_kpt5',
+ id=279,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt3'),
+ 280:
+ dict(
+ name='sd_kpt6',
+ id=280,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt2'),
+ 281:
+ dict(
+ name='sd_kpt7',
+ id=281,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt19'),
+ 282:
+ dict(
+ name='sd_kpt8',
+ id=282,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt18'),
+ 283:
+ dict(
+ name='sd_kpt9',
+ id=283,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt17'),
+ 284:
+ dict(
+ name='sd_kpt10',
+ id=284,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt16'),
+ 285:
+ dict(
+ name='sd_kpt11',
+ id=285,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt15'),
+ 286:
+ dict(
+ name='sd_kpt12',
+ id=286,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt14'),
+ 287:
+ dict(name='sd_kpt13', id=287, color=colors['sd'], type='', swap=''),
+ 288:
+ dict(
+ name='sd_kpt14',
+ id=288,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt12'),
+ 289:
+ dict(
+ name='sd_kpt15',
+ id=289,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt11'),
+ 290:
+ dict(
+ name='sd_kpt16',
+ id=290,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt10'),
+ 291:
+ dict(
+ name='sd_kpt17',
+ id=291,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt9'),
+ 292:
+ dict(
+ name='sd_kpt18',
+ id=292,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt8'),
+ 293:
+ dict(
+ name='sd_kpt19',
+ id=293,
+ color=colors['sd'],
+ type='',
+ swap='sd_kpt7'),
+ },
+ skeleton_info={
+ # short_sleeved_shirt
+ 0:
+ dict(link=('sss_kpt1', 'sss_kpt2'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('sss_kpt2', 'sss_kpt7'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('sss_kpt7', 'sss_kpt8'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('sss_kpt8', 'sss_kpt9'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('sss_kpt9', 'sss_kpt10'), id=4, color=[255, 128, 0]),
+ 5:
+ dict(link=('sss_kpt10', 'sss_kpt11'), id=5, color=[255, 128, 0]),
+ 6:
+ dict(link=('sss_kpt11', 'sss_kpt12'), id=6, color=[255, 128, 0]),
+ 7:
+ dict(link=('sss_kpt12', 'sss_kpt13'), id=7, color=[255, 128, 0]),
+ 8:
+ dict(link=('sss_kpt13', 'sss_kpt14'), id=8, color=[255, 128, 0]),
+ 9:
+ dict(link=('sss_kpt14', 'sss_kpt15'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('sss_kpt15', 'sss_kpt16'), id=10, color=[255, 128, 0]),
+ 11:
+ dict(link=('sss_kpt16', 'sss_kpt17'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('sss_kpt17', 'sss_kpt18'), id=12, color=[255, 128, 0]),
+ 13:
+ dict(link=('sss_kpt18', 'sss_kpt19'), id=13, color=[255, 128, 0]),
+ 14:
+ dict(link=('sss_kpt19', 'sss_kpt20'), id=14, color=[255, 128, 0]),
+ 15:
+ dict(link=('sss_kpt20', 'sss_kpt21'), id=15, color=[255, 128, 0]),
+ 16:
+ dict(link=('sss_kpt21', 'sss_kpt22'), id=16, color=[255, 128, 0]),
+ 17:
+ dict(link=('sss_kpt22', 'sss_kpt23'), id=17, color=[255, 128, 0]),
+ 18:
+ dict(link=('sss_kpt23', 'sss_kpt24'), id=18, color=[255, 128, 0]),
+ 19:
+ dict(link=('sss_kpt24', 'sss_kpt25'), id=19, color=[255, 128, 0]),
+ 20:
+ dict(link=('sss_kpt25', 'sss_kpt6'), id=20, color=[255, 128, 0]),
+ 21:
+ dict(link=('sss_kpt6', 'sss_kpt1'), id=21, color=[255, 128, 0]),
+ 22:
+ dict(link=('sss_kpt2', 'sss_kpt3'), id=22, color=[255, 128, 0]),
+ 23:
+ dict(link=('sss_kpt3', 'sss_kpt4'), id=23, color=[255, 128, 0]),
+ 24:
+ dict(link=('sss_kpt4', 'sss_kpt5'), id=24, color=[255, 128, 0]),
+ 25:
+ dict(link=('sss_kpt5', 'sss_kpt6'), id=25, color=[255, 128, 0]),
+ # long_sleeved_shirt
+ 26:
+ dict(link=('lss_kpt1', 'lss_kpt2'), id=26, color=[255, 0, 128]),
+ 27:
+ dict(link=('lss_kpt2', 'lss_kpt7'), id=27, color=[255, 0, 128]),
+ 28:
+ dict(link=('lss_kpt7', 'lss_kpt8'), id=28, color=[255, 0, 128]),
+ 29:
+ dict(link=('lss_kpt8', 'lss_kpt9'), id=29, color=[255, 0, 128]),
+ 30:
+ dict(link=('lss_kpt9', 'lss_kpt10'), id=30, color=[255, 0, 128]),
+ 31:
+ dict(link=('lss_kpt10', 'lss_kpt11'), id=31, color=[255, 0, 128]),
+ 32:
+ dict(link=('lss_kpt11', 'lss_kpt12'), id=32, color=[255, 0, 128]),
+ 33:
+ dict(link=('lss_kpt12', 'lss_kpt13'), id=33, color=[255, 0, 128]),
+ 34:
+ dict(link=('lss_kpt13', 'lss_kpt14'), id=34, color=[255, 0, 128]),
+ 35:
+ dict(link=('lss_kpt14', 'lss_kpt15'), id=35, color=[255, 0, 128]),
+ 36:
+ dict(link=('lss_kpt15', 'lss_kpt16'), id=36, color=[255, 0, 128]),
+ 37:
+ dict(link=('lss_kpt16', 'lss_kpt17'), id=37, color=[255, 0, 128]),
+ 38:
+ dict(link=('lss_kpt17', 'lss_kpt18'), id=38, color=[255, 0, 128]),
+ 39:
+ dict(link=('lss_kpt18', 'lss_kpt19'), id=39, color=[255, 0, 128]),
+ 40:
+ dict(link=('lss_kpt19', 'lss_kpt20'), id=40, color=[255, 0, 128]),
+ 41:
+ dict(link=('lss_kpt20', 'lss_kpt21'), id=41, color=[255, 0, 128]),
+ 42:
+ dict(link=('lss_kpt21', 'lss_kpt22'), id=42, color=[255, 0, 128]),
+ 43:
+ dict(link=('lss_kpt22', 'lss_kpt23'), id=43, color=[255, 0, 128]),
+ 44:
+ dict(link=('lss_kpt23', 'lss_kpt24'), id=44, color=[255, 0, 128]),
+ 45:
+ dict(link=('lss_kpt24', 'lss_kpt25'), id=45, color=[255, 0, 128]),
+ 46:
+ dict(link=('lss_kpt25', 'lss_kpt26'), id=46, color=[255, 0, 128]),
+ 47:
+ dict(link=('lss_kpt26', 'lss_kpt27'), id=47, color=[255, 0, 128]),
+ 48:
+ dict(link=('lss_kpt27', 'lss_kpt28'), id=48, color=[255, 0, 128]),
+ 49:
+ dict(link=('lss_kpt28', 'lss_kpt29'), id=49, color=[255, 0, 128]),
+ 50:
+ dict(link=('lss_kpt29', 'lss_kpt30'), id=50, color=[255, 0, 128]),
+ 51:
+ dict(link=('lss_kpt30', 'lss_kpt31'), id=51, color=[255, 0, 128]),
+ 52:
+ dict(link=('lss_kpt31', 'lss_kpt32'), id=52, color=[255, 0, 128]),
+ 53:
+ dict(link=('lss_kpt32', 'lss_kpt33'), id=53, color=[255, 0, 128]),
+ 54:
+ dict(link=('lss_kpt33', 'lss_kpt6'), id=54, color=[255, 0, 128]),
+ 55:
+ dict(link=('lss_kpt6', 'lss_kpt5'), id=55, color=[255, 0, 128]),
+ 56:
+ dict(link=('lss_kpt5', 'lss_kpt4'), id=56, color=[255, 0, 128]),
+ 57:
+ dict(link=('lss_kpt4', 'lss_kpt3'), id=57, color=[255, 0, 128]),
+ 58:
+ dict(link=('lss_kpt3', 'lss_kpt2'), id=58, color=[255, 0, 128]),
+ 59:
+ dict(link=('lss_kpt6', 'lss_kpt1'), id=59, color=[255, 0, 128]),
+ # short_sleeved_outwear
+ 60:
+ dict(link=('sso_kpt1', 'sso_kpt4'), id=60, color=[128, 0, 255]),
+ 61:
+ dict(link=('sso_kpt4', 'sso_kpt7'), id=61, color=[128, 0, 255]),
+ 62:
+ dict(link=('sso_kpt7', 'sso_kpt8'), id=62, color=[128, 0, 255]),
+ 63:
+ dict(link=('sso_kpt8', 'sso_kpt9'), id=63, color=[128, 0, 255]),
+ 64:
+ dict(link=('sso_kpt9', 'sso_kpt10'), id=64, color=[128, 0, 255]),
+ 65:
+ dict(link=('sso_kpt10', 'sso_kpt11'), id=65, color=[128, 0, 255]),
+ 66:
+ dict(link=('sso_kpt11', 'sso_kpt12'), id=66, color=[128, 0, 255]),
+ 67:
+ dict(link=('sso_kpt12', 'sso_kpt13'), id=67, color=[128, 0, 255]),
+ 68:
+ dict(link=('sso_kpt13', 'sso_kpt14'), id=68, color=[128, 0, 255]),
+ 69:
+ dict(link=('sso_kpt14', 'sso_kpt15'), id=69, color=[128, 0, 255]),
+ 70:
+ dict(link=('sso_kpt15', 'sso_kpt16'), id=70, color=[128, 0, 255]),
+ 71:
+ dict(link=('sso_kpt16', 'sso_kpt31'), id=71, color=[128, 0, 255]),
+ 72:
+ dict(link=('sso_kpt31', 'sso_kpt30'), id=72, color=[128, 0, 255]),
+ 73:
+ dict(link=('sso_kpt30', 'sso_kpt2'), id=73, color=[128, 0, 255]),
+ 74:
+ dict(link=('sso_kpt2', 'sso_kpt3'), id=74, color=[128, 0, 255]),
+ 75:
+ dict(link=('sso_kpt3', 'sso_kpt4'), id=75, color=[128, 0, 255]),
+ 76:
+ dict(link=('sso_kpt1', 'sso_kpt6'), id=76, color=[128, 0, 255]),
+ 77:
+ dict(link=('sso_kpt6', 'sso_kpt25'), id=77, color=[128, 0, 255]),
+ 78:
+ dict(link=('sso_kpt25', 'sso_kpt24'), id=78, color=[128, 0, 255]),
+ 79:
+ dict(link=('sso_kpt24', 'sso_kpt23'), id=79, color=[128, 0, 255]),
+ 80:
+ dict(link=('sso_kpt23', 'sso_kpt22'), id=80, color=[128, 0, 255]),
+ 81:
+ dict(link=('sso_kpt22', 'sso_kpt21'), id=81, color=[128, 0, 255]),
+ 82:
+ dict(link=('sso_kpt21', 'sso_kpt20'), id=82, color=[128, 0, 255]),
+ 83:
+ dict(link=('sso_kpt20', 'sso_kpt19'), id=83, color=[128, 0, 255]),
+ 84:
+ dict(link=('sso_kpt19', 'sso_kpt18'), id=84, color=[128, 0, 255]),
+ 85:
+ dict(link=('sso_kpt18', 'sso_kpt17'), id=85, color=[128, 0, 255]),
+ 86:
+ dict(link=('sso_kpt17', 'sso_kpt29'), id=86, color=[128, 0, 255]),
+ 87:
+ dict(link=('sso_kpt29', 'sso_kpt28'), id=87, color=[128, 0, 255]),
+ 88:
+ dict(link=('sso_kpt28', 'sso_kpt27'), id=88, color=[128, 0, 255]),
+ 89:
+ dict(link=('sso_kpt27', 'sso_kpt26'), id=89, color=[128, 0, 255]),
+ 90:
+ dict(link=('sso_kpt26', 'sso_kpt5'), id=90, color=[128, 0, 255]),
+ 91:
+ dict(link=('sso_kpt5', 'sso_kpt6'), id=91, color=[128, 0, 255]),
+ # long_sleeved_outwear
+ 92:
+ dict(link=('lso_kpt1', 'lso_kpt2'), id=92, color=[0, 128, 255]),
+ 93:
+ dict(link=('lso_kpt2', 'lso_kpt7'), id=93, color=[0, 128, 255]),
+ 94:
+ dict(link=('lso_kpt7', 'lso_kpt8'), id=94, color=[0, 128, 255]),
+ 95:
+ dict(link=('lso_kpt8', 'lso_kpt9'), id=95, color=[0, 128, 255]),
+ 96:
+ dict(link=('lso_kpt9', 'lso_kpt10'), id=96, color=[0, 128, 255]),
+ 97:
+ dict(link=('lso_kpt10', 'lso_kpt11'), id=97, color=[0, 128, 255]),
+ 98:
+ dict(link=('lso_kpt11', 'lso_kpt12'), id=98, color=[0, 128, 255]),
+ 99:
+ dict(link=('lso_kpt12', 'lso_kpt13'), id=99, color=[0, 128, 255]),
+ 100:
+ dict(link=('lso_kpt13', 'lso_kpt14'), id=100, color=[0, 128, 255]),
+ 101:
+ dict(link=('lso_kpt14', 'lso_kpt15'), id=101, color=[0, 128, 255]),
+ 102:
+ dict(link=('lso_kpt15', 'lso_kpt16'), id=102, color=[0, 128, 255]),
+ 103:
+ dict(link=('lso_kpt16', 'lso_kpt17'), id=103, color=[0, 128, 255]),
+ 104:
+ dict(link=('lso_kpt17', 'lso_kpt18'), id=104, color=[0, 128, 255]),
+ 105:
+ dict(link=('lso_kpt18', 'lso_kpt19'), id=105, color=[0, 128, 255]),
+ 106:
+ dict(link=('lso_kpt19', 'lso_kpt20'), id=106, color=[0, 128, 255]),
+ 107:
+ dict(link=('lso_kpt20', 'lso_kpt39'), id=107, color=[0, 128, 255]),
+ 108:
+ dict(link=('lso_kpt39', 'lso_kpt38'), id=108, color=[0, 128, 255]),
+ 109:
+ dict(link=('lso_kpt38', 'lso_kpt4'), id=109, color=[0, 128, 255]),
+ 110:
+ dict(link=('lso_kpt4', 'lso_kpt3'), id=110, color=[0, 128, 255]),
+ 111:
+ dict(link=('lso_kpt3', 'lso_kpt2'), id=111, color=[0, 128, 255]),
+ 112:
+ dict(link=('lso_kpt1', 'lso_kpt6'), id=112, color=[0, 128, 255]),
+ 113:
+ dict(link=('lso_kpt6', 'lso_kpt33'), id=113, color=[0, 128, 255]),
+ 114:
+ dict(link=('lso_kpt33', 'lso_kpt32'), id=114, color=[0, 128, 255]),
+ 115:
+ dict(link=('lso_kpt32', 'lso_kpt31'), id=115, color=[0, 128, 255]),
+ 116:
+ dict(link=('lso_kpt31', 'lso_kpt30'), id=116, color=[0, 128, 255]),
+ 117:
+ dict(link=('lso_kpt30', 'lso_kpt29'), id=117, color=[0, 128, 255]),
+ 118:
+ dict(link=('lso_kpt29', 'lso_kpt28'), id=118, color=[0, 128, 255]),
+ 119:
+ dict(link=('lso_kpt28', 'lso_kpt27'), id=119, color=[0, 128, 255]),
+ 120:
+ dict(link=('lso_kpt27', 'lso_kpt26'), id=120, color=[0, 128, 255]),
+ 121:
+ dict(link=('lso_kpt26', 'lso_kpt25'), id=121, color=[0, 128, 255]),
+ 122:
+ dict(link=('lso_kpt25', 'lso_kpt24'), id=122, color=[0, 128, 255]),
+ 123:
+ dict(link=('lso_kpt24', 'lso_kpt23'), id=123, color=[0, 128, 255]),
+ 124:
+ dict(link=('lso_kpt23', 'lso_kpt22'), id=124, color=[0, 128, 255]),
+ 125:
+ dict(link=('lso_kpt22', 'lso_kpt21'), id=125, color=[0, 128, 255]),
+ 126:
+ dict(link=('lso_kpt21', 'lso_kpt37'), id=126, color=[0, 128, 255]),
+ 127:
+ dict(link=('lso_kpt37', 'lso_kpt36'), id=127, color=[0, 128, 255]),
+ 128:
+ dict(link=('lso_kpt36', 'lso_kpt35'), id=128, color=[0, 128, 255]),
+ 129:
+ dict(link=('lso_kpt35', 'lso_kpt34'), id=129, color=[0, 128, 255]),
+ 130:
+ dict(link=('lso_kpt34', 'lso_kpt5'), id=130, color=[0, 128, 255]),
+ 131:
+ dict(link=('lso_kpt5', 'lso_kpt6'), id=131, color=[0, 128, 255]),
+ # vest
+ 132:
+ dict(link=('vest_kpt1', 'vest_kpt2'), id=132, color=[0, 128, 128]),
+ 133:
+ dict(link=('vest_kpt2', 'vest_kpt7'), id=133, color=[0, 128, 128]),
+ 134:
+ dict(link=('vest_kpt7', 'vest_kpt8'), id=134, color=[0, 128, 128]),
+ 135:
+ dict(link=('vest_kpt8', 'vest_kpt9'), id=135, color=[0, 128, 128]),
+ 136:
+ dict(link=('vest_kpt9', 'vest_kpt10'), id=136, color=[0, 128, 128]),
+ 137:
+ dict(link=('vest_kpt10', 'vest_kpt11'), id=137, color=[0, 128, 128]),
+ 138:
+ dict(link=('vest_kpt11', 'vest_kpt12'), id=138, color=[0, 128, 128]),
+ 139:
+ dict(link=('vest_kpt12', 'vest_kpt13'), id=139, color=[0, 128, 128]),
+ 140:
+ dict(link=('vest_kpt13', 'vest_kpt14'), id=140, color=[0, 128, 128]),
+ 141:
+ dict(link=('vest_kpt14', 'vest_kpt15'), id=141, color=[0, 128, 128]),
+ 142:
+ dict(link=('vest_kpt15', 'vest_kpt6'), id=142, color=[0, 128, 128]),
+ 143:
+ dict(link=('vest_kpt6', 'vest_kpt1'), id=143, color=[0, 128, 128]),
+ 144:
+ dict(link=('vest_kpt2', 'vest_kpt3'), id=144, color=[0, 128, 128]),
+ 145:
+ dict(link=('vest_kpt3', 'vest_kpt4'), id=145, color=[0, 128, 128]),
+ 146:
+ dict(link=('vest_kpt4', 'vest_kpt5'), id=146, color=[0, 128, 128]),
+ 147:
+ dict(link=('vest_kpt5', 'vest_kpt6'), id=147, color=[0, 128, 128]),
+ # sling
+ 148:
+ dict(link=('sling_kpt1', 'sling_kpt2'), id=148, color=[0, 0, 128]),
+ 149:
+ dict(link=('sling_kpt2', 'sling_kpt8'), id=149, color=[0, 0, 128]),
+ 150:
+ dict(link=('sling_kpt8', 'sling_kpt9'), id=150, color=[0, 0, 128]),
+ 151:
+ dict(link=('sling_kpt9', 'sling_kpt10'), id=151, color=[0, 0, 128]),
+ 152:
+ dict(link=('sling_kpt10', 'sling_kpt11'), id=152, color=[0, 0, 128]),
+ 153:
+ dict(link=('sling_kpt11', 'sling_kpt12'), id=153, color=[0, 0, 128]),
+ 154:
+ dict(link=('sling_kpt12', 'sling_kpt13'), id=154, color=[0, 0, 128]),
+ 155:
+ dict(link=('sling_kpt13', 'sling_kpt14'), id=155, color=[0, 0, 128]),
+ 156:
+ dict(link=('sling_kpt14', 'sling_kpt6'), id=156, color=[0, 0, 128]),
+ 157:
+ dict(link=('sling_kpt2', 'sling_kpt7'), id=157, color=[0, 0, 128]),
+ 158:
+ dict(link=('sling_kpt6', 'sling_kpt15'), id=158, color=[0, 0, 128]),
+ 159:
+ dict(link=('sling_kpt2', 'sling_kpt3'), id=159, color=[0, 0, 128]),
+ 160:
+ dict(link=('sling_kpt3', 'sling_kpt4'), id=160, color=[0, 0, 128]),
+ 161:
+ dict(link=('sling_kpt4', 'sling_kpt5'), id=161, color=[0, 0, 128]),
+ 162:
+ dict(link=('sling_kpt5', 'sling_kpt6'), id=162, color=[0, 0, 128]),
+ 163:
+ dict(link=('sling_kpt1', 'sling_kpt6'), id=163, color=[0, 0, 128]),
+ # shorts
+ 164:
+ dict(
+ link=('shorts_kpt1', 'shorts_kpt4'), id=164, color=[128, 128,
+ 128]),
+ 165:
+ dict(
+ link=('shorts_kpt4', 'shorts_kpt5'), id=165, color=[128, 128,
+ 128]),
+ 166:
+ dict(
+ link=('shorts_kpt5', 'shorts_kpt6'), id=166, color=[128, 128,
+ 128]),
+ 167:
+ dict(
+ link=('shorts_kpt6', 'shorts_kpt7'), id=167, color=[128, 128,
+ 128]),
+ 168:
+ dict(
+ link=('shorts_kpt7', 'shorts_kpt8'), id=168, color=[128, 128,
+ 128]),
+ 169:
+ dict(
+ link=('shorts_kpt8', 'shorts_kpt9'), id=169, color=[128, 128,
+ 128]),
+ 170:
+ dict(
+ link=('shorts_kpt9', 'shorts_kpt10'),
+ id=170,
+ color=[128, 128, 128]),
+ 171:
+ dict(
+ link=('shorts_kpt10', 'shorts_kpt3'),
+ id=171,
+ color=[128, 128, 128]),
+ 172:
+ dict(
+ link=('shorts_kpt3', 'shorts_kpt2'), id=172, color=[128, 128,
+ 128]),
+ 173:
+ dict(
+ link=('shorts_kpt2', 'shorts_kpt1'), id=173, color=[128, 128,
+ 128]),
+ # trousers
+ 174:
+ dict(
+ link=('trousers_kpt1', 'trousers_kpt4'),
+ id=174,
+ color=[128, 0, 128]),
+ 175:
+ dict(
+ link=('trousers_kpt4', 'trousers_kpt5'),
+ id=175,
+ color=[128, 0, 128]),
+ 176:
+ dict(
+ link=('trousers_kpt5', 'trousers_kpt6'),
+ id=176,
+ color=[128, 0, 128]),
+ 177:
+ dict(
+ link=('trousers_kpt6', 'trousers_kpt7'),
+ id=177,
+ color=[128, 0, 128]),
+ 178:
+ dict(
+ link=('trousers_kpt7', 'trousers_kpt8'),
+ id=178,
+ color=[128, 0, 128]),
+ 179:
+ dict(
+ link=('trousers_kpt8', 'trousers_kpt9'),
+ id=179,
+ color=[128, 0, 128]),
+ 180:
+ dict(
+ link=('trousers_kpt9', 'trousers_kpt10'),
+ id=180,
+ color=[128, 0, 128]),
+ 181:
+ dict(
+ link=('trousers_kpt10', 'trousers_kpt11'),
+ id=181,
+ color=[128, 0, 128]),
+ 182:
+ dict(
+ link=('trousers_kpt11', 'trousers_kpt12'),
+ id=182,
+ color=[128, 0, 128]),
+ 183:
+ dict(
+ link=('trousers_kpt12', 'trousers_kpt13'),
+ id=183,
+ color=[128, 0, 128]),
+ 184:
+ dict(
+ link=('trousers_kpt13', 'trousers_kpt14'),
+ id=184,
+ color=[128, 0, 128]),
+ 185:
+ dict(
+ link=('trousers_kpt14', 'trousers_kpt3'),
+ id=185,
+ color=[128, 0, 128]),
+ 186:
+ dict(
+ link=('trousers_kpt3', 'trousers_kpt2'),
+ id=186,
+ color=[128, 0, 128]),
+ 187:
+ dict(
+ link=('trousers_kpt2', 'trousers_kpt1'),
+ id=187,
+ color=[128, 0, 128]),
+ # skirt
+ 188:
+ dict(link=('skirt_kpt1', 'skirt_kpt4'), id=188, color=[64, 128, 128]),
+ 189:
+ dict(link=('skirt_kpt4', 'skirt_kpt5'), id=189, color=[64, 128, 128]),
+ 190:
+ dict(link=('skirt_kpt5', 'skirt_kpt6'), id=190, color=[64, 128, 128]),
+ 191:
+ dict(link=('skirt_kpt6', 'skirt_kpt7'), id=191, color=[64, 128, 128]),
+ 192:
+ dict(link=('skirt_kpt7', 'skirt_kpt8'), id=192, color=[64, 128, 128]),
+ 193:
+ dict(link=('skirt_kpt8', 'skirt_kpt3'), id=193, color=[64, 128, 128]),
+ 194:
+ dict(link=('skirt_kpt3', 'skirt_kpt2'), id=194, color=[64, 128, 128]),
+ 195:
+ dict(link=('skirt_kpt2', 'skirt_kpt1'), id=195, color=[64, 128, 128]),
+ # short_sleeved_dress
+ 196:
+ dict(link=('ssd_kpt1', 'ssd_kpt2'), id=196, color=[64, 64, 128]),
+ 197:
+ dict(link=('ssd_kpt2', 'ssd_kpt7'), id=197, color=[64, 64, 128]),
+ 198:
+ dict(link=('ssd_kpt7', 'ssd_kpt8'), id=198, color=[64, 64, 128]),
+ 199:
+ dict(link=('ssd_kpt8', 'ssd_kpt9'), id=199, color=[64, 64, 128]),
+ 200:
+ dict(link=('ssd_kpt9', 'ssd_kpt10'), id=200, color=[64, 64, 128]),
+ 201:
+ dict(link=('ssd_kpt10', 'ssd_kpt11'), id=201, color=[64, 64, 128]),
+ 202:
+ dict(link=('ssd_kpt11', 'ssd_kpt12'), id=202, color=[64, 64, 128]),
+ 203:
+ dict(link=('ssd_kpt12', 'ssd_kpt13'), id=203, color=[64, 64, 128]),
+ 204:
+ dict(link=('ssd_kpt13', 'ssd_kpt14'), id=204, color=[64, 64, 128]),
+ 205:
+ dict(link=('ssd_kpt14', 'ssd_kpt15'), id=205, color=[64, 64, 128]),
+ 206:
+ dict(link=('ssd_kpt15', 'ssd_kpt16'), id=206, color=[64, 64, 128]),
+ 207:
+ dict(link=('ssd_kpt16', 'ssd_kpt17'), id=207, color=[64, 64, 128]),
+ 208:
+ dict(link=('ssd_kpt17', 'ssd_kpt18'), id=208, color=[64, 64, 128]),
+ 209:
+ dict(link=('ssd_kpt18', 'ssd_kpt19'), id=209, color=[64, 64, 128]),
+ 210:
+ dict(link=('ssd_kpt19', 'ssd_kpt20'), id=210, color=[64, 64, 128]),
+ 211:
+ dict(link=('ssd_kpt20', 'ssd_kpt21'), id=211, color=[64, 64, 128]),
+ 212:
+ dict(link=('ssd_kpt21', 'ssd_kpt22'), id=212, color=[64, 64, 128]),
+ 213:
+ dict(link=('ssd_kpt22', 'ssd_kpt23'), id=213, color=[64, 64, 128]),
+ 214:
+ dict(link=('ssd_kpt23', 'ssd_kpt24'), id=214, color=[64, 64, 128]),
+ 215:
+ dict(link=('ssd_kpt24', 'ssd_kpt25'), id=215, color=[64, 64, 128]),
+ 216:
+ dict(link=('ssd_kpt25', 'ssd_kpt26'), id=216, color=[64, 64, 128]),
+ 217:
+ dict(link=('ssd_kpt26', 'ssd_kpt27'), id=217, color=[64, 64, 128]),
+ 218:
+ dict(link=('ssd_kpt27', 'ssd_kpt28'), id=218, color=[64, 64, 128]),
+ 219:
+ dict(link=('ssd_kpt28', 'ssd_kpt29'), id=219, color=[64, 64, 128]),
+ 220:
+ dict(link=('ssd_kpt29', 'ssd_kpt6'), id=220, color=[64, 64, 128]),
+ 221:
+ dict(link=('ssd_kpt6', 'ssd_kpt5'), id=221, color=[64, 64, 128]),
+ 222:
+ dict(link=('ssd_kpt5', 'ssd_kpt4'), id=222, color=[64, 64, 128]),
+ 223:
+ dict(link=('ssd_kpt4', 'ssd_kpt3'), id=223, color=[64, 64, 128]),
+ 224:
+ dict(link=('ssd_kpt3', 'ssd_kpt2'), id=224, color=[64, 64, 128]),
+ 225:
+ dict(link=('ssd_kpt6', 'ssd_kpt1'), id=225, color=[64, 64, 128]),
+ # long_sleeved_dress
+ 226:
+ dict(link=('lsd_kpt1', 'lsd_kpt2'), id=226, color=[128, 64, 0]),
+ 227:
+ dict(link=('lsd_kpt2', 'lsd_kpt7'), id=228, color=[128, 64, 0]),
+ 228:
+ dict(link=('lsd_kpt7', 'lsd_kpt8'), id=228, color=[128, 64, 0]),
+ 229:
+ dict(link=('lsd_kpt8', 'lsd_kpt9'), id=229, color=[128, 64, 0]),
+ 230:
+ dict(link=('lsd_kpt9', 'lsd_kpt10'), id=230, color=[128, 64, 0]),
+ 231:
+ dict(link=('lsd_kpt10', 'lsd_kpt11'), id=231, color=[128, 64, 0]),
+ 232:
+ dict(link=('lsd_kpt11', 'lsd_kpt12'), id=232, color=[128, 64, 0]),
+ 233:
+ dict(link=('lsd_kpt12', 'lsd_kpt13'), id=233, color=[128, 64, 0]),
+ 234:
+ dict(link=('lsd_kpt13', 'lsd_kpt14'), id=234, color=[128, 64, 0]),
+ 235:
+ dict(link=('lsd_kpt14', 'lsd_kpt15'), id=235, color=[128, 64, 0]),
+ 236:
+ dict(link=('lsd_kpt15', 'lsd_kpt16'), id=236, color=[128, 64, 0]),
+ 237:
+ dict(link=('lsd_kpt16', 'lsd_kpt17'), id=237, color=[128, 64, 0]),
+ 238:
+ dict(link=('lsd_kpt17', 'lsd_kpt18'), id=238, color=[128, 64, 0]),
+ 239:
+ dict(link=('lsd_kpt18', 'lsd_kpt19'), id=239, color=[128, 64, 0]),
+ 240:
+ dict(link=('lsd_kpt19', 'lsd_kpt20'), id=240, color=[128, 64, 0]),
+ 241:
+ dict(link=('lsd_kpt20', 'lsd_kpt21'), id=241, color=[128, 64, 0]),
+ 242:
+ dict(link=('lsd_kpt21', 'lsd_kpt22'), id=242, color=[128, 64, 0]),
+ 243:
+ dict(link=('lsd_kpt22', 'lsd_kpt23'), id=243, color=[128, 64, 0]),
+ 244:
+ dict(link=('lsd_kpt23', 'lsd_kpt24'), id=244, color=[128, 64, 0]),
+ 245:
+ dict(link=('lsd_kpt24', 'lsd_kpt25'), id=245, color=[128, 64, 0]),
+ 246:
+ dict(link=('lsd_kpt25', 'lsd_kpt26'), id=246, color=[128, 64, 0]),
+ 247:
+ dict(link=('lsd_kpt26', 'lsd_kpt27'), id=247, color=[128, 64, 0]),
+ 248:
+ dict(link=('lsd_kpt27', 'lsd_kpt28'), id=248, color=[128, 64, 0]),
+ 249:
+ dict(link=('lsd_kpt28', 'lsd_kpt29'), id=249, color=[128, 64, 0]),
+ 250:
+ dict(link=('lsd_kpt29', 'lsd_kpt30'), id=250, color=[128, 64, 0]),
+ 251:
+ dict(link=('lsd_kpt30', 'lsd_kpt31'), id=251, color=[128, 64, 0]),
+ 252:
+ dict(link=('lsd_kpt31', 'lsd_kpt32'), id=252, color=[128, 64, 0]),
+ 253:
+ dict(link=('lsd_kpt32', 'lsd_kpt33'), id=253, color=[128, 64, 0]),
+ 254:
+ dict(link=('lsd_kpt33', 'lsd_kpt34'), id=254, color=[128, 64, 0]),
+ 255:
+ dict(link=('lsd_kpt34', 'lsd_kpt35'), id=255, color=[128, 64, 0]),
+ 256:
+ dict(link=('lsd_kpt35', 'lsd_kpt36'), id=256, color=[128, 64, 0]),
+ 257:
+ dict(link=('lsd_kpt36', 'lsd_kpt37'), id=257, color=[128, 64, 0]),
+ 258:
+ dict(link=('lsd_kpt37', 'lsd_kpt6'), id=258, color=[128, 64, 0]),
+ 259:
+ dict(link=('lsd_kpt6', 'lsd_kpt5'), id=259, color=[128, 64, 0]),
+ 260:
+ dict(link=('lsd_kpt5', 'lsd_kpt4'), id=260, color=[128, 64, 0]),
+ 261:
+ dict(link=('lsd_kpt4', 'lsd_kpt3'), id=261, color=[128, 64, 0]),
+ 262:
+ dict(link=('lsd_kpt3', 'lsd_kpt2'), id=262, color=[128, 64, 0]),
+ 263:
+ dict(link=('lsd_kpt6', 'lsd_kpt1'), id=263, color=[128, 64, 0]),
+ # vest_dress
+ 264:
+ dict(link=('vd_kpt1', 'vd_kpt2'), id=264, color=[128, 64, 255]),
+ 265:
+ dict(link=('vd_kpt2', 'vd_kpt7'), id=265, color=[128, 64, 255]),
+ 266:
+ dict(link=('vd_kpt7', 'vd_kpt8'), id=266, color=[128, 64, 255]),
+ 267:
+ dict(link=('vd_kpt8', 'vd_kpt9'), id=267, color=[128, 64, 255]),
+ 268:
+ dict(link=('vd_kpt9', 'vd_kpt10'), id=268, color=[128, 64, 255]),
+ 269:
+ dict(link=('vd_kpt10', 'vd_kpt11'), id=269, color=[128, 64, 255]),
+ 270:
+ dict(link=('vd_kpt11', 'vd_kpt12'), id=270, color=[128, 64, 255]),
+ 271:
+ dict(link=('vd_kpt12', 'vd_kpt13'), id=271, color=[128, 64, 255]),
+ 272:
+ dict(link=('vd_kpt13', 'vd_kpt14'), id=272, color=[128, 64, 255]),
+ 273:
+ dict(link=('vd_kpt14', 'vd_kpt15'), id=273, color=[128, 64, 255]),
+ 274:
+ dict(link=('vd_kpt15', 'vd_kpt16'), id=274, color=[128, 64, 255]),
+ 275:
+ dict(link=('vd_kpt16', 'vd_kpt17'), id=275, color=[128, 64, 255]),
+ 276:
+ dict(link=('vd_kpt17', 'vd_kpt18'), id=276, color=[128, 64, 255]),
+ 277:
+ dict(link=('vd_kpt18', 'vd_kpt19'), id=277, color=[128, 64, 255]),
+ 278:
+ dict(link=('vd_kpt19', 'vd_kpt6'), id=278, color=[128, 64, 255]),
+ 279:
+ dict(link=('vd_kpt6', 'vd_kpt5'), id=279, color=[128, 64, 255]),
+ 280:
+ dict(link=('vd_kpt5', 'vd_kpt4'), id=280, color=[128, 64, 255]),
+ 281:
+ dict(link=('vd_kpt4', 'vd_kpt3'), id=281, color=[128, 64, 255]),
+ 282:
+ dict(link=('vd_kpt3', 'vd_kpt2'), id=282, color=[128, 64, 255]),
+ 283:
+ dict(link=('vd_kpt6', 'vd_kpt1'), id=283, color=[128, 64, 255]),
+ # sling_dress
+ 284:
+ dict(link=('sd_kpt1', 'sd_kpt2'), id=284, color=[128, 64, 0]),
+ 285:
+ dict(link=('sd_kpt2', 'sd_kpt8'), id=285, color=[128, 64, 0]),
+ 286:
+ dict(link=('sd_kpt8', 'sd_kpt9'), id=286, color=[128, 64, 0]),
+ 287:
+ dict(link=('sd_kpt9', 'sd_kpt10'), id=287, color=[128, 64, 0]),
+ 288:
+ dict(link=('sd_kpt10', 'sd_kpt11'), id=288, color=[128, 64, 0]),
+ 289:
+ dict(link=('sd_kpt11', 'sd_kpt12'), id=289, color=[128, 64, 0]),
+ 290:
+ dict(link=('sd_kpt12', 'sd_kpt13'), id=290, color=[128, 64, 0]),
+ 291:
+ dict(link=('sd_kpt13', 'sd_kpt14'), id=291, color=[128, 64, 0]),
+ 292:
+ dict(link=('sd_kpt14', 'sd_kpt15'), id=292, color=[128, 64, 0]),
+ 293:
+ dict(link=('sd_kpt15', 'sd_kpt16'), id=293, color=[128, 64, 0]),
+ 294:
+ dict(link=('sd_kpt16', 'sd_kpt17'), id=294, color=[128, 64, 0]),
+ 295:
+ dict(link=('sd_kpt17', 'sd_kpt18'), id=295, color=[128, 64, 0]),
+ 296:
+ dict(link=('sd_kpt18', 'sd_kpt6'), id=296, color=[128, 64, 0]),
+ 297:
+ dict(link=('sd_kpt6', 'sd_kpt5'), id=297, color=[128, 64, 0]),
+ 298:
+ dict(link=('sd_kpt5', 'sd_kpt4'), id=298, color=[128, 64, 0]),
+ 299:
+ dict(link=('sd_kpt4', 'sd_kpt3'), id=299, color=[128, 64, 0]),
+ 300:
+ dict(link=('sd_kpt3', 'sd_kpt2'), id=300, color=[128, 64, 0]),
+ 301:
+ dict(link=('sd_kpt2', 'sd_kpt7'), id=301, color=[128, 64, 0]),
+ 302:
+ dict(link=('sd_kpt6', 'sd_kpt19'), id=302, color=[128, 64, 0]),
+ 303:
+ dict(link=('sd_kpt6', 'sd_kpt1'), id=303, color=[128, 64, 0]),
+ },
+ joint_weights=[1.] * 294,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/deepfashion_full.py b/mmpose/configs/_base_/datasets/deepfashion_full.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d989069ee7253d3a5b5f01c81135b1a472cd4b2
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/deepfashion_full.py
@@ -0,0 +1,74 @@
+dataset_info = dict(  # Landmark metadata for 'deepfashion_full': 8 keypoints, no skeleton links.
+ dataset_name='deepfashion_full',
+ paper_info=dict(  # Citation details for the DeepFashion paper (CVPR 2016).
+ author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
+ 'and Wang, Xiaogang and Tang, Xiaoou',
+ title='DeepFashion: Powering Robust Clothes Recognition '
+ 'and Retrieval with Rich Annotations',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2016',
+ homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
+ 'DeepFashion/LandmarkDetection.html',
+ ),
+ keypoint_info={  # Keyed by keypoint index; each entry's `id` mirrors its key and `swap` names its left/right counterpart.
+ 0:
+ dict(
+ name='left collar',
+ id=0,
+ color=[255, 255, 255],
+ type='',
+ swap='right collar'),
+ 1:
+ dict(
+ name='right collar',
+ id=1,
+ color=[255, 255, 255],
+ type='',
+ swap='left collar'),
+ 2:
+ dict(
+ name='left sleeve',
+ id=2,
+ color=[255, 255, 255],
+ type='',
+ swap='right sleeve'),
+ 3:
+ dict(
+ name='right sleeve',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='left sleeve'),
+ 4:
+ dict(
+ name='left waistline',
+ id=4,  # ids must stay unique: keys 4-7 must not reuse the ids of keys 0-3
+ color=[255, 255, 255],
+ type='',
+ swap='right waistline'),
+ 5:
+ dict(
+ name='right waistline',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='left waistline'),
+ 6:
+ dict(
+ name='left hem',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='right hem'),
+ 7:
+ dict(
+ name='right hem',
+ id=7,
+ color=[255, 255, 255],
+ type='',
+ swap='left hem'),
+ },
+ skeleton_info={},  # No links are defined between keypoints.
+ joint_weights=[1.] * 8,  # One uniform weight per keypoint (8 entries).
+ sigmas=[])  # No per-keypoint sigmas are provided.
diff --git a/mmpose/configs/_base_/datasets/deepfashion_lower.py b/mmpose/configs/_base_/datasets/deepfashion_lower.py
new file mode 100644
index 0000000000000000000000000000000000000000..db014a1747ca618f93a7d092d29027015b48ae3c
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/deepfashion_lower.py
@@ -0,0 +1,46 @@
+dataset_info = dict(  # Landmark metadata for 'deepfashion_lower': 4 keypoints, no skeleton links.
+ dataset_name='deepfashion_lower',
+ paper_info=dict(  # Citation details for the DeepFashion paper (CVPR 2016).
+ author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
+ 'and Wang, Xiaogang and Tang, Xiaoou',
+ title='DeepFashion: Powering Robust Clothes Recognition '
+ 'and Retrieval with Rich Annotations',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2016',
+ homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
+ 'DeepFashion/LandmarkDetection.html',
+ ),
+ keypoint_info={  # Keyed by keypoint index; each entry's `id` equals its key and `swap` names its left/right counterpart.
+ 0:
+ dict(
+ name='left waistline',
+ id=0,
+ color=[255, 255, 255],
+ type='',
+ swap='right waistline'),
+ 1:
+ dict(
+ name='right waistline',
+ id=1,
+ color=[255, 255, 255],
+ type='',
+ swap='left waistline'),
+ 2:
+ dict(
+ name='left hem',
+ id=2,
+ color=[255, 255, 255],
+ type='',
+ swap='right hem'),
+ 3:
+ dict(
+ name='right hem',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='left hem'),
+ },
+ skeleton_info={},  # No links are defined between keypoints.
+ joint_weights=[1.] * 4,  # One uniform weight per keypoint (4 entries).
+ sigmas=[])  # No per-keypoint sigmas are provided.
diff --git a/mmpose/configs/_base_/datasets/deepfashion_upper.py b/mmpose/configs/_base_/datasets/deepfashion_upper.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0b012fd37bee1ba5ed956a7a5465a8623bf0894
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/deepfashion_upper.py
@@ -0,0 +1,60 @@
+dataset_info = dict(  # Landmark metadata for 'deepfashion_upper': 6 keypoints, no skeleton links.
+ dataset_name='deepfashion_upper',
+ paper_info=dict(  # Citation details for the DeepFashion paper (CVPR 2016).
+ author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
+ 'and Wang, Xiaogang and Tang, Xiaoou',
+ title='DeepFashion: Powering Robust Clothes Recognition '
+ 'and Retrieval with Rich Annotations',
+ container='Proceedings of IEEE Conference on Computer '
+ 'Vision and Pattern Recognition (CVPR)',
+ year='2016',
+ homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
+ 'DeepFashion/LandmarkDetection.html',
+ ),
+ keypoint_info={  # Keyed by keypoint index; each entry's `id` equals its key and `swap` names its left/right counterpart.
+ 0:
+ dict(
+ name='left collar',
+ id=0,
+ color=[255, 255, 255],
+ type='',
+ swap='right collar'),
+ 1:
+ dict(
+ name='right collar',
+ id=1,
+ color=[255, 255, 255],
+ type='',
+ swap='left collar'),
+ 2:
+ dict(
+ name='left sleeve',
+ id=2,
+ color=[255, 255, 255],
+ type='',
+ swap='right sleeve'),
+ 3:
+ dict(
+ name='right sleeve',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='left sleeve'),
+ 4:
+ dict(
+ name='left hem',
+ id=4,
+ color=[255, 255, 255],
+ type='',
+ swap='right hem'),
+ 5:
+ dict(
+ name='right hem',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='left hem'),
+ },
+ skeleton_info={},  # No links are defined between keypoints.
+ joint_weights=[1.] * 6,  # One uniform weight per keypoint (6 entries).
+ sigmas=[])  # No per-keypoint sigmas are provided.
diff --git a/mmpose/configs/_base_/datasets/fly.py b/mmpose/configs/_base_/datasets/fly.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f94ff57ca93d8f562b6a61b9a67198abdcde217
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/fly.py
@@ -0,0 +1,237 @@
+dataset_info = dict(  # Keypoint/skeleton metadata for the 'fly' dataset: 32 keypoints, 25 links.
+ dataset_name='fly',
+ paper_info=dict(  # Citation details for the source paper (Nature Methods, 2019).
+ author='Pereira, Talmo D and Aldarondo, Diego E and '
+ 'Willmore, Lindsay and Kislin, Mikhail and '
+ 'Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W',
+ title='Fast animal pose estimation using deep neural networks',
+ container='Nature methods',
+ year='2019',
+ homepage='https://github.com/jgraving/DeepPoseKit-Data',
+ ),
+ keypoint_info={  # Keyed by keypoint index; `id` equals the key, `swap` names the opposite-side keypoint ('' when unpaired).
+ 0:
+ dict(name='head', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='eyeL', id=1, color=[255, 255, 255], type='', swap='eyeR'),
+ 2:
+ dict(name='eyeR', id=2, color=[255, 255, 255], type='', swap='eyeL'),
+ 3:
+ dict(name='neck', id=3, color=[255, 255, 255], type='', swap=''),
+ 4:
+ dict(name='thorax', id=4, color=[255, 255, 255], type='', swap=''),
+ 5:
+ dict(name='abdomen', id=5, color=[255, 255, 255], type='', swap=''),
+ 6:  # Keys 6-17: right-side leg keypoints (fore, mid, hind; four each).
+ dict(
+ name='forelegR1',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL1'),
+ 7:
+ dict(
+ name='forelegR2',
+ id=7,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL2'),
+ 8:
+ dict(
+ name='forelegR3',
+ id=8,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL3'),
+ 9:
+ dict(
+ name='forelegR4',
+ id=9,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL4'),
+ 10:
+ dict(
+ name='midlegR1',
+ id=10,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL1'),
+ 11:
+ dict(
+ name='midlegR2',
+ id=11,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL2'),
+ 12:
+ dict(
+ name='midlegR3',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL3'),
+ 13:
+ dict(
+ name='midlegR4',
+ id=13,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL4'),
+ 14:
+ dict(
+ name='hindlegR1',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL1'),
+ 15:
+ dict(
+ name='hindlegR2',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL2'),
+ 16:
+ dict(
+ name='hindlegR3',
+ id=16,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL3'),
+ 17:
+ dict(
+ name='hindlegR4',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL4'),
+ 18:  # Keys 18-29: left-side leg keypoints, mirroring keys 6-17 via `swap`.
+ dict(
+ name='forelegL1',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR1'),
+ 19:
+ dict(
+ name='forelegL2',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR2'),
+ 20:
+ dict(
+ name='forelegL3',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR3'),
+ 21:
+ dict(
+ name='forelegL4',
+ id=21,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR4'),
+ 22:
+ dict(
+ name='midlegL1',
+ id=22,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR1'),
+ 23:
+ dict(
+ name='midlegL2',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR2'),
+ 24:
+ dict(
+ name='midlegL3',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR3'),
+ 25:
+ dict(
+ name='midlegL4',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR4'),
+ 26:
+ dict(
+ name='hindlegL1',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR1'),
+ 27:
+ dict(
+ name='hindlegL2',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR2'),
+ 28:
+ dict(
+ name='hindlegL3',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR3'),
+ 29:
+ dict(
+ name='hindlegL4',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR4'),
+ 30:  # Keys 30-31: the two wings, paired with each other via `swap`.
+ dict(
+ name='wingL', id=30, color=[255, 255, 255], type='', swap='wingR'),
+ 31:
+ dict(
+ name='wingR', id=31, color=[255, 255, 255], type='', swap='wingL'),
+ },
+ skeleton_info={  # Links reference keypoints by name: head/body links (0-4), per-leg segment chains (5-22), wings to neck (23-24).
+ 0: dict(link=('eyeL', 'head'), id=0, color=[255, 255, 255]),
+ 1: dict(link=('eyeR', 'head'), id=1, color=[255, 255, 255]),
+ 2: dict(link=('neck', 'head'), id=2, color=[255, 255, 255]),
+ 3: dict(link=('thorax', 'neck'), id=3, color=[255, 255, 255]),
+ 4: dict(link=('abdomen', 'thorax'), id=4, color=[255, 255, 255]),
+ 5: dict(link=('forelegR2', 'forelegR1'), id=5, color=[255, 255, 255]),
+ 6: dict(link=('forelegR3', 'forelegR2'), id=6, color=[255, 255, 255]),
+ 7: dict(link=('forelegR4', 'forelegR3'), id=7, color=[255, 255, 255]),
+ 8: dict(link=('midlegR2', 'midlegR1'), id=8, color=[255, 255, 255]),
+ 9: dict(link=('midlegR3', 'midlegR2'), id=9, color=[255, 255, 255]),
+ 10: dict(link=('midlegR4', 'midlegR3'), id=10, color=[255, 255, 255]),
+ 11:
+ dict(link=('hindlegR2', 'hindlegR1'), id=11, color=[255, 255, 255]),
+ 12:
+ dict(link=('hindlegR3', 'hindlegR2'), id=12, color=[255, 255, 255]),
+ 13:
+ dict(link=('hindlegR4', 'hindlegR3'), id=13, color=[255, 255, 255]),
+ 14:
+ dict(link=('forelegL2', 'forelegL1'), id=14, color=[255, 255, 255]),
+ 15:
+ dict(link=('forelegL3', 'forelegL2'), id=15, color=[255, 255, 255]),
+ 16:
+ dict(link=('forelegL4', 'forelegL3'), id=16, color=[255, 255, 255]),
+ 17: dict(link=('midlegL2', 'midlegL1'), id=17, color=[255, 255, 255]),
+ 18: dict(link=('midlegL3', 'midlegL2'), id=18, color=[255, 255, 255]),
+ 19: dict(link=('midlegL4', 'midlegL3'), id=19, color=[255, 255, 255]),
+ 20:
+ dict(link=('hindlegL2', 'hindlegL1'), id=20, color=[255, 255, 255]),
+ 21:
+ dict(link=('hindlegL3', 'hindlegL2'), id=21, color=[255, 255, 255]),
+ 22:
+ dict(link=('hindlegL4', 'hindlegL3'), id=22, color=[255, 255, 255]),
+ 23: dict(link=('wingL', 'neck'), id=23, color=[255, 255, 255]),
+ 24: dict(link=('wingR', 'neck'), id=24, color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 32,  # One uniform weight per keypoint (32 entries).
+ sigmas=[])  # No per-keypoint sigmas are provided.
diff --git a/mmpose/configs/_base_/datasets/freihand2d.py b/mmpose/configs/_base_/datasets/freihand2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b960d10f3538801531dbccdd67aeac6e73ac572
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/freihand2d.py
@@ -0,0 +1,144 @@
+dataset_info = dict(  # Keypoint/skeleton metadata for the 'freihand' hand dataset: 21 keypoints, 20 links.
+ dataset_name='freihand',
+ paper_info=dict(  # Citation details for the FreiHAND paper (ICCV 2019).
+ author='Zimmermann, Christian and Ceylan, Duygu and '
+ 'Yang, Jimei and Russell, Bryan and '
+ 'Argus, Max and Brox, Thomas',
+ title='Freihand: A dataset for markerless capture of hand pose '
+ 'and shape from single rgb images',
+ container='Proceedings of the IEEE International '
+ 'Conference on Computer Vision',
+ year='2019',
+ homepage='https://lmb.informatik.uni-freiburg.de/projects/freihand/',
+ ),
+ keypoint_info={  # Keyed by keypoint index; `id` equals the key. Wrist plus four keypoints per finger; no keypoint has a `swap` partner.
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={  # Four links per finger: wrist to the finger's first keypoint, then 1-2, 2-3, 3-4; link color matches the finger's keypoint color.
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,  # One uniform weight per keypoint (21 entries).
+ sigmas=[])  # No per-keypoint sigmas are provided.
diff --git a/mmpose/configs/_base_/datasets/h36m.py b/mmpose/configs/_base_/datasets/h36m.py
new file mode 100644
index 0000000000000000000000000000000000000000..00a719d8b19f9ff3c5ef98476d73216055bf9186
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/h36m.py
@@ -0,0 +1,152 @@
+dataset_info = dict(  # Metadata table for the 'h36m' dataset: 17 keypoints, 16 skeleton links.
+ dataset_name='h36m',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Ionescu, Catalin and Papava, Dragos and '
+ 'Olaru, Vlad and Sminchisescu, Cristian',
+ title='Human3.6M: Large Scale Datasets and Predictive '
+ 'Methods for 3D Human Sensing in Natural Environments',
+ container='IEEE Transactions on Pattern Analysis and '
+ 'Machine Intelligence',
+ year='2014',
+ homepage='http://vision.imar.ro/human3.6m/description.php',
+ ),
+ keypoint_info={  # index -> keypoint spec; `swap` names the opposite-side keypoint ('' when there is none).
+ 0:
+ dict(name='root', id=0, color=[51, 153, 255], type='lower', swap=''),
+ 1:  # ids 1-3: right leg (hip, knee, foot).
+ dict(
+ name='right_hip',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 2:
+ dict(
+ name='right_knee',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 3:
+ dict(
+ name='right_foot',
+ id=3,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_foot'),
+ 4:  # ids 4-6: left leg (hip, knee, foot).
+ dict(
+ name='left_hip',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 5:
+ dict(
+ name='left_knee',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 6:
+ dict(
+ name='left_foot',
+ id=6,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_foot'),
+ 7:  # ids 7-10: central chain (spine, thorax, neck_base, head); none has a swap partner.
+ dict(name='spine', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(name='thorax', id=8, color=[51, 153, 255], type='upper', swap=''),
+ 9:
+ dict(
+ name='neck_base',
+ id=9,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 10:
+ dict(name='head', id=10, color=[51, 153, 255], type='upper', swap=''),
+ 11:  # ids 11-13: left arm (shoulder, elbow, wrist).
+ dict(
+ name='left_shoulder',
+ id=11,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 12:
+ dict(
+ name='left_elbow',
+ id=12,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 13:
+ dict(
+ name='left_wrist',
+ id=13,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 14:  # ids 14-16: right arm (shoulder, elbow, wrist).
+ dict(
+ name='right_shoulder',
+ id=14,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 15:
+ dict(
+ name='right_elbow',
+ id=15,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 16:
+ dict(
+ name='right_wrist',
+ id=16,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist')
+ },
+ skeleton_info={  # index -> link between two keypoint names from keypoint_info, with its own id and color.
+ 0:  # links 0-2: left leg chain starting at root.
+ dict(link=('root', 'left_hip'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_hip', 'left_knee'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('left_knee', 'left_foot'), id=2, color=[0, 255, 0]),
+ 3:  # links 3-5: right leg chain starting at root.
+ dict(link=('root', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('right_hip', 'right_knee'), id=4, color=[255, 128, 0]),
+ 5:
+ dict(link=('right_knee', 'right_foot'), id=5, color=[255, 128, 0]),
+ 6:  # links 6-9: central chain root -> spine -> thorax -> neck_base -> head.
+ dict(link=('root', 'spine'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('spine', 'thorax'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('thorax', 'neck_base'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('neck_base', 'head'), id=9, color=[51, 153, 255]),
+ 10:  # links 10-12: left arm chain starting at thorax.
+ dict(link=('thorax', 'left_shoulder'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('left_shoulder', 'left_elbow'), id=11, color=[0, 255, 0]),
+ 12:
+ dict(link=('left_elbow', 'left_wrist'), id=12, color=[0, 255, 0]),
+ 13:  # links 13-15: right arm chain starting at thorax.
+ dict(link=('thorax', 'right_shoulder'), id=13, color=[255, 128, 0]),
+ 14:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=14, color=[255, 128,
+ 0]),
+ 15:
+ dict(link=('right_elbow', 'right_wrist'), id=15, color=[255, 128, 0])
+ },
+ joint_weights=[1.] * 17,  # One weight of 1.0 per keypoint (17 entries).
+ sigmas=[],  # No sigma values are provided for this dataset.
+ stats_info=dict(bbox_center=(528., 427.), bbox_scale=400.))  # NOTE(review): presumably bbox normalization statistics; confirm against the consumer.
diff --git a/mmpose/configs/_base_/datasets/halpe.py b/mmpose/configs/_base_/datasets/halpe.py
new file mode 100644
index 0000000000000000000000000000000000000000..1385fe81dc2190684f2142449c0f288f2cb74c1a
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/halpe.py
@@ -0,0 +1,1157 @@
+dataset_info = dict(  # Metadata table for the 'halpe' dataset: 136 keypoints, 67 skeleton links.
+ dataset_name='halpe',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Li, Yong-Lu and Xu, Liang and Liu, Xinpeng and Huang, Xijie'
+ ' and Xu, Yue and Wang, Shiyi and Fang, Hao-Shu'
+ ' and Ma, Ze and Chen, Mingyang and Lu, Cewu',
+ title='PaStaNet: Toward Human Activity Knowledge Engine',
+ container='CVPR',
+ year='2020',
+ homepage='https://github.com/Fang-Haoshu/Halpe-FullBody/',
+ ),
+ keypoint_info={  # index -> keypoint spec; `swap` names the left/right counterpart ('' when there is none).
+ 0:  # ids 0-16: nose, eyes, ears, shoulders, elbows, wrists, hips, knees, ankles.
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 17:  # ids 17-19: head, neck, hip; none has a swap partner.
+ dict(name='head', id=17, color=[255, 128, 0], type='upper', swap=''),
+ 18:
+ dict(name='neck', id=18, color=[255, 128, 0], type='upper', swap=''),
+ 19:
+ dict(name='hip', id=19, color=[255, 128, 0], type='lower', swap=''),
+ 20:  # ids 20-25: foot keypoints (big toe, small toe, heel) in left/right pairs.
+ dict(
+ name='left_big_toe',
+ id=20,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_big_toe'),
+ 21:
+ dict(
+ name='right_big_toe',
+ id=21,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_big_toe'),
+ 22:
+ dict(
+ name='left_small_toe',
+ id=22,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_small_toe'),
+ 23:
+ dict(
+ name='right_small_toe',
+ id=23,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_small_toe'),
+ 24:
+ dict(
+ name='left_heel',
+ id=24,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_heel'),
+ 25:
+ dict(
+ name='right_heel',
+ id=25,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_heel'),
+ 26:  # ids 26-93: 68 face keypoints 'face-0' .. 'face-67' (id = 26 + face index), all with empty type.
+ dict(
+ name='face-0',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='face-16'),
+ 27:
+ dict(
+ name='face-1',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='face-15'),
+ 28:
+ dict(
+ name='face-2',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='face-14'),
+ 29:
+ dict(
+ name='face-3',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='face-13'),
+ 30:
+ dict(
+ name='face-4',
+ id=30,
+ color=[255, 255, 255],
+ type='',
+ swap='face-12'),
+ 31:
+ dict(
+ name='face-5',
+ id=31,
+ color=[255, 255, 255],
+ type='',
+ swap='face-11'),
+ 32:
+ dict(
+ name='face-6',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='face-10'),
+ 33:
+ dict(
+ name='face-7',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='face-9'),
+ 34:
+ dict(name='face-8', id=34, color=[255, 255, 255], type='', swap=''),
+ 35:
+ dict(
+ name='face-9',
+ id=35,
+ color=[255, 255, 255],
+ type='',
+ swap='face-7'),
+ 36:
+ dict(
+ name='face-10',
+ id=36,
+ color=[255, 255, 255],
+ type='',
+ swap='face-6'),
+ 37:
+ dict(
+ name='face-11',
+ id=37,
+ color=[255, 255, 255],
+ type='',
+ swap='face-5'),
+ 38:
+ dict(
+ name='face-12',
+ id=38,
+ color=[255, 255, 255],
+ type='',
+ swap='face-4'),
+ 39:
+ dict(
+ name='face-13',
+ id=39,
+ color=[255, 255, 255],
+ type='',
+ swap='face-3'),
+ 40:
+ dict(
+ name='face-14',
+ id=40,
+ color=[255, 255, 255],
+ type='',
+ swap='face-2'),
+ 41:
+ dict(
+ name='face-15',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='face-1'),
+ 42:
+ dict(
+ name='face-16',
+ id=42,
+ color=[255, 255, 255],
+ type='',
+ swap='face-0'),
+ 43:
+ dict(
+ name='face-17',
+ id=43,
+ color=[255, 255, 255],
+ type='',
+ swap='face-26'),
+ 44:
+ dict(
+ name='face-18',
+ id=44,
+ color=[255, 255, 255],
+ type='',
+ swap='face-25'),
+ 45:
+ dict(
+ name='face-19',
+ id=45,
+ color=[255, 255, 255],
+ type='',
+ swap='face-24'),
+ 46:
+ dict(
+ name='face-20',
+ id=46,
+ color=[255, 255, 255],
+ type='',
+ swap='face-23'),
+ 47:
+ dict(
+ name='face-21',
+ id=47,
+ color=[255, 255, 255],
+ type='',
+ swap='face-22'),
+ 48:
+ dict(
+ name='face-22',
+ id=48,
+ color=[255, 255, 255],
+ type='',
+ swap='face-21'),
+ 49:
+ dict(
+ name='face-23',
+ id=49,
+ color=[255, 255, 255],
+ type='',
+ swap='face-20'),
+ 50:
+ dict(
+ name='face-24',
+ id=50,
+ color=[255, 255, 255],
+ type='',
+ swap='face-19'),
+ 51:
+ dict(
+ name='face-25',
+ id=51,
+ color=[255, 255, 255],
+ type='',
+ swap='face-18'),
+ 52:
+ dict(
+ name='face-26',
+ id=52,
+ color=[255, 255, 255],
+ type='',
+ swap='face-17'),
+ 53:
+ dict(name='face-27', id=53, color=[255, 255, 255], type='', swap=''),
+ 54:
+ dict(name='face-28', id=54, color=[255, 255, 255], type='', swap=''),
+ 55:
+ dict(name='face-29', id=55, color=[255, 255, 255], type='', swap=''),
+ 56:
+ dict(name='face-30', id=56, color=[255, 255, 255], type='', swap=''),
+ 57:
+ dict(
+ name='face-31',
+ id=57,
+ color=[255, 255, 255],
+ type='',
+ swap='face-35'),
+ 58:
+ dict(
+ name='face-32',
+ id=58,
+ color=[255, 255, 255],
+ type='',
+ swap='face-34'),
+ 59:
+ dict(name='face-33', id=59, color=[255, 255, 255], type='', swap=''),
+ 60:
+ dict(
+ name='face-34',
+ id=60,
+ color=[255, 255, 255],
+ type='',
+ swap='face-32'),
+ 61:
+ dict(
+ name='face-35',
+ id=61,
+ color=[255, 255, 255],
+ type='',
+ swap='face-31'),
+ 62:
+ dict(
+ name='face-36',
+ id=62,
+ color=[255, 255, 255],
+ type='',
+ swap='face-45'),
+ 63:
+ dict(
+ name='face-37',
+ id=63,
+ color=[255, 255, 255],
+ type='',
+ swap='face-44'),
+ 64:
+ dict(
+ name='face-38',
+ id=64,
+ color=[255, 255, 255],
+ type='',
+ swap='face-43'),
+ 65:
+ dict(
+ name='face-39',
+ id=65,
+ color=[255, 255, 255],
+ type='',
+ swap='face-42'),
+ 66:
+ dict(
+ name='face-40',
+ id=66,
+ color=[255, 255, 255],
+ type='',
+ swap='face-47'),
+ 67:
+ dict(
+ name='face-41',
+ id=67,
+ color=[255, 255, 255],
+ type='',
+ swap='face-46'),
+ 68:
+ dict(
+ name='face-42',
+ id=68,
+ color=[255, 255, 255],
+ type='',
+ swap='face-39'),
+ 69:
+ dict(
+ name='face-43',
+ id=69,
+ color=[255, 255, 255],
+ type='',
+ swap='face-38'),
+ 70:
+ dict(
+ name='face-44',
+ id=70,
+ color=[255, 255, 255],
+ type='',
+ swap='face-37'),
+ 71:
+ dict(
+ name='face-45',
+ id=71,
+ color=[255, 255, 255],
+ type='',
+ swap='face-36'),
+ 72:
+ dict(
+ name='face-46',
+ id=72,
+ color=[255, 255, 255],
+ type='',
+ swap='face-41'),
+ 73:
+ dict(
+ name='face-47',
+ id=73,
+ color=[255, 255, 255],
+ type='',
+ swap='face-40'),
+ 74:
+ dict(
+ name='face-48',
+ id=74,
+ color=[255, 255, 255],
+ type='',
+ swap='face-54'),
+ 75:
+ dict(
+ name='face-49',
+ id=75,
+ color=[255, 255, 255],
+ type='',
+ swap='face-53'),
+ 76:
+ dict(
+ name='face-50',
+ id=76,
+ color=[255, 255, 255],
+ type='',
+ swap='face-52'),
+ 77:
+ dict(name='face-51', id=77, color=[255, 255, 255], type='', swap=''),
+ 78:
+ dict(
+ name='face-52',
+ id=78,
+ color=[255, 255, 255],
+ type='',
+ swap='face-50'),
+ 79:
+ dict(
+ name='face-53',
+ id=79,
+ color=[255, 255, 255],
+ type='',
+ swap='face-49'),
+ 80:
+ dict(
+ name='face-54',
+ id=80,
+ color=[255, 255, 255],
+ type='',
+ swap='face-48'),
+ 81:
+ dict(
+ name='face-55',
+ id=81,
+ color=[255, 255, 255],
+ type='',
+ swap='face-59'),
+ 82:
+ dict(
+ name='face-56',
+ id=82,
+ color=[255, 255, 255],
+ type='',
+ swap='face-58'),
+ 83:
+ dict(name='face-57', id=83, color=[255, 255, 255], type='', swap=''),
+ 84:
+ dict(
+ name='face-58',
+ id=84,
+ color=[255, 255, 255],
+ type='',
+ swap='face-56'),
+ 85:
+ dict(
+ name='face-59',
+ id=85,
+ color=[255, 255, 255],
+ type='',
+ swap='face-55'),
+ 86:
+ dict(
+ name='face-60',
+ id=86,
+ color=[255, 255, 255],
+ type='',
+ swap='face-64'),
+ 87:
+ dict(
+ name='face-61',
+ id=87,
+ color=[255, 255, 255],
+ type='',
+ swap='face-63'),
+ 88:
+ dict(name='face-62', id=88, color=[255, 255, 255], type='', swap=''),
+ 89:
+ dict(
+ name='face-63',
+ id=89,
+ color=[255, 255, 255],
+ type='',
+ swap='face-61'),
+ 90:
+ dict(
+ name='face-64',
+ id=90,
+ color=[255, 255, 255],
+ type='',
+ swap='face-60'),
+ 91:
+ dict(
+ name='face-65',
+ id=91,
+ color=[255, 255, 255],
+ type='',
+ swap='face-67'),
+ 92:
+ dict(name='face-66', id=92, color=[255, 255, 255], type='', swap=''),
+ 93:
+ dict(
+ name='face-67',
+ id=93,
+ color=[255, 255, 255],
+ type='',
+ swap='face-65'),
+ 94:  # ids 94-114: left hand (hand root, then joints 1-4 of thumb, forefinger, middle, ring, pinky).
+ dict(
+ name='left_hand_root',
+ id=94,
+ color=[255, 255, 255],
+ type='',
+ swap='right_hand_root'),
+ 95:
+ dict(
+ name='left_thumb1',
+ id=95,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb1'),
+ 96:
+ dict(
+ name='left_thumb2',
+ id=96,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb2'),
+ 97:
+ dict(
+ name='left_thumb3',
+ id=97,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb3'),
+ 98:
+ dict(
+ name='left_thumb4',
+ id=98,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb4'),
+ 99:
+ dict(
+ name='left_forefinger1',
+ id=99,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger1'),
+ 100:
+ dict(
+ name='left_forefinger2',
+ id=100,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger2'),
+ 101:
+ dict(
+ name='left_forefinger3',
+ id=101,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger3'),
+ 102:
+ dict(
+ name='left_forefinger4',
+ id=102,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger4'),
+ 103:
+ dict(
+ name='left_middle_finger1',
+ id=103,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger1'),
+ 104:
+ dict(
+ name='left_middle_finger2',
+ id=104,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger2'),
+ 105:
+ dict(
+ name='left_middle_finger3',
+ id=105,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger3'),
+ 106:
+ dict(
+ name='left_middle_finger4',
+ id=106,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger4'),
+ 107:
+ dict(
+ name='left_ring_finger1',
+ id=107,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger1'),
+ 108:
+ dict(
+ name='left_ring_finger2',
+ id=108,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger2'),
+ 109:
+ dict(
+ name='left_ring_finger3',
+ id=109,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger3'),
+ 110:
+ dict(
+ name='left_ring_finger4',
+ id=110,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger4'),
+ 111:
+ dict(
+ name='left_pinky_finger1',
+ id=111,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger1'),
+ 112:
+ dict(
+ name='left_pinky_finger2',
+ id=112,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger2'),
+ 113:
+ dict(
+ name='left_pinky_finger3',
+ id=113,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger3'),
+ 114:
+ dict(
+ name='left_pinky_finger4',
+ id=114,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger4'),
+ 115:  # ids 115-135: right hand, same layout as the left hand (left id + 21).
+ dict(
+ name='right_hand_root',
+ id=115,
+ color=[255, 255, 255],
+ type='',
+ swap='left_hand_root'),
+ 116:
+ dict(
+ name='right_thumb1',
+ id=116,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb1'),
+ 117:
+ dict(
+ name='right_thumb2',
+ id=117,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb2'),
+ 118:
+ dict(
+ name='right_thumb3',
+ id=118,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb3'),
+ 119:
+ dict(
+ name='right_thumb4',
+ id=119,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb4'),
+ 120:
+ dict(
+ name='right_forefinger1',
+ id=120,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger1'),
+ 121:
+ dict(
+ name='right_forefinger2',
+ id=121,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger2'),
+ 122:
+ dict(
+ name='right_forefinger3',
+ id=122,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger3'),
+ 123:
+ dict(
+ name='right_forefinger4',
+ id=123,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger4'),
+ 124:
+ dict(
+ name='right_middle_finger1',
+ id=124,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger1'),
+ 125:
+ dict(
+ name='right_middle_finger2',
+ id=125,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger2'),
+ 126:
+ dict(
+ name='right_middle_finger3',
+ id=126,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger3'),
+ 127:
+ dict(
+ name='right_middle_finger4',
+ id=127,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger4'),
+ 128:
+ dict(
+ name='right_ring_finger1',
+ id=128,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger1'),
+ 129:
+ dict(
+ name='right_ring_finger2',
+ id=129,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger2'),
+ 130:
+ dict(
+ name='right_ring_finger3',
+ id=130,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger3'),
+ 131:
+ dict(
+ name='right_ring_finger4',
+ id=131,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger4'),
+ 132:
+ dict(
+ name='right_pinky_finger1',
+ id=132,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger1'),
+ 133:
+ dict(
+ name='right_pinky_finger2',
+ id=133,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger2'),
+ 134:
+ dict(
+ name='right_pinky_finger3',
+ id=134,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger3'),
+ 135:
+ dict(
+ name='right_pinky_finger4',
+ id=135,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger4')
+ },
+ skeleton_info={  # index -> link between two keypoint names; no link references a face-* or *_hand_root keypoint.
+ 0:  # links 0-5: legs, ankle -> knee -> hip -> central 'hip'.
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('left_hip', 'hip'), id=2, color=[0, 255, 0]),
+ 3:
+ dict(link=('right_ankle', 'right_knee'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('right_knee', 'right_hip'), id=4, color=[255, 128, 0]),
+ 5:
+ dict(link=('right_hip', 'hip'), id=5, color=[255, 128, 0]),
+ 6:  # links 6-7: head -> neck -> hip.
+ dict(link=('head', 'neck'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('neck', 'hip'), id=7, color=[51, 153, 255]),
+ 8:  # links 8-13: arms, neck -> shoulder -> elbow -> wrist.
+ dict(link=('neck', 'left_shoulder'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(link=('left_shoulder', 'left_elbow'), id=9, color=[0, 255, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('neck', 'right_shoulder'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=12, color=[255, 128,
+ 0]),
+ 13:
+ dict(link=('right_elbow', 'right_wrist'), id=13, color=[255, 128, 0]),
+ 14:  # links 14-20: nose/eyes/ears, plus ear -> shoulder on each side.
+ dict(link=('left_eye', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('nose', 'left_eye'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('nose', 'right_eye'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_eye', 'left_ear'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(link=('right_eye', 'right_ear'), id=18, color=[51, 153, 255]),
+ 19:
+ dict(link=('left_ear', 'left_shoulder'), id=19, color=[51, 153, 255]),
+ 20:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=20, color=[51, 153, 255]),
+ 21:  # links 21-26: each ankle to its big toe, small toe and heel.
+ dict(link=('left_ankle', 'left_big_toe'), id=21, color=[0, 255, 0]),
+ 22:
+ dict(link=('left_ankle', 'left_small_toe'), id=22, color=[0, 255, 0]),
+ 23:
+ dict(link=('left_ankle', 'left_heel'), id=23, color=[0, 255, 0]),
+ 24:
+ dict(
+ link=('right_ankle', 'right_big_toe'), id=24, color=[255, 128, 0]),
+ 25:
+ dict(
+ link=('right_ankle', 'right_small_toe'),
+ id=25,
+ color=[255, 128, 0]),
+ 26:
+ dict(link=('right_ankle', 'right_heel'), id=26, color=[255, 128, 0]),
+ 27:  # links 27-46: left-hand finger chains, each starting at the body keypoint 'left_wrist'.
+ dict(link=('left_wrist', 'left_thumb1'), id=27, color=[255, 128, 0]),
+ 28:
+ dict(link=('left_thumb1', 'left_thumb2'), id=28, color=[255, 128, 0]),
+ 29:
+ dict(link=('left_thumb2', 'left_thumb3'), id=29, color=[255, 128, 0]),
+ 30:
+ dict(link=('left_thumb3', 'left_thumb4'), id=30, color=[255, 128, 0]),
+ 31:
+ dict(
+ link=('left_wrist', 'left_forefinger1'),
+ id=31,
+ color=[255, 153, 255]),
+ 32:
+ dict(
+ link=('left_forefinger1', 'left_forefinger2'),
+ id=32,
+ color=[255, 153, 255]),
+ 33:
+ dict(
+ link=('left_forefinger2', 'left_forefinger3'),
+ id=33,
+ color=[255, 153, 255]),
+ 34:
+ dict(
+ link=('left_forefinger3', 'left_forefinger4'),
+ id=34,
+ color=[255, 153, 255]),
+ 35:
+ dict(
+ link=('left_wrist', 'left_middle_finger1'),
+ id=35,
+ color=[102, 178, 255]),
+ 36:
+ dict(
+ link=('left_middle_finger1', 'left_middle_finger2'),
+ id=36,
+ color=[102, 178, 255]),
+ 37:
+ dict(
+ link=('left_middle_finger2', 'left_middle_finger3'),
+ id=37,
+ color=[102, 178, 255]),
+ 38:
+ dict(
+ link=('left_middle_finger3', 'left_middle_finger4'),
+ id=38,
+ color=[102, 178, 255]),
+ 39:
+ dict(
+ link=('left_wrist', 'left_ring_finger1'),
+ id=39,
+ color=[255, 51, 51]),
+ 40:
+ dict(
+ link=('left_ring_finger1', 'left_ring_finger2'),
+ id=40,
+ color=[255, 51, 51]),
+ 41:
+ dict(
+ link=('left_ring_finger2', 'left_ring_finger3'),
+ id=41,
+ color=[255, 51, 51]),
+ 42:
+ dict(
+ link=('left_ring_finger3', 'left_ring_finger4'),
+ id=42,
+ color=[255, 51, 51]),
+ 43:
+ dict(
+ link=('left_wrist', 'left_pinky_finger1'),
+ id=43,
+ color=[0, 255, 0]),
+ 44:
+ dict(
+ link=('left_pinky_finger1', 'left_pinky_finger2'),
+ id=44,
+ color=[0, 255, 0]),
+ 45:
+ dict(
+ link=('left_pinky_finger2', 'left_pinky_finger3'),
+ id=45,
+ color=[0, 255, 0]),
+ 46:
+ dict(
+ link=('left_pinky_finger3', 'left_pinky_finger4'),
+ id=46,
+ color=[0, 255, 0]),
+ 47:  # links 47-66: right-hand finger chains, each starting at the body keypoint 'right_wrist'.
+ dict(link=('right_wrist', 'right_thumb1'), id=47, color=[255, 128, 0]),
+ 48:
+ dict(
+ link=('right_thumb1', 'right_thumb2'), id=48, color=[255, 128, 0]),
+ 49:
+ dict(
+ link=('right_thumb2', 'right_thumb3'), id=49, color=[255, 128, 0]),
+ 50:
+ dict(
+ link=('right_thumb3', 'right_thumb4'), id=50, color=[255, 128, 0]),
+ 51:
+ dict(
+ link=('right_wrist', 'right_forefinger1'),
+ id=51,
+ color=[255, 153, 255]),
+ 52:
+ dict(
+ link=('right_forefinger1', 'right_forefinger2'),
+ id=52,
+ color=[255, 153, 255]),
+ 53:
+ dict(
+ link=('right_forefinger2', 'right_forefinger3'),
+ id=53,
+ color=[255, 153, 255]),
+ 54:
+ dict(
+ link=('right_forefinger3', 'right_forefinger4'),
+ id=54,
+ color=[255, 153, 255]),
+ 55:
+ dict(
+ link=('right_wrist', 'right_middle_finger1'),
+ id=55,
+ color=[102, 178, 255]),
+ 56:
+ dict(
+ link=('right_middle_finger1', 'right_middle_finger2'),
+ id=56,
+ color=[102, 178, 255]),
+ 57:
+ dict(
+ link=('right_middle_finger2', 'right_middle_finger3'),
+ id=57,
+ color=[102, 178, 255]),
+ 58:
+ dict(
+ link=('right_middle_finger3', 'right_middle_finger4'),
+ id=58,
+ color=[102, 178, 255]),
+ 59:
+ dict(
+ link=('right_wrist', 'right_ring_finger1'),
+ id=59,
+ color=[255, 51, 51]),
+ 60:
+ dict(
+ link=('right_ring_finger1', 'right_ring_finger2'),
+ id=60,
+ color=[255, 51, 51]),
+ 61:
+ dict(
+ link=('right_ring_finger2', 'right_ring_finger3'),
+ id=61,
+ color=[255, 51, 51]),
+ 62:
+ dict(
+ link=('right_ring_finger3', 'right_ring_finger4'),
+ id=62,
+ color=[255, 51, 51]),
+ 63:
+ dict(
+ link=('right_wrist', 'right_pinky_finger1'),
+ id=63,
+ color=[0, 255, 0]),
+ 64:
+ dict(
+ link=('right_pinky_finger1', 'right_pinky_finger2'),
+ id=64,
+ color=[0, 255, 0]),
+ 65:
+ dict(
+ link=('right_pinky_finger2', 'right_pinky_finger3'),
+ id=65,
+ color=[0, 255, 0]),
+ 66:
+ dict(
+ link=('right_pinky_finger3', 'right_pinky_finger4'),
+ id=66,
+ color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 136,  # One weight of 1.0 per keypoint (136 entries).
+
+ # 'https://github.com/Fang-Haoshu/Halpe-FullBody/blob/master/'
+ # 'HalpeCOCOAPI/PythonAPI/halpecocotools/cocoeval.py#L245'
+ sigmas=[  # 136 values: 26 distinct leading values, then 0.015 repeated 110 times. NOTE(review): presumably indexed by keypoint id; confirm.
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089, 0.08, 0.08, 0.08,
+ 0.089, 0.089, 0.089, 0.089, 0.089, 0.089, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015, 0.015,
+ 0.015, 0.015, 0.015, 0.015, 0.015, 0.015
+ ])
diff --git a/mmpose/configs/_base_/datasets/horse10.py b/mmpose/configs/_base_/datasets/horse10.py
new file mode 100644
index 0000000000000000000000000000000000000000..a485bf191bc151b0d76e48f3e55eb8e2dda6c506
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/horse10.py
@@ -0,0 +1,201 @@
+dataset_info = dict(  # Metadata table for the 'horse10' dataset: 22 keypoints, 18 skeleton links.
+ dataset_name='horse10',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Mathis, Alexander and Biasi, Thomas and '
+ 'Schneider, Steffen and '
+ 'Yuksekgonul, Mert and Rogers, Byron and '
+ 'Bethge, Matthias and '
+ 'Mathis, Mackenzie W',
+ title='Pretraining boosts out-of-domain robustness '
+ 'for pose estimation',
+ container='Proceedings of the IEEE/CVF Winter Conference on '
+ 'Applications of Computer Vision',
+ year='2021',
+ homepage='http://www.mackenziemathislab.org/horse10',
+ ),
+ keypoint_info={  # index -> keypoint spec; every entry here has swap='' (no swap partner is defined).
+ 0:  # ids 0-12 are typed 'upper'.
+ dict(name='Nose', id=0, color=[255, 153, 255], type='upper', swap=''),
+ 1:
+ dict(name='Eye', id=1, color=[255, 153, 255], type='upper', swap=''),
+ 2:
+ dict(
+ name='Nearknee',
+ id=2,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 3:
+ dict(
+ name='Nearfrontfetlock',
+ id=3,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 4:
+ dict(
+ name='Nearfrontfoot',
+ id=4,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 5:
+ dict(
+ name='Offknee', id=5, color=[255, 102, 255], type='upper',
+ swap=''),
+ 6:
+ dict(
+ name='Offfrontfetlock',
+ id=6,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 7:
+ dict(
+ name='Offfrontfoot',
+ id=7,
+ color=[255, 102, 255],
+ type='upper',
+ swap=''),
+ 8:
+ dict(
+ name='Shoulder',
+ id=8,
+ color=[255, 153, 255],
+ type='upper',
+ swap=''),
+ 9:
+ dict(
+ name='Midshoulder',
+ id=9,
+ color=[255, 153, 255],
+ type='upper',
+ swap=''),
+ 10:
+ dict(
+ name='Elbow', id=10, color=[255, 153, 255], type='upper', swap=''),
+ 11:
+ dict(
+ name='Girth', id=11, color=[255, 153, 255], type='upper', swap=''),
+ 12:
+ dict(
+ name='Wither', id=12, color=[255, 153, 255], type='upper',
+ swap=''),
+ 13:  # ids 13-21 are typed 'lower'.
+ dict(
+ name='Nearhindhock',
+ id=13,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 14:
+ dict(
+ name='Nearhindfetlock',
+ id=14,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 15:
+ dict(
+ name='Nearhindfoot',
+ id=15,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 16:
+ dict(name='Hip', id=16, color=[255, 153, 255], type='lower', swap=''),
+ 17:
+ dict(
+ name='Stifle', id=17, color=[255, 153, 255], type='lower',
+ swap=''),
+ 18:
+ dict(
+ name='Offhindhock',
+ id=18,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 19:
+ dict(
+ name='Offhindfetlock',
+ id=19,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 20:
+ dict(
+ name='Offhindfoot',
+ id=20,
+ color=[255, 51, 255],
+ type='lower',
+ swap=''),
+ 21:
+ dict(
+ name='Ischium',
+ id=21,
+ color=[255, 153, 255],
+ type='lower',
+ swap='')
+ },
+ skeleton_info={  # index -> link between two keypoint names from keypoint_info, with its own id and color.
+ 0:  # links 0-9: Nose -> Eye -> Wither, then a closed loop from Wither through Hip, Ischium, Stifle, Girth, Elbow, Shoulder, Midshoulder back to Wither.
+ dict(link=('Nose', 'Eye'), id=0, color=[255, 153, 255]),
+ 1:
+ dict(link=('Eye', 'Wither'), id=1, color=[255, 153, 255]),
+ 2:
+ dict(link=('Wither', 'Hip'), id=2, color=[255, 153, 255]),
+ 3:
+ dict(link=('Hip', 'Ischium'), id=3, color=[255, 153, 255]),
+ 4:
+ dict(link=('Ischium', 'Stifle'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('Stifle', 'Girth'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('Girth', 'Elbow'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('Elbow', 'Shoulder'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('Shoulder', 'Midshoulder'), id=8, color=[255, 153, 255]),
+ 9:
+ dict(link=('Midshoulder', 'Wither'), id=9, color=[255, 153, 255]),
+ 10:  # links 10-13: front legs, knee -> fetlock -> foot for the Near and Off sides.
+ dict(
+ link=('Nearknee', 'Nearfrontfetlock'),
+ id=10,
+ color=[255, 102, 255]),
+ 11:
+ dict(
+ link=('Nearfrontfetlock', 'Nearfrontfoot'),
+ id=11,
+ color=[255, 102, 255]),
+ 12:
+ dict(
+ link=('Offknee', 'Offfrontfetlock'), id=12, color=[255, 102, 255]),
+ 13:
+ dict(
+ link=('Offfrontfetlock', 'Offfrontfoot'),
+ id=13,
+ color=[255, 102, 255]),
+ 14:  # links 14-17: hind legs, hock -> fetlock -> foot for the Near and Off sides.
+ dict(
+ link=('Nearhindhock', 'Nearhindfetlock'),
+ id=14,
+ color=[255, 51, 255]),
+ 15:
+ dict(
+ link=('Nearhindfetlock', 'Nearhindfoot'),
+ id=15,
+ color=[255, 51, 255]),
+ 16:
+ dict(
+ link=('Offhindhock', 'Offhindfetlock'),
+ id=16,
+ color=[255, 51, 255]),
+ 17:
+ dict(
+ link=('Offhindfetlock', 'Offhindfoot'),
+ id=17,
+ color=[255, 51, 255])
+ },
+ joint_weights=[1.] * 22,  # One weight of 1.0 per keypoint (22 entries).
+ sigmas=[])  # No sigma values are provided for this dataset.
diff --git a/mmpose/configs/_base_/datasets/interhand2d.py b/mmpose/configs/_base_/datasets/interhand2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..0134f07de5bf536eaffbf71155a7e6eb33b24f0a
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/interhand2d.py
@@ -0,0 +1,142 @@
+dataset_info = dict(  # Metadata table for the 'interhand2d' dataset: 21 single-hand keypoints, 20 skeleton links.
+ dataset_name='interhand2d',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and '
+ 'Shiratori, Takaaki and Lee, Kyoung Mu',
+ title='InterHand2.6M: A dataset and baseline for 3D '
+ 'interacting hand pose estimation from a single RGB image',
+ container='arXiv',
+ year='2020',
+ homepage='https://mks0601.github.io/InterHand2.6M/',
+ ),
+ keypoint_info={  # index -> keypoint spec; each finger is listed from joint 4 down to joint 1, wrist last. All type/swap fields are ''.
+ 0:  # ids 0-3: thumb4 .. thumb1.
+ dict(name='thumb4', id=0, color=[255, 128, 0], type='', swap=''),
+ 1:
+ dict(name='thumb3', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb1', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:  # ids 4-7: forefinger4 .. forefinger1.
+ dict(
+ name='forefinger4', id=4, color=[255, 153, 255], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger3', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger1', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:  # ids 8-11: middle_finger4 .. middle_finger1.
+ dict(
+ name='middle_finger4',
+ id=8,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 9:
+ dict(
+ name='middle_finger3',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger1',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:  # ids 12-15: ring_finger4 .. ring_finger1.
+ dict(
+ name='ring_finger4', id=12, color=[255, 51, 51], type='', swap=''),
+ 13:
+ dict(
+ name='ring_finger3', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger1', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:  # ids 16-19: pinky_finger4 .. pinky_finger1.
+ dict(name='pinky_finger4', id=16, color=[0, 255, 0], type='', swap=''),
+ 17:
+ dict(name='pinky_finger3', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger1', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='wrist', id=20, color=[255, 255, 255], type='', swap='')
+ },
+ skeleton_info={  # index -> link between two keypoint names; each finger is a 4-link chain wrist -> joint1 -> joint2 -> joint3 -> joint4.
+ 0:  # links 0-3: thumb chain.
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:  # links 4-7: forefinger chain.
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:  # links 8-11: middle finger chain.
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:  # links 12-15: ring finger chain.
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:  # links 16-19: pinky finger chain.
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,  # One weight of 1.0 per keypoint (21 entries).
+ sigmas=[])  # No sigma values are provided for this dataset.
diff --git a/mmpose/configs/_base_/datasets/interhand3d.py b/mmpose/configs/_base_/datasets/interhand3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2bd8121c281c741ec9b980c7570ebef8a632993
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/interhand3d.py
@@ -0,0 +1,487 @@
+dataset_info = dict(
+ dataset_name='interhand3d',
+ paper_info=dict(
+ author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and '
+ 'Shiratori, Takaaki and Lee, Kyoung Mu',
+ title='InterHand2.6M: A dataset and baseline for 3D '
+ 'interacting hand pose estimation from a single RGB image',
+ container='arXiv',
+ year='2020',
+ homepage='https://mks0601.github.io/InterHand2.6M/',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_thumb4',
+ id=0,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb4'),
+ 1:
+ dict(
+ name='right_thumb3',
+ id=1,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb3'),
+ 2:
+ dict(
+ name='right_thumb2',
+ id=2,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb2'),
+ 3:
+ dict(
+ name='right_thumb1',
+ id=3,
+ color=[255, 128, 0],
+ type='',
+ swap='left_thumb1'),
+ 4:
+ dict(
+ name='right_forefinger4',
+ id=4,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger4'),
+ 5:
+ dict(
+ name='right_forefinger3',
+ id=5,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger3'),
+ 6:
+ dict(
+ name='right_forefinger2',
+ id=6,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger2'),
+ 7:
+ dict(
+ name='right_forefinger1',
+ id=7,
+ color=[255, 153, 255],
+ type='',
+ swap='left_forefinger1'),
+ 8:
+ dict(
+ name='right_middle_finger4',
+ id=8,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger4'),
+ 9:
+ dict(
+ name='right_middle_finger3',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger3'),
+ 10:
+ dict(
+ name='right_middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger2'),
+ 11:
+ dict(
+ name='right_middle_finger1',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap='left_middle_finger1'),
+ 12:
+ dict(
+ name='right_ring_finger4',
+ id=12,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger4'),
+ 13:
+ dict(
+ name='right_ring_finger3',
+ id=13,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger3'),
+ 14:
+ dict(
+ name='right_ring_finger2',
+ id=14,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger2'),
+ 15:
+ dict(
+ name='right_ring_finger1',
+ id=15,
+ color=[255, 51, 51],
+ type='',
+ swap='left_ring_finger1'),
+ 16:
+ dict(
+ name='right_pinky_finger4',
+ id=16,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger4'),
+ 17:
+ dict(
+ name='right_pinky_finger3',
+ id=17,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger3'),
+ 18:
+ dict(
+ name='right_pinky_finger2',
+ id=18,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger2'),
+ 19:
+ dict(
+ name='right_pinky_finger1',
+ id=19,
+ color=[0, 255, 0],
+ type='',
+ swap='left_pinky_finger1'),
+ 20:
+ dict(
+ name='right_wrist',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='left_wrist'),
+ 21:
+ dict(
+ name='left_thumb4',
+ id=21,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb4'),
+ 22:
+ dict(
+ name='left_thumb3',
+ id=22,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb3'),
+ 23:
+ dict(
+ name='left_thumb2',
+ id=23,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb2'),
+ 24:
+ dict(
+ name='left_thumb1',
+ id=24,
+ color=[255, 128, 0],
+ type='',
+ swap='right_thumb1'),
+ 25:
+ dict(
+ name='left_forefinger4',
+ id=25,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger4'),
+ 26:
+ dict(
+ name='left_forefinger3',
+ id=26,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger3'),
+ 27:
+ dict(
+ name='left_forefinger2',
+ id=27,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger2'),
+ 28:
+ dict(
+ name='left_forefinger1',
+ id=28,
+ color=[255, 153, 255],
+ type='',
+ swap='right_forefinger1'),
+ 29:
+ dict(
+ name='left_middle_finger4',
+ id=29,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger4'),
+ 30:
+ dict(
+ name='left_middle_finger3',
+ id=30,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger3'),
+ 31:
+ dict(
+ name='left_middle_finger2',
+ id=31,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger2'),
+ 32:
+ dict(
+ name='left_middle_finger1',
+ id=32,
+ color=[102, 178, 255],
+ type='',
+ swap='right_middle_finger1'),
+ 33:
+ dict(
+ name='left_ring_finger4',
+ id=33,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger4'),
+ 34:
+ dict(
+ name='left_ring_finger3',
+ id=34,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger3'),
+ 35:
+ dict(
+ name='left_ring_finger2',
+ id=35,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger2'),
+ 36:
+ dict(
+ name='left_ring_finger1',
+ id=36,
+ color=[255, 51, 51],
+ type='',
+ swap='right_ring_finger1'),
+ 37:
+ dict(
+ name='left_pinky_finger4',
+ id=37,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger4'),
+ 38:
+ dict(
+ name='left_pinky_finger3',
+ id=38,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger3'),
+ 39:
+ dict(
+ name='left_pinky_finger2',
+ id=39,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger2'),
+ 40:
+ dict(
+ name='left_pinky_finger1',
+ id=40,
+ color=[0, 255, 0],
+ type='',
+ swap='right_pinky_finger1'),
+ 41:
+ dict(
+ name='left_wrist',
+ id=41,
+ color=[255, 255, 255],
+ type='',
+ swap='right_wrist'),
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_wrist', 'right_thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_thumb1', 'right_thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_thumb2', 'right_thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_thumb3', 'right_thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(
+ link=('right_wrist', 'right_forefinger1'),
+ id=4,
+ color=[255, 153, 255]),
+ 5:
+ dict(
+ link=('right_forefinger1', 'right_forefinger2'),
+ id=5,
+ color=[255, 153, 255]),
+ 6:
+ dict(
+ link=('right_forefinger2', 'right_forefinger3'),
+ id=6,
+ color=[255, 153, 255]),
+ 7:
+ dict(
+ link=('right_forefinger3', 'right_forefinger4'),
+ id=7,
+ color=[255, 153, 255]),
+ 8:
+ dict(
+ link=('right_wrist', 'right_middle_finger1'),
+ id=8,
+ color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('right_middle_finger1', 'right_middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('right_middle_finger2', 'right_middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('right_middle_finger3', 'right_middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(
+ link=('right_wrist', 'right_ring_finger1'),
+ id=12,
+ color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('right_ring_finger1', 'right_ring_finger2'),
+ id=13,
+ color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('right_ring_finger2', 'right_ring_finger3'),
+ id=14,
+ color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('right_ring_finger3', 'right_ring_finger4'),
+ id=15,
+ color=[255, 51, 51]),
+ 16:
+ dict(
+ link=('right_wrist', 'right_pinky_finger1'),
+ id=16,
+ color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('right_pinky_finger1', 'right_pinky_finger2'),
+ id=17,
+ color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('right_pinky_finger2', 'right_pinky_finger3'),
+ id=18,
+ color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('right_pinky_finger3', 'right_pinky_finger4'),
+ id=19,
+ color=[0, 255, 0]),
+ 20:
+ dict(link=('left_wrist', 'left_thumb1'), id=20, color=[255, 128, 0]),
+ 21:
+ dict(link=('left_thumb1', 'left_thumb2'), id=21, color=[255, 128, 0]),
+ 22:
+ dict(link=('left_thumb2', 'left_thumb3'), id=22, color=[255, 128, 0]),
+ 23:
+ dict(link=('left_thumb3', 'left_thumb4'), id=23, color=[255, 128, 0]),
+ 24:
+ dict(
+ link=('left_wrist', 'left_forefinger1'),
+ id=24,
+ color=[255, 153, 255]),
+ 25:
+ dict(
+ link=('left_forefinger1', 'left_forefinger2'),
+ id=25,
+ color=[255, 153, 255]),
+ 26:
+ dict(
+ link=('left_forefinger2', 'left_forefinger3'),
+ id=26,
+ color=[255, 153, 255]),
+ 27:
+ dict(
+ link=('left_forefinger3', 'left_forefinger4'),
+ id=27,
+ color=[255, 153, 255]),
+ 28:
+ dict(
+ link=('left_wrist', 'left_middle_finger1'),
+ id=28,
+ color=[102, 178, 255]),
+ 29:
+ dict(
+ link=('left_middle_finger1', 'left_middle_finger2'),
+ id=29,
+ color=[102, 178, 255]),
+ 30:
+ dict(
+ link=('left_middle_finger2', 'left_middle_finger3'),
+ id=30,
+ color=[102, 178, 255]),
+ 31:
+ dict(
+ link=('left_middle_finger3', 'left_middle_finger4'),
+ id=31,
+ color=[102, 178, 255]),
+ 32:
+ dict(
+ link=('left_wrist', 'left_ring_finger1'),
+ id=32,
+ color=[255, 51, 51]),
+ 33:
+ dict(
+ link=('left_ring_finger1', 'left_ring_finger2'),
+ id=33,
+ color=[255, 51, 51]),
+ 34:
+ dict(
+ link=('left_ring_finger2', 'left_ring_finger3'),
+ id=34,
+ color=[255, 51, 51]),
+ 35:
+ dict(
+ link=('left_ring_finger3', 'left_ring_finger4'),
+ id=35,
+ color=[255, 51, 51]),
+ 36:
+ dict(
+ link=('left_wrist', 'left_pinky_finger1'),
+ id=36,
+ color=[0, 255, 0]),
+ 37:
+ dict(
+ link=('left_pinky_finger1', 'left_pinky_finger2'),
+ id=37,
+ color=[0, 255, 0]),
+ 38:
+ dict(
+ link=('left_pinky_finger2', 'left_pinky_finger3'),
+ id=38,
+ color=[0, 255, 0]),
+ 39:
+ dict(
+ link=('left_pinky_finger3', 'left_pinky_finger4'),
+ id=39,
+ color=[0, 255, 0]),
+ },
+ joint_weights=[1.] * 42,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/jhmdb.py b/mmpose/configs/_base_/datasets/jhmdb.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b37488498a2bade1fa6f2ff6532fcd219071803
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/jhmdb.py
@@ -0,0 +1,129 @@
+dataset_info = dict(
+ dataset_name='jhmdb',
+ paper_info=dict(
+ author='H. Jhuang and J. Gall and S. Zuffi and '
+ 'C. Schmid and M. J. Black',
+ title='Towards understanding action recognition',
+ container='International Conf. on Computer Vision (ICCV)',
+ year='2013',
+ homepage='http://jhmdb.is.tue.mpg.de/dataset',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='neck', id=0, color=[255, 128, 0], type='upper', swap=''),
+ 1:
+ dict(name='belly', id=1, color=[255, 128, 0], type='upper', swap=''),
+ 2:
+ dict(name='head', id=2, color=[255, 128, 0], type='upper', swap=''),
+ 3:
+ dict(
+ name='right_shoulder',
+ id=3,
+ color=[0, 255, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 4:
+ dict(
+ name='left_shoulder',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 5:
+ dict(
+ name='right_hip',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_hip'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[51, 153, 255],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='right_elbow',
+ id=7,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_elbow'),
+ 8:
+ dict(
+ name='left_elbow',
+ id=8,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_elbow'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[51, 153, 255],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='left_knee',
+ id=10,
+ color=[255, 128, 0],
+ type='lower',
+ swap='right_knee'),
+ 11:
+ dict(
+ name='right_wrist',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 12:
+ dict(
+ name='left_wrist',
+ id=12,
+ color=[255, 128, 0],
+ type='upper',
+ swap='right_wrist'),
+ 13:
+ dict(
+ name='right_ankle',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='left_ankle'),
+ 14:
+ dict(
+ name='left_ankle',
+ id=14,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle')
+ },
+ skeleton_info={
+ 0: dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1: dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2: dict(link=('right_hip', 'belly'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('belly', 'left_hip'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
+ 6: dict(link=('belly', 'neck'), id=6, color=[51, 153, 255]),
+ 7: dict(link=('neck', 'head'), id=7, color=[51, 153, 255]),
+ 8: dict(link=('neck', 'right_shoulder'), id=8, color=[255, 128, 0]),
+ 9: dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('right_elbow', 'right_wrist'), id=10, color=[255, 128, 0]),
+ 11: dict(link=('neck', 'left_shoulder'), id=11, color=[0, 255, 0]),
+ 12:
+ dict(link=('left_shoulder', 'left_elbow'), id=12, color=[0, 255, 0]),
+ 13: dict(link=('left_elbow', 'left_wrist'), id=13, color=[0, 255, 0])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2, 1.5, 1.5, 1.5, 1.5
+ ],
+ # Adapted from COCO dataset.
+ sigmas=[
+ 0.025, 0.107, 0.025, 0.079, 0.079, 0.107, 0.107, 0.072, 0.072, 0.087,
+ 0.087, 0.062, 0.062, 0.089, 0.089
+ ])
diff --git a/mmpose/configs/_base_/datasets/lapa.py b/mmpose/configs/_base_/datasets/lapa.py
new file mode 100644
index 0000000000000000000000000000000000000000..26a0843404eaed46c0af9249aee6f90ef952c216
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/lapa.py
@@ -0,0 +1,688 @@
+dataset_info = dict(
+ dataset_name='lapa',
+ paper_info=dict(
+ author='Liu, Yinglu and Shi, Hailin and Shen, Hao and Si, '
+ 'Yue and Wang, Xiaobo and Mei, Tao',
+ title='A New Dataset and Boundary-Attention Semantic '
+ 'Segmentation for Face Parsing.',
+ container='Proceedings of the AAAI Conference on '
+ 'Artificial Intelligence 2020',
+ year='2020',
+ homepage='https://github.com/JDAI-CV/lapa-dataset',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='kpt-0', id=0, color=[255, 0, 0], type='upper',
+ swap='kpt-32'),
+ 1:
+ dict(
+ name='kpt-1', id=1, color=[255, 0, 0], type='upper',
+ swap='kpt-31'),
+ 2:
+ dict(
+ name='kpt-2', id=2, color=[255, 0, 0], type='upper',
+ swap='kpt-30'),
+ 3:
+ dict(
+ name='kpt-3', id=3, color=[255, 0, 0], type='lower',
+ swap='kpt-29'),
+ 4:
+ dict(
+ name='kpt-4', id=4, color=[255, 0, 0], type='lower',
+ swap='kpt-28'),
+ 5:
+ dict(
+ name='kpt-5', id=5, color=[255, 0, 0], type='lower',
+ swap='kpt-27'),
+ 6:
+ dict(
+ name='kpt-6', id=6, color=[255, 0, 0], type='lower',
+ swap='kpt-26'),
+ 7:
+ dict(
+ name='kpt-7', id=7, color=[255, 0, 0], type='lower',
+ swap='kpt-25'),
+ 8:
+ dict(
+ name='kpt-8', id=8, color=[255, 0, 0], type='lower',
+ swap='kpt-24'),
+ 9:
+ dict(
+ name='kpt-9', id=9, color=[255, 0, 0], type='lower',
+ swap='kpt-23'),
+ 10:
+ dict(
+ name='kpt-10',
+ id=10,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-22'),
+ 11:
+ dict(
+ name='kpt-11',
+ id=11,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-21'),
+ 12:
+ dict(
+ name='kpt-12',
+ id=12,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-20'),
+ 13:
+ dict(
+ name='kpt-13',
+ id=13,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-19'),
+ 14:
+ dict(
+ name='kpt-14',
+ id=14,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-18'),
+ 15:
+ dict(
+ name='kpt-15',
+ id=15,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-17'),
+ 16:
+ dict(name='kpt-16', id=16, color=[255, 0, 0], type='lower', swap=''),
+ 17:
+ dict(
+ name='kpt-17',
+ id=17,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-15'),
+ 18:
+ dict(
+ name='kpt-18',
+ id=18,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-14'),
+ 19:
+ dict(
+ name='kpt-19',
+ id=19,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-13'),
+ 20:
+ dict(
+ name='kpt-20',
+ id=20,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-12'),
+ 21:
+ dict(
+ name='kpt-21',
+ id=21,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-11'),
+ 22:
+ dict(
+ name='kpt-22',
+ id=22,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-10'),
+ 23:
+ dict(
+ name='kpt-23',
+ id=23,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-9'),
+ 24:
+ dict(
+ name='kpt-24',
+ id=24,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-8'),
+ 25:
+ dict(
+ name='kpt-25',
+ id=25,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-7'),
+ 26:
+ dict(
+ name='kpt-26',
+ id=26,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-6'),
+ 27:
+ dict(
+ name='kpt-27',
+ id=27,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-5'),
+ 28:
+ dict(
+ name='kpt-28',
+ id=28,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-4'),
+ 29:
+ dict(
+ name='kpt-29',
+ id=29,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-3'),
+ 30:
+ dict(
+ name='kpt-30',
+ id=30,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-2'),
+ 31:
+ dict(
+ name='kpt-31',
+ id=31,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-1'),
+ 32:
+ dict(
+ name='kpt-32',
+ id=32,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-0'),
+ 33:
+ dict(
+ name='kpt-33',
+ id=33,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-46'),
+ 34:
+ dict(
+ name='kpt-34',
+ id=34,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-45'),
+ 35:
+ dict(
+ name='kpt-35',
+ id=35,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-44'),
+ 36:
+ dict(
+ name='kpt-36',
+ id=36,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-43'),
+ 37:
+ dict(
+ name='kpt-37',
+ id=37,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-42'),
+ 38:
+ dict(
+ name='kpt-38',
+ id=38,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-50'),
+ 39:
+ dict(
+ name='kpt-39',
+ id=39,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-49'),
+ 40:
+ dict(
+ name='kpt-40',
+ id=40,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-48'),
+ 41:
+ dict(
+ name='kpt-41',
+ id=41,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-47'),
+ 42:
+ dict(
+ name='kpt-42',
+ id=42,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-37'),
+ 43:
+ dict(
+ name='kpt-43',
+ id=43,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-36'),
+ 44:
+ dict(
+ name='kpt-44',
+ id=44,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-35'),
+ 45:
+ dict(
+ name='kpt-45',
+ id=45,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-34'),
+ 46:
+ dict(
+ name='kpt-46',
+ id=46,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-33'),
+ 47:
+ dict(
+ name='kpt-47',
+ id=47,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-41'),
+ 48:
+ dict(
+ name='kpt-48',
+ id=48,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-40'),
+ 49:
+ dict(
+ name='kpt-49',
+ id=49,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-39'),
+ 50:
+ dict(
+ name='kpt-50',
+ id=50,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-38'),
+ 51:
+ dict(name='kpt-51', id=51, color=[255, 0, 0], type='upper', swap=''),
+ 52:
+ dict(name='kpt-52', id=52, color=[255, 0, 0], type='upper', swap=''),
+ 53:
+ dict(name='kpt-53', id=53, color=[255, 0, 0], type='lower', swap=''),
+ 54:
+ dict(name='kpt-54', id=54, color=[255, 0, 0], type='lower', swap=''),
+ 55:
+ dict(
+ name='kpt-55',
+ id=55,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-65'),
+ 56:
+ dict(
+ name='kpt-56',
+ id=56,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-64'),
+ 57:
+ dict(
+ name='kpt-57',
+ id=57,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-63'),
+ 58:
+ dict(
+ name='kpt-58',
+ id=58,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-62'),
+ 59:
+ dict(
+ name='kpt-59',
+ id=59,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-61'),
+ 60:
+ dict(name='kpt-60', id=60, color=[255, 0, 0], type='lower', swap=''),
+ 61:
+ dict(
+ name='kpt-61',
+ id=61,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-59'),
+ 62:
+ dict(
+ name='kpt-62',
+ id=62,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-58'),
+ 63:
+ dict(
+ name='kpt-63',
+ id=63,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-57'),
+ 64:
+ dict(
+ name='kpt-64',
+ id=64,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-56'),
+ 65:
+ dict(
+ name='kpt-65',
+ id=65,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-55'),
+ 66:
+ dict(
+ name='kpt-66',
+ id=66,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-79'),
+ 67:
+ dict(
+ name='kpt-67',
+ id=67,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-78'),
+ 68:
+ dict(
+ name='kpt-68',
+ id=68,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-77'),
+ 69:
+ dict(
+ name='kpt-69',
+ id=69,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-76'),
+ 70:
+ dict(
+ name='kpt-70',
+ id=70,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-75'),
+ 71:
+ dict(
+ name='kpt-71',
+ id=71,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-82'),
+ 72:
+ dict(
+ name='kpt-72',
+ id=72,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-81'),
+ 73:
+ dict(
+ name='kpt-73',
+ id=73,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-80'),
+ 74:
+ dict(
+ name='kpt-74',
+ id=74,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-83'),
+ 75:
+ dict(
+ name='kpt-75',
+ id=75,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-70'),
+ 76:
+ dict(
+ name='kpt-76',
+ id=76,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-69'),
+ 77:
+ dict(
+ name='kpt-77',
+ id=77,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-68'),
+ 78:
+ dict(
+ name='kpt-78',
+ id=78,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-67'),
+ 79:
+ dict(
+ name='kpt-79',
+ id=79,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-66'),
+ 80:
+ dict(
+ name='kpt-80',
+ id=80,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-73'),
+ 81:
+ dict(
+ name='kpt-81',
+ id=81,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-72'),
+ 82:
+ dict(
+ name='kpt-82',
+ id=82,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-71'),
+ 83:
+ dict(
+ name='kpt-83',
+ id=83,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-74'),
+ 84:
+ dict(
+ name='kpt-84',
+ id=84,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-90'),
+ 85:
+ dict(
+ name='kpt-85',
+ id=85,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-89'),
+ 86:
+ dict(
+ name='kpt-86',
+ id=86,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-88'),
+ 87:
+ dict(name='kpt-87', id=87, color=[255, 0, 0], type='lower', swap=''),
+ 88:
+ dict(
+ name='kpt-88',
+ id=88,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-86'),
+ 89:
+ dict(
+ name='kpt-89',
+ id=89,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-85'),
+ 90:
+ dict(
+ name='kpt-90',
+ id=90,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-84'),
+ 91:
+ dict(
+ name='kpt-91',
+ id=91,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-95'),
+ 92:
+ dict(
+ name='kpt-92',
+ id=92,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-94'),
+ 93:
+ dict(name='kpt-93', id=93, color=[255, 0, 0], type='lower', swap=''),
+ 94:
+ dict(
+ name='kpt-94',
+ id=94,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-92'),
+ 95:
+ dict(
+ name='kpt-95',
+ id=95,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-91'),
+ 96:
+ dict(
+ name='kpt-96',
+ id=96,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-100'),
+ 97:
+ dict(
+ name='kpt-97',
+ id=97,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-99'),
+ 98:
+ dict(name='kpt-98', id=98, color=[255, 0, 0], type='lower', swap=''),
+ 99:
+ dict(
+ name='kpt-99',
+ id=99,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-97'),
+ 100:
+ dict(
+ name='kpt-100',
+ id=100,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-96'),
+ 101:
+ dict(
+ name='kpt-101',
+ id=101,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-103'),
+ 102:
+ dict(name='kpt-102', id=102, color=[255, 0, 0], type='lower', swap=''),
+ 103:
+ dict(
+ name='kpt-103',
+ id=103,
+ color=[255, 0, 0],
+ type='lower',
+ swap='kpt-101'),
+ 104:
+ dict(
+ name='kpt-104',
+ id=104,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-105'),
+ 105:
+ dict(
+ name='kpt-105',
+ id=105,
+ color=[255, 0, 0],
+ type='upper',
+ swap='kpt-104')
+ },
+ skeleton_info={},
+ joint_weights=[
+ 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8,
+ 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8,
+ 0.8, 0.8, 0.8, 0.8, 0.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0,
+ 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0,
+ 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5,
+ 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.0, 1.0
+ ],
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/locust.py b/mmpose/configs/_base_/datasets/locust.py
new file mode 100644
index 0000000000000000000000000000000000000000..db3fa15aa060b5806faae7a21f65460f77be2745
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/locust.py
@@ -0,0 +1,263 @@
+dataset_info = dict(
+ dataset_name='locust',
+ paper_info=dict(
+ author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and '
+ 'Li, Liang and Koger, Benjamin and Costelloe, Blair R and '
+ 'Couzin, Iain D',
+ title='DeepPoseKit, a software toolkit for fast and robust '
+ 'animal pose estimation using deep learning',
+ container='Elife',
+ year='2019',
+ homepage='https://github.com/jgraving/DeepPoseKit-Data',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='head', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='neck', id=1, color=[255, 255, 255], type='', swap=''),
+ 2:
+ dict(name='thorax', id=2, color=[255, 255, 255], type='', swap=''),
+ 3:
+ dict(name='abdomen1', id=3, color=[255, 255, 255], type='', swap=''),
+ 4:
+ dict(name='abdomen2', id=4, color=[255, 255, 255], type='', swap=''),
+ 5:
+ dict(
+ name='anttipL',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='anttipR'),
+ 6:
+ dict(
+ name='antbaseL',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='antbaseR'),
+ 7:
+ dict(name='eyeL', id=7, color=[255, 255, 255], type='', swap='eyeR'),
+ 8:
+ dict(
+ name='forelegL1',
+ id=8,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR1'),
+ 9:
+ dict(
+ name='forelegL2',
+ id=9,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR2'),
+ 10:
+ dict(
+ name='forelegL3',
+ id=10,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR3'),
+ 11:
+ dict(
+ name='forelegL4',
+ id=11,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR4'),
+ 12:
+ dict(
+ name='midlegL1',
+ id=12,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR1'),
+ 13:
+ dict(
+ name='midlegL2',
+ id=13,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR2'),
+ 14:
+ dict(
+ name='midlegL3',
+ id=14,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR3'),
+ 15:
+ dict(
+ name='midlegL4',
+ id=15,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegR4'),
+ 16:
+ dict(
+ name='hindlegL1',
+ id=16,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR1'),
+ 17:
+ dict(
+ name='hindlegL2',
+ id=17,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR2'),
+ 18:
+ dict(
+ name='hindlegL3',
+ id=18,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR3'),
+ 19:
+ dict(
+ name='hindlegL4',
+ id=19,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR4'),
+ 20:
+ dict(
+ name='anttipR',
+ id=20,
+ color=[255, 255, 255],
+ type='',
+ swap='anttipL'),
+ 21:
+ dict(
+ name='antbaseR',
+ id=21,
+ color=[255, 255, 255],
+ type='',
+ swap='antbaseL'),
+ 22:
+ dict(name='eyeR', id=22, color=[255, 255, 255], type='', swap='eyeL'),
+ 23:
+ dict(
+ name='forelegR1',
+ id=23,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL1'),
+ 24:
+ dict(
+ name='forelegR2',
+ id=24,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL2'),
+ 25:
+ dict(
+ name='forelegR3',
+ id=25,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL3'),
+ 26:
+ dict(
+ name='forelegR4',
+ id=26,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL4'),
+ 27:
+ dict(
+ name='midlegR1',
+ id=27,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL1'),
+ 28:
+ dict(
+ name='midlegR2',
+ id=28,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL2'),
+ 29:
+ dict(
+ name='midlegR3',
+ id=29,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL3'),
+ 30:
+ dict(
+ name='midlegR4',
+ id=30,
+ color=[255, 255, 255],
+ type='',
+ swap='midlegL4'),
+ 31:
+ dict(
+ name='hindlegR1',
+ id=31,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL1'),
+ 32:
+ dict(
+ name='hindlegR2',
+ id=32,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL2'),
+ 33:
+ dict(
+ name='hindlegR3',
+ id=33,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL3'),
+ 34:
+ dict(
+ name='hindlegR4',
+ id=34,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL4')
+ },
+ skeleton_info={
+ 0: dict(link=('neck', 'head'), id=0, color=[255, 255, 255]),
+ 1: dict(link=('thorax', 'neck'), id=1, color=[255, 255, 255]),
+ 2: dict(link=('abdomen1', 'thorax'), id=2, color=[255, 255, 255]),
+ 3: dict(link=('abdomen2', 'abdomen1'), id=3, color=[255, 255, 255]),
+ 4: dict(link=('antbaseL', 'anttipL'), id=4, color=[255, 255, 255]),
+ 5: dict(link=('eyeL', 'antbaseL'), id=5, color=[255, 255, 255]),
+ 6: dict(link=('forelegL2', 'forelegL1'), id=6, color=[255, 255, 255]),
+ 7: dict(link=('forelegL3', 'forelegL2'), id=7, color=[255, 255, 255]),
+ 8: dict(link=('forelegL4', 'forelegL3'), id=8, color=[255, 255, 255]),
+ 9: dict(link=('midlegL2', 'midlegL1'), id=9, color=[255, 255, 255]),
+ 10: dict(link=('midlegL3', 'midlegL2'), id=10, color=[255, 255, 255]),
+ 11: dict(link=('midlegL4', 'midlegL3'), id=11, color=[255, 255, 255]),
+ 12:
+ dict(link=('hindlegL2', 'hindlegL1'), id=12, color=[255, 255, 255]),
+ 13:
+ dict(link=('hindlegL3', 'hindlegL2'), id=13, color=[255, 255, 255]),
+ 14:
+ dict(link=('hindlegL4', 'hindlegL3'), id=14, color=[255, 255, 255]),
+ 15: dict(link=('antbaseR', 'anttipR'), id=15, color=[255, 255, 255]),
+ 16: dict(link=('eyeR', 'antbaseR'), id=16, color=[255, 255, 255]),
+ 17:
+ dict(link=('forelegR2', 'forelegR1'), id=17, color=[255, 255, 255]),
+ 18:
+ dict(link=('forelegR3', 'forelegR2'), id=18, color=[255, 255, 255]),
+ 19:
+ dict(link=('forelegR4', 'forelegR3'), id=19, color=[255, 255, 255]),
+ 20: dict(link=('midlegR2', 'midlegR1'), id=20, color=[255, 255, 255]),
+ 21: dict(link=('midlegR3', 'midlegR2'), id=21, color=[255, 255, 255]),
+ 22: dict(link=('midlegR4', 'midlegR3'), id=22, color=[255, 255, 255]),
+ 23:
+ dict(link=('hindlegR2', 'hindlegR1'), id=23, color=[255, 255, 255]),
+ 24:
+ dict(link=('hindlegR3', 'hindlegR2'), id=24, color=[255, 255, 255]),
+ 25:
+ dict(link=('hindlegR4', 'hindlegR3'), id=25, color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 35,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/macaque.py b/mmpose/configs/_base_/datasets/macaque.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea8dac297ea2f0e36dabccccc021d953216a6ac8
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/macaque.py
@@ -0,0 +1,183 @@
+dataset_info = dict(
+ dataset_name='macaque',
+ paper_info=dict(
+ author='Labuguen, Rollyn and Matsumoto, Jumpei and '
+ 'Negrete, Salvador and Nishimaru, Hiroshi and '
+ 'Nishijo, Hisao and Takada, Masahiko and '
+ 'Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro',
+ title='MacaquePose: A novel "in the wild" macaque monkey pose dataset '
+ 'for markerless motion capture',
+ container='bioRxiv',
+ year='2020',
+ homepage='http://www.pri.kyoto-u.ac.jp/datasets/'
+ 'macaquepose/index.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/mmpose/configs/_base_/datasets/mhp.py b/mmpose/configs/_base_/datasets/mhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..e16e37c79cb63c4352c48bb4e45602b8408f534b
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/mhp.py
@@ -0,0 +1,156 @@
+dataset_info = dict(  # Metadata for the 'mhp' dataset: 16 keypoints, 15 skeleton links.
+ dataset_name='mhp',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Zhao, Jian and Li, Jianshu and Cheng, Yu and '
+ 'Sim, Terence and Yan, Shuicheng and Feng, Jiashi',
+ title='Understanding humans in crowded scenes: '
+ 'Deep nested adversarial learning and a '
+ 'new benchmark for multi-human parsing',
+ container='Proceedings of the 26th ACM '
+ 'international conference on Multimedia',
+ year='2018',
+ homepage='https://lv-mhp.github.io/dataset',
+ ),
+ keypoint_info={  # id -> keypoint; 'swap' names the opposite-side keypoint ('' when unpaired).
+ 0:
+ dict(
+ name='right_ankle',
+ id=0,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 1:
+ dict(
+ name='right_knee',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 2:
+ dict(
+ name='right_hip',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 3:
+ dict(
+ name='left_hip',
+ id=3,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 4:
+ dict(
+ name='left_knee',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 5:
+ dict(
+ name='left_ankle',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 6:
+ dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''),
+ 7:
+ dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(
+ name='upper_neck',
+ id=8,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 9:
+ dict(
+ name='head_top', id=9, color=[51, 153, 255], type='upper',
+ swap=''),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='right_elbow',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 12:
+ dict(
+ name='right_shoulder',
+ id=12,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 13:
+ dict(
+ name='left_shoulder',
+ id=13,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 14:
+ dict(
+ name='left_elbow',
+ id=14,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 15:
+ dict(
+ name='left_wrist',
+ id=15,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist')
+ },
+ skeleton_info={  # id -> link joining two keypoint names declared in keypoint_info.
+ 0:
+ dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
+ 5:
+ dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128,
+ 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]),
+ 14:
+ dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0])
+ },
+ joint_weights=[  # 16 values, one per keypoint; presumably in keypoint-id order (confirm with the consumer).
+ 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
+ ],
+ # Adapted from COCO dataset.
+ sigmas=[  # 16 values, one per keypoint.
+ 0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026,
+ 0.062, 0.072, 0.179, 0.179, 0.072, 0.062
+ ])
diff --git a/mmpose/configs/_base_/datasets/mpi_inf_3dhp.py b/mmpose/configs/_base_/datasets/mpi_inf_3dhp.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffd0a70297b24456ea38566ac205bb585aa47e5d
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/mpi_inf_3dhp.py
@@ -0,0 +1,132 @@
+dataset_info = dict(
+ dataset_name='mpi_inf_3dhp',
+ paper_info=dict(
+ author='ehta, Dushyant and Rhodin, Helge and Casas, Dan and '
+ 'Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and '
+ 'Theobalt, Christian',
+ title='Monocular 3D Human Pose Estimation In The Wild Using Improved '
+ 'CNN Supervision',
+ container='2017 international conference on 3D vision (3DV)',
+ year='2017',
+ homepage='http://gvv.mpi-inf.mpg.de/3dhp-dataset',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='head_top', id=0, color=[51, 153, 255], type='upper',
+ swap=''),
+ 1:
+ dict(name='neck', id=1, color=[51, 153, 255], type='upper', swap=''),
+ 2:
+ dict(
+ name='right_shoulder',
+ id=2,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 3:
+ dict(
+ name='right_elbow',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 4:
+ dict(
+ name='right_wrist',
+ id=4,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='left_elbow',
+ id=6,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 7:
+ dict(
+ name='left_wrist',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 8:
+ dict(
+ name='right_hip',
+ id=8,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='right_ankle',
+ id=10,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='left_knee',
+ id=12,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 13:
+ dict(
+ name='left_ankle',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 14:
+ dict(name='root', id=14, color=[51, 153, 255], type='lower', swap=''),
+ 15:
+ dict(name='spine', id=15, color=[51, 153, 255], type='upper', swap=''),
+ 16:
+ dict(name='head', id=16, color=[51, 153, 255], type='upper', swap='')
+ },
+ skeleton_info={
+ 0: dict(link=('neck', 'right_shoulder'), id=0, color=[255, 128, 0]),
+ 1: dict(
+ link=('right_shoulder', 'right_elbow'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_elbow', 'right_wrist'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('neck', 'left_shoulder'), id=3, color=[0, 255, 0]),
+ 4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6: dict(link=('root', 'right_hip'), id=6, color=[255, 128, 0]),
+ 7: dict(link=('right_hip', 'right_knee'), id=7, color=[255, 128, 0]),
+ 8: dict(link=('right_knee', 'right_ankle'), id=8, color=[255, 128, 0]),
+ 9: dict(link=('root', 'left_hip'), id=9, color=[0, 255, 0]),
+ 10: dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]),
+ 11: dict(link=('left_knee', 'left_ankle'), id=11, color=[0, 255, 0]),
+ 12: dict(link=('head_top', 'head'), id=12, color=[51, 153, 255]),
+ 13: dict(link=('head', 'neck'), id=13, color=[51, 153, 255]),
+ 14: dict(link=('neck', 'spine'), id=14, color=[51, 153, 255]),
+ 15: dict(link=('spine', 'root'), id=15, color=[51, 153, 255])
+ },
+ joint_weights=[1.] * 17,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/mpii.py b/mmpose/configs/_base_/datasets/mpii.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c2a491c7b58bc3eaa5c0056d3d7184bdd1d1cc7
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/mpii.py
@@ -0,0 +1,155 @@
+dataset_info = dict(  # Metadata for the 'mpii' dataset: 16 keypoints, 15 skeleton links.
+ dataset_name='mpii',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Mykhaylo Andriluka and Leonid Pishchulin and '
+ 'Peter Gehler and Schiele, Bernt',
+ title='2D Human Pose Estimation: New Benchmark and '
+ 'State of the Art Analysis',
+ container='IEEE Conference on Computer Vision and '
+ 'Pattern Recognition (CVPR)',
+ year='2014',
+ homepage='http://human-pose.mpi-inf.mpg.de/',
+ ),
+ keypoint_info={  # id -> keypoint; 'swap' names the opposite-side keypoint ('' when unpaired).
+ 0:
+ dict(
+ name='right_ankle',
+ id=0,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 1:
+ dict(
+ name='right_knee',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 2:
+ dict(
+ name='right_hip',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 3:
+ dict(
+ name='left_hip',
+ id=3,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 4:
+ dict(
+ name='left_knee',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 5:
+ dict(
+ name='left_ankle',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 6:
+ dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''),
+ 7:
+ dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''),
+ 8:
+ dict(
+ name='upper_neck',
+ id=8,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 9:
+ dict(
+ name='head_top', id=9, color=[51, 153, 255], type='upper',
+ swap=''),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='right_elbow',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 12:
+ dict(
+ name='right_shoulder',
+ id=12,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 13:
+ dict(
+ name='left_shoulder',
+ id=13,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 14:
+ dict(
+ name='left_elbow',
+ id=14,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 15:
+ dict(
+ name='left_wrist',
+ id=15,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist')
+ },
+ skeleton_info={  # id -> link joining two keypoint names declared in keypoint_info.
+ 0:
+ dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
+ 5:
+ dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128,
+ 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]),
+ 14:
+ dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0])
+ },
+ joint_weights=[  # 16 values, one per keypoint; presumably in keypoint-id order (confirm with the consumer).
+ 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
+ ],
+ # Adapted from COCO dataset.
+ sigmas=[  # 16 values, one per keypoint.
+ 0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026,
+ 0.062, 0.072, 0.179, 0.179, 0.072, 0.062
+ ])
diff --git a/mmpose/configs/_base_/datasets/mpii_trb.py b/mmpose/configs/_base_/datasets/mpii_trb.py
new file mode 100644
index 0000000000000000000000000000000000000000..73940d4b4827f8e08343c3b517360db788e4820d
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/mpii_trb.py
@@ -0,0 +1,380 @@
+dataset_info = dict(  # Metadata for the 'mpii_trb' dataset: 40 keypoints, 27 skeleton links.
+ dataset_name='mpii_trb',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and '
+ 'Liu, Wentao and Qian, Chen and Ouyang, Wanli',
+ title='TRB: A Novel Triplet Representation for '
+ 'Understanding 2D Human Body',
+ container='Proceedings of the IEEE International '
+ 'Conference on Computer Vision',
+ year='2019',
+ homepage='https://github.com/kennymckormick/'
+ 'Triplet-Representation-of-human-Body',
+ ),
+ keypoint_info={  # id -> keypoint; ids 0-13 are body joints, ids 14-39 are medial/lateral (and neck) points drawn in white.
+ 0:
+ dict(
+ name='left_shoulder',
+ id=0,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 1:
+ dict(
+ name='right_shoulder',
+ id=1,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 2:
+ dict(
+ name='left_elbow',
+ id=2,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 3:
+ dict(
+ name='right_elbow',
+ id=3,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 4:
+ dict(
+ name='left_wrist',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 5:
+ dict(
+ name='right_wrist',
+ id=5,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='right_hip',
+ id=7,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 8:
+ dict(
+ name='left_knee',
+ id=8,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 9:
+ dict(
+ name='right_knee',
+ id=9,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 10:
+ dict(
+ name='left_ankle',
+ id=10,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 11:
+ dict(
+ name='right_ankle',
+ id=11,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 12:
+ dict(name='head', id=12, color=[51, 153, 255], type='upper', swap=''),
+ 13:
+ dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap=''),
+ 14:
+ dict(
+ name='right_neck',
+ id=14,
+ color=[255, 255, 255],
+ type='upper',
+ swap='left_neck'),
+ 15:
+ dict(
+ name='left_neck',
+ id=15,
+ color=[255, 255, 255],
+ type='upper',
+ swap='right_neck'),
+ 16:
+ dict(
+ name='medial_right_shoulder',
+ id=16,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_left_shoulder'),
+ 17:
+ dict(
+ name='lateral_right_shoulder',
+ id=17,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_left_shoulder'),
+ 18:
+ dict(
+ name='medial_right_bow',
+ id=18,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_left_bow'),
+ 19:
+ dict(
+ name='lateral_right_bow',
+ id=19,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_left_bow'),
+ 20:
+ dict(
+ name='medial_right_wrist',
+ id=20,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_left_wrist'),
+ 21:
+ dict(
+ name='lateral_right_wrist',
+ id=21,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_left_wrist'),
+ 22:
+ dict(
+ name='medial_left_shoulder',
+ id=22,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_right_shoulder'),
+ 23:
+ dict(
+ name='lateral_left_shoulder',
+ id=23,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_right_shoulder'),
+ 24:
+ dict(
+ name='medial_left_bow',
+ id=24,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_right_bow'),
+ 25:
+ dict(
+ name='lateral_left_bow',
+ id=25,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_right_bow'),
+ 26:
+ dict(
+ name='medial_left_wrist',
+ id=26,
+ color=[255, 255, 255],
+ type='upper',
+ swap='medial_right_wrist'),
+ 27:
+ dict(
+ name='lateral_left_wrist',
+ id=27,
+ color=[255, 255, 255],
+ type='upper',
+ swap='lateral_right_wrist'),
+ 28:
+ dict(
+ name='medial_right_hip',
+ id=28,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_left_hip'),
+ 29:
+ dict(
+ name='lateral_right_hip',
+ id=29,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_left_hip'),
+ 30:
+ dict(
+ name='medial_right_knee',
+ id=30,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_left_knee'),
+ 31:
+ dict(
+ name='lateral_right_knee',
+ id=31,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_left_knee'),
+ 32:
+ dict(
+ name='medial_right_ankle',
+ id=32,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_left_ankle'),
+ 33:
+ dict(
+ name='lateral_right_ankle',
+ id=33,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_left_ankle'),
+ 34:
+ dict(
+ name='medial_left_hip',
+ id=34,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_right_hip'),
+ 35:
+ dict(
+ name='lateral_left_hip',
+ id=35,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_right_hip'),
+ 36:
+ dict(
+ name='medial_left_knee',
+ id=36,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_right_knee'),
+ 37:
+ dict(
+ name='lateral_left_knee',
+ id=37,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_right_knee'),
+ 38:
+ dict(
+ name='medial_left_ankle',
+ id=38,
+ color=[255, 255, 255],
+ type='lower',
+ swap='medial_right_ankle'),
+ 39:
+ dict(
+ name='lateral_left_ankle',
+ id=39,
+ color=[255, 255, 255],
+ type='lower',
+ swap='lateral_right_ankle'),
+ },
+ skeleton_info={  # id -> link; ids 0-13 join body joints, ids 14-26 join each right/left neck or medial/lateral pair.
+ 0:
+ dict(link=('head', 'neck'), id=0, color=[51, 153, 255]),
+ 1:
+ dict(link=('neck', 'left_shoulder'), id=1, color=[51, 153, 255]),
+ 2:
+ dict(link=('neck', 'right_shoulder'), id=2, color=[51, 153, 255]),
+ 3:
+ dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]),
+ 5:
+ dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]),
+ 7:
+ dict(link=('left_shoulder', 'left_hip'), id=7, color=[51, 153, 255]),
+ 8:
+ dict(link=('right_shoulder', 'right_hip'), id=8, color=[51, 153, 255]),
+ 9:
+ dict(link=('left_hip', 'right_hip'), id=9, color=[51, 153, 255]),
+ 10:
+ dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_hip', 'right_knee'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_knee', 'left_ankle'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('right_knee', 'right_ankle'), id=13, color=[255, 128, 0]),
+ 14:
+ dict(link=('right_neck', 'left_neck'), id=14, color=[255, 255, 255]),
+ 15:
+ dict(
+ link=('medial_right_shoulder', 'lateral_right_shoulder'),
+ id=15,
+ color=[255, 255, 255]),
+ 16:
+ dict(
+ link=('medial_right_bow', 'lateral_right_bow'),
+ id=16,
+ color=[255, 255, 255]),
+ 17:
+ dict(
+ link=('medial_right_wrist', 'lateral_right_wrist'),
+ id=17,
+ color=[255, 255, 255]),
+ 18:
+ dict(
+ link=('medial_left_shoulder', 'lateral_left_shoulder'),
+ id=18,
+ color=[255, 255, 255]),
+ 19:
+ dict(
+ link=('medial_left_bow', 'lateral_left_bow'),
+ id=19,
+ color=[255, 255, 255]),
+ 20:
+ dict(
+ link=('medial_left_wrist', 'lateral_left_wrist'),
+ id=20,
+ color=[255, 255, 255]),
+ 21:
+ dict(
+ link=('medial_right_hip', 'lateral_right_hip'),
+ id=21,
+ color=[255, 255, 255]),
+ 22:
+ dict(
+ link=('medial_right_knee', 'lateral_right_knee'),
+ id=22,
+ color=[255, 255, 255]),
+ 23:
+ dict(
+ link=('medial_right_ankle', 'lateral_right_ankle'),
+ id=23,
+ color=[255, 255, 255]),
+ 24:
+ dict(
+ link=('medial_left_hip', 'lateral_left_hip'),
+ id=24,
+ color=[255, 255, 255]),
+ 25:
+ dict(
+ link=('medial_left_knee', 'lateral_left_knee'),
+ id=25,
+ color=[255, 255, 255]),
+ 26:
+ dict(
+ link=('medial_left_ankle', 'lateral_left_ankle'),
+ id=26,
+ color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 40,  # Uniform weight for each of the 40 keypoints.
+ sigmas=[])  # No per-keypoint sigmas are provided for this dataset.
diff --git a/mmpose/configs/_base_/datasets/ochuman.py b/mmpose/configs/_base_/datasets/ochuman.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ef20838fe583fde133a97e688d30e91ae562746
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/ochuman.py
@@ -0,0 +1,181 @@
+dataset_info = dict(  # Metadata for the 'ochuman' dataset: 17 keypoints, 19 skeleton links.
+ dataset_name='ochuman',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Zhang, Song-Hai and Li, Ruilong and Dong, Xin and '
+ 'Rosin, Paul and Cai, Zixi and Han, Xi and '
+ 'Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min',
+ title='Pose2seg: Detection free human instance segmentation',
+ container='Proceedings of the IEEE conference on computer '
+ 'vision and pattern recognition',
+ year='2019',
+ homepage='https://github.com/liruilong940607/OCHumanApi',
+ ),
+ keypoint_info={  # id -> keypoint; 'swap' names the opposite-side keypoint ('' when unpaired).
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='left_eye',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 2:
+ dict(
+ name='right_eye',
+ id=2,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={  # id -> link joining two keypoint names declared in keypoint_info.
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
+ 15:
+ dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
+ 16:
+ dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
+ 17:
+ dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
+ 18:
+ dict(
+ link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
+ },
+ joint_weights=[  # 17 values, one per keypoint; presumably in keypoint-id order (confirm with the consumer).
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[  # 17 values, one per keypoint.
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/mmpose/configs/_base_/datasets/onehand10k.py b/mmpose/configs/_base_/datasets/onehand10k.py
new file mode 100644
index 0000000000000000000000000000000000000000..016770f14f3075dfa7d59389524a0c11a4feb802
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/onehand10k.py
@@ -0,0 +1,142 @@
+dataset_info = dict(  # Metadata for the 'onehand10k' dataset: 21 hand keypoints, 20 skeleton links.
+ dataset_name='onehand10k',
+ paper_info=dict(  # Bibliographic reference for the dataset.
+ author='Wang, Yangang and Peng, Cong and Liu, Yebin',
+ title='Mask-pose cascaded cnn for 2d hand pose estimation '
+ 'from single color image',
+ container='IEEE Transactions on Circuits and Systems '
+ 'for Video Technology',
+ year='2018',
+ homepage='https://www.yangangwang.com/papers/WANG-MCC-2018-10.html',
+ ),
+ keypoint_info={  # id -> keypoint: wrist plus four points per finger; 'type' and 'swap' are '' for every entry.
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={  # id -> link; each finger is a chain wrist -> 1 -> 2 -> 3 -> 4, colored like its keypoints.
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,  # Uniform weight for each of the 21 keypoints.
+ sigmas=[])  # No per-keypoint sigmas are provided for this dataset.
diff --git a/mmpose/configs/_base_/datasets/panoptic_body3d.py b/mmpose/configs/_base_/datasets/panoptic_body3d.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3b19ac462415a840ca2e0b9e214bdb35d91b5e4
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/panoptic_body3d.py
@@ -0,0 +1,160 @@
+dataset_info = dict(
+ dataset_name='panoptic_pose_3d',
+ paper_info=dict(
+ author='Joo, Hanbyul and Simon, Tomas and Li, Xulong'
+ 'and Liu, Hao and Tan, Lei and Gui, Lin and Banerjee, Sean'
+ 'and Godisart, Timothy and Nabbe, Bart and Matthews, Iain'
+ 'and Kanade, Takeo and Nobuhara, Shohei and Sheikh, Yaser',
+ title='Panoptic Studio: A Massively Multiview System '
+ 'for Interaction Motion Capture',
+ container='IEEE Transactions on Pattern Analysis'
+ ' and Machine Intelligence',
+ year='2017',
+ homepage='http://domedb.perception.cs.cmu.edu',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='neck', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(name='nose', id=1, color=[51, 153, 255], type='upper', swap=''),
+ 2:
+ dict(name='mid_hip', id=2, color=[0, 255, 0], type='lower', swap=''),
+ 3:
+ dict(
+ name='left_shoulder',
+ id=3,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 4:
+ dict(
+ name='left_elbow',
+ id=4,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 5:
+ dict(
+ name='left_wrist',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 6:
+ dict(
+ name='left_hip',
+ id=6,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 7:
+ dict(
+ name='left_knee',
+ id=7,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 8:
+ dict(
+ name='left_ankle',
+ id=8,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 9:
+ dict(
+ name='right_shoulder',
+ id=9,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 10:
+ dict(
+ name='right_elbow',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 11:
+ dict(
+ name='right_wrist',
+ id=11,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='right_knee',
+ id=13,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 14:
+ dict(
+ name='right_ankle',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 15:
+ dict(
+ name='left_eye',
+ id=15,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_eye'),
+ 16:
+ dict(
+ name='left_ear',
+ id=16,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 17:
+ dict(
+ name='right_eye',
+ id=17,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_eye'),
+ 18:
+ dict(
+ name='right_ear',
+ id=18,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear')
+ },
+ skeleton_info={
+ 0: dict(link=('nose', 'neck'), id=0, color=[51, 153, 255]),
+ 1: dict(link=('neck', 'left_shoulder'), id=1, color=[0, 255, 0]),
+ 2: dict(link=('neck', 'right_shoulder'), id=2, color=[255, 128, 0]),
+ 3: dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]),
+ 4: dict(
+ link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]),
+ 5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+ 6:
+ dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]),
+ 7: dict(link=('left_ankle', 'left_knee'), id=7, color=[0, 255, 0]),
+ 8: dict(link=('left_knee', 'left_hip'), id=8, color=[0, 255, 0]),
+ 9: dict(link=('right_ankle', 'right_knee'), id=9, color=[255, 128, 0]),
+ 10: dict(link=('right_knee', 'right_hip'), id=10, color=[255, 128, 0]),
+ 11: dict(link=('mid_hip', 'left_hip'), id=11, color=[0, 255, 0]),
+ 12: dict(link=('mid_hip', 'right_hip'), id=12, color=[255, 128, 0]),
+ 13: dict(link=('mid_hip', 'neck'), id=13, color=[51, 153, 255]),
+ },
+ joint_weights=[
+ 1.0, 1.0, 1.0, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2,
+ 1.5, 1.0, 1.0, 1.0, 1.0
+ ],
+ sigmas=[
+ 0.026, 0.026, 0.107, 0.079, 0.072, 0.062, 0.107, 0.087, 0.089, 0.079,
+ 0.072, 0.062, 0.107, 0.087, 0.089, 0.025, 0.035, 0.025, 0.035
+ ])
diff --git a/mmpose/configs/_base_/datasets/panoptic_hand2d.py b/mmpose/configs/_base_/datasets/panoptic_hand2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a65731ba87b155beb1b40591fd9acb232c2afc6
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/panoptic_hand2d.py
@@ -0,0 +1,143 @@
+dataset_info = dict(
+ dataset_name='panoptic_hand2d',
+ paper_info=dict(
+ author='Simon, Tomas and Joo, Hanbyul and '
+ 'Matthews, Iain and Sheikh, Yaser',
+ title='Hand keypoint detection in single images using '
+ 'multiview bootstrapping',
+ container='Proceedings of the IEEE conference on '
+ 'Computer Vision and Pattern Recognition',
+ year='2017',
+ homepage='http://domedb.perception.cs.cmu.edu/handdb.html',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger1',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger2',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger3',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger4',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/posetrack18.py b/mmpose/configs/_base_/datasets/posetrack18.py
new file mode 100644
index 0000000000000000000000000000000000000000..5aefd1c97fe083df35ee88bebab4f99134c27971
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/posetrack18.py
@@ -0,0 +1,176 @@
+dataset_info = dict(
+ dataset_name='posetrack18',
+ paper_info=dict(
+ author='Andriluka, Mykhaylo and Iqbal, Umar and '
+ 'Insafutdinov, Eldar and Pishchulin, Leonid and '
+ 'Milan, Anton and Gall, Juergen and Schiele, Bernt',
+ title='Posetrack: A benchmark for human pose estimation and tracking',
+ container='Proceedings of the IEEE Conference on '
+ 'Computer Vision and Pattern Recognition',
+ year='2018',
+ homepage='https://posetrack.net/users/download.php',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
+ 1:
+ dict(
+ name='head_bottom',
+ id=1,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 2:
+ dict(
+ name='head_top', id=2, color=[51, 153, 255], type='upper',
+ swap=''),
+ 3:
+ dict(
+ name='left_ear',
+ id=3,
+ color=[51, 153, 255],
+ type='upper',
+ swap='right_ear'),
+ 4:
+ dict(
+ name='right_ear',
+ id=4,
+ color=[51, 153, 255],
+ type='upper',
+ swap='left_ear'),
+ 5:
+ dict(
+ name='left_shoulder',
+ id=5,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 6:
+ dict(
+ name='right_shoulder',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 7:
+ dict(
+ name='left_elbow',
+ id=7,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 8:
+ dict(
+ name='right_elbow',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 9:
+ dict(
+ name='left_wrist',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 10:
+ dict(
+ name='right_wrist',
+ id=10,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 11:
+ dict(
+ name='left_hip',
+ id=11,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 12:
+ dict(
+ name='right_hip',
+ id=12,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 13:
+ dict(
+ name='left_knee',
+ id=13,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 14:
+ dict(
+ name='right_knee',
+ id=14,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 15:
+ dict(
+ name='left_ankle',
+ id=15,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 16:
+ dict(
+ name='right_ankle',
+ id=16,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
+ 1:
+ dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
+ 2:
+ dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
+ 6:
+ dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
+ 7:
+ dict(
+ link=('left_shoulder', 'right_shoulder'),
+ id=7,
+ color=[51, 153, 255]),
+ 8:
+ dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(
+ link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
+ 12:
+ dict(link=('nose', 'head_bottom'), id=12, color=[51, 153, 255]),
+ 13:
+ dict(link=('nose', 'head_top'), id=13, color=[51, 153, 255]),
+ 14:
+ dict(
+ link=('head_bottom', 'left_shoulder'), id=14, color=[51, 153,
+ 255]),
+ 15:
+ dict(
+ link=('head_bottom', 'right_shoulder'),
+ id=15,
+ color=[51, 153, 255])
+ },
+ joint_weights=[
+ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+ 1.5
+ ],
+ sigmas=[
+ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
+ 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
+ ])
diff --git a/mmpose/configs/_base_/datasets/rhd2d.py b/mmpose/configs/_base_/datasets/rhd2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..4631ccd03814155b06687e0b1ba2b83404c837fc
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/rhd2d.py
@@ -0,0 +1,151 @@
+dataset_info = dict(
+ dataset_name='rhd2d',
+ paper_info=dict(
+ author='Christian Zimmermann and Thomas Brox',
+ title='Learning to Estimate 3D Hand Pose from Single RGB Images',
+ container='arXiv',
+ year='2017',
+ homepage='https://lmb.informatik.uni-freiburg.de/resources/'
+ 'datasets/RenderedHandposeDataset.en.html',
+ ),
+ # In RHD, keypoints 1-4 are the left thumb ordered from tip to palm, and
+ # the other fingers follow the same tip-to-palm order. Please refer to
+ # `https://lmb.informatik.uni-freiburg.de/resources/datasets/
+ # RenderedHandpose/README` for details of keypoint definition.
+ # But in COCO-WholeBody-Hand, FreiHand, CMU Panoptic HandDB, it is in
+ # inverse order. Pay attention to this if you want to combine RHD with
+ # other hand datasets to train a single model.
+ # Also, note that 'keypoint_info' will not directly affect the order of
+ # the keypoint in the dataset. It is mostly for visualization & storing
+ # information about flip_pairs.
+ keypoint_info={
+ 0:
+ dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='thumb4', id=1, color=[255, 128, 0], type='', swap=''),
+ 2:
+ dict(name='thumb3', id=2, color=[255, 128, 0], type='', swap=''),
+ 3:
+ dict(name='thumb2', id=3, color=[255, 128, 0], type='', swap=''),
+ 4:
+ dict(name='thumb1', id=4, color=[255, 128, 0], type='', swap=''),
+ 5:
+ dict(
+ name='forefinger4', id=5, color=[255, 153, 255], type='', swap=''),
+ 6:
+ dict(
+ name='forefinger3', id=6, color=[255, 153, 255], type='', swap=''),
+ 7:
+ dict(
+ name='forefinger2', id=7, color=[255, 153, 255], type='', swap=''),
+ 8:
+ dict(
+ name='forefinger1', id=8, color=[255, 153, 255], type='', swap=''),
+ 9:
+ dict(
+ name='middle_finger4',
+ id=9,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 10:
+ dict(
+ name='middle_finger3',
+ id=10,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 11:
+ dict(
+ name='middle_finger2',
+ id=11,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 12:
+ dict(
+ name='middle_finger1',
+ id=12,
+ color=[102, 178, 255],
+ type='',
+ swap=''),
+ 13:
+ dict(
+ name='ring_finger4', id=13, color=[255, 51, 51], type='', swap=''),
+ 14:
+ dict(
+ name='ring_finger3', id=14, color=[255, 51, 51], type='', swap=''),
+ 15:
+ dict(
+ name='ring_finger2', id=15, color=[255, 51, 51], type='', swap=''),
+ 16:
+ dict(
+ name='ring_finger1', id=16, color=[255, 51, 51], type='', swap=''),
+ 17:
+ dict(name='pinky_finger4', id=17, color=[0, 255, 0], type='', swap=''),
+ 18:
+ dict(name='pinky_finger3', id=18, color=[0, 255, 0], type='', swap=''),
+ 19:
+ dict(name='pinky_finger2', id=19, color=[0, 255, 0], type='', swap=''),
+ 20:
+ dict(name='pinky_finger1', id=20, color=[0, 255, 0], type='', swap='')
+ },
+ skeleton_info={
+ 0:
+ dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
+ 3:
+ dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
+ 4:
+ dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
+ 5:
+ dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
+ 6:
+ dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
+ 7:
+ dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
+ 8:
+ dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
+ 9:
+ dict(
+ link=('middle_finger1', 'middle_finger2'),
+ id=9,
+ color=[102, 178, 255]),
+ 10:
+ dict(
+ link=('middle_finger2', 'middle_finger3'),
+ id=10,
+ color=[102, 178, 255]),
+ 11:
+ dict(
+ link=('middle_finger3', 'middle_finger4'),
+ id=11,
+ color=[102, 178, 255]),
+ 12:
+ dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
+ 13:
+ dict(
+ link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
+ 14:
+ dict(
+ link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
+ 15:
+ dict(
+ link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
+ 16:
+ dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
+ 17:
+ dict(
+ link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
+ 18:
+ dict(
+ link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
+ 19:
+ dict(
+ link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
+ },
+ joint_weights=[1.] * 21,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/shelf.py b/mmpose/configs/_base_/datasets/shelf.py
new file mode 100644
index 0000000000000000000000000000000000000000..5fe6e42b3b44e3f65947284efd9ffac58d41d43f
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/shelf.py
@@ -0,0 +1,151 @@
+dataset_info = dict(
+ dataset_name='shelf',
+ paper_info=dict(
+ author='Belagiannis, Vasileios and Amin, Sikandar and Andriluka, '
+ 'Mykhaylo and Schiele, Bernt and Navab, Nassir and Ilic, Slobodan',
+ title='3D Pictorial Structures for Multiple Human Pose Estimation',
+ container='IEEE Computer Society Conference on Computer Vision and '
+ 'Pattern Recognition (CVPR)',
+ year='2014',
+ homepage='http://campar.in.tum.de/Chair/MultiHumanPose',
+ ),
+ keypoint_info={
+ 0:
+ dict(
+ name='right_ankle',
+ id=0,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_ankle'),
+ 1:
+ dict(
+ name='right_knee',
+ id=1,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_knee'),
+ 2:
+ dict(
+ name='right_hip',
+ id=2,
+ color=[255, 128, 0],
+ type='lower',
+ swap='left_hip'),
+ 3:
+ dict(
+ name='left_hip',
+ id=3,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_hip'),
+ 4:
+ dict(
+ name='left_knee',
+ id=4,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_knee'),
+ 5:
+ dict(
+ name='left_ankle',
+ id=5,
+ color=[0, 255, 0],
+ type='lower',
+ swap='right_ankle'),
+ 6:
+ dict(
+ name='right_wrist',
+ id=6,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_wrist'),
+ 7:
+ dict(
+ name='right_elbow',
+ id=7,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_elbow'),
+ 8:
+ dict(
+ name='right_shoulder',
+ id=8,
+ color=[255, 128, 0],
+ type='upper',
+ swap='left_shoulder'),
+ 9:
+ dict(
+ name='left_shoulder',
+ id=9,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_shoulder'),
+ 10:
+ dict(
+ name='left_elbow',
+ id=10,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_elbow'),
+ 11:
+ dict(
+ name='left_wrist',
+ id=11,
+ color=[0, 255, 0],
+ type='upper',
+ swap='right_wrist'),
+ 12:
+ dict(
+ name='bottom_head',
+ id=12,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ 13:
+ dict(
+ name='top_head',
+ id=13,
+ color=[51, 153, 255],
+ type='upper',
+ swap=''),
+ },
+ skeleton_info={
+ 0:
+ dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
+ 1:
+ dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
+ 2:
+ dict(link=('left_hip', 'left_knee'), id=2, color=[0, 255, 0]),
+ 3:
+ dict(link=('left_knee', 'left_ankle'), id=3, color=[0, 255, 0]),
+ 4:
+ dict(link=('right_hip', 'left_hip'), id=4, color=[51, 153, 255]),
+ 5:
+ dict(link=('right_wrist', 'right_elbow'), id=5, color=[255, 128, 0]),
+ 6:
+ dict(
+ link=('right_elbow', 'right_shoulder'), id=6, color=[255, 128, 0]),
+ 7:
+ dict(link=('left_shoulder', 'left_elbow'), id=7, color=[0, 255, 0]),
+ 8:
+ dict(link=('left_elbow', 'left_wrist'), id=8, color=[0, 255, 0]),
+ 9:
+ dict(link=('right_hip', 'right_shoulder'), id=9, color=[255, 128, 0]),
+ 10:
+ dict(link=('left_hip', 'left_shoulder'), id=10, color=[0, 255, 0]),
+ 11:
+ dict(
+ link=('right_shoulder', 'bottom_head'), id=11, color=[255, 128,
+ 0]),
+ 12:
+ dict(link=('left_shoulder', 'bottom_head'), id=12, color=[0, 255, 0]),
+ 13:
+ dict(link=('bottom_head', 'top_head'), id=13, color=[51, 153, 255]),
+ },
+ joint_weights=[
+ 1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.5, 1.2, 1.0, 1.0, 1.2, 1.5, 1.0, 1.0
+ ],
+ sigmas=[
+ 0.089, 0.087, 0.107, 0.107, 0.087, 0.089, 0.062, 0.072, 0.079, 0.079,
+ 0.072, 0.062, 0.026, 0.026
+ ])
diff --git a/mmpose/configs/_base_/datasets/wflw.py b/mmpose/configs/_base_/datasets/wflw.py
new file mode 100644
index 0000000000000000000000000000000000000000..80c29b696cf5031d8f21d7d8ed7e573043666f35
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/wflw.py
@@ -0,0 +1,192 @@
+dataset_info = dict(
+ dataset_name='wflw',
+ paper_info=dict(
+ author='Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, '
+ 'Quan and Cai, Yici and Zhou, Qiang',
+ title='Look at boundary: A boundary-aware face alignment algorithm',
+ container='Proceedings of the IEEE conference on computer '
+ 'vision and pattern recognition',
+ year='2018',
+ homepage='https://wywu.github.io/projects/LAB/WFLW.html',
+ ),
+ keypoint_info={
+ 0: dict(name='kpt-0', id=0, color=[255, 0, 0], type='', swap='kpt-32'),
+ 1: dict(name='kpt-1', id=1, color=[255, 0, 0], type='', swap='kpt-31'),
+ 2: dict(name='kpt-2', id=2, color=[255, 0, 0], type='', swap='kpt-30'),
+ 3: dict(name='kpt-3', id=3, color=[255, 0, 0], type='', swap='kpt-29'),
+ 4: dict(name='kpt-4', id=4, color=[255, 0, 0], type='', swap='kpt-28'),
+ 5: dict(name='kpt-5', id=5, color=[255, 0, 0], type='', swap='kpt-27'),
+ 6: dict(name='kpt-6', id=6, color=[255, 0, 0], type='', swap='kpt-26'),
+ 7: dict(name='kpt-7', id=7, color=[255, 0, 0], type='', swap='kpt-25'),
+ 8: dict(name='kpt-8', id=8, color=[255, 0, 0], type='', swap='kpt-24'),
+ 9: dict(name='kpt-9', id=9, color=[255, 0, 0], type='', swap='kpt-23'),
+ 10:
+ dict(name='kpt-10', id=10, color=[255, 0, 0], type='', swap='kpt-22'),
+ 11:
+ dict(name='kpt-11', id=11, color=[255, 0, 0], type='', swap='kpt-21'),
+ 12:
+ dict(name='kpt-12', id=12, color=[255, 0, 0], type='', swap='kpt-20'),
+ 13:
+ dict(name='kpt-13', id=13, color=[255, 0, 0], type='', swap='kpt-19'),
+ 14:
+ dict(name='kpt-14', id=14, color=[255, 0, 0], type='', swap='kpt-18'),
+ 15:
+ dict(name='kpt-15', id=15, color=[255, 0, 0], type='', swap='kpt-17'),
+ 16: dict(name='kpt-16', id=16, color=[255, 0, 0], type='', swap=''),
+ 17:
+ dict(name='kpt-17', id=17, color=[255, 0, 0], type='', swap='kpt-15'),
+ 18:
+ dict(name='kpt-18', id=18, color=[255, 0, 0], type='', swap='kpt-14'),
+ 19:
+ dict(name='kpt-19', id=19, color=[255, 0, 0], type='', swap='kpt-13'),
+ 20:
+ dict(name='kpt-20', id=20, color=[255, 0, 0], type='', swap='kpt-12'),
+ 21:
+ dict(name='kpt-21', id=21, color=[255, 0, 0], type='', swap='kpt-11'),
+ 22:
+ dict(name='kpt-22', id=22, color=[255, 0, 0], type='', swap='kpt-10'),
+ 23:
+ dict(name='kpt-23', id=23, color=[255, 0, 0], type='', swap='kpt-9'),
+ 24:
+ dict(name='kpt-24', id=24, color=[255, 0, 0], type='', swap='kpt-8'),
+ 25:
+ dict(name='kpt-25', id=25, color=[255, 0, 0], type='', swap='kpt-7'),
+ 26:
+ dict(name='kpt-26', id=26, color=[255, 0, 0], type='', swap='kpt-6'),
+ 27:
+ dict(name='kpt-27', id=27, color=[255, 0, 0], type='', swap='kpt-5'),
+ 28:
+ dict(name='kpt-28', id=28, color=[255, 0, 0], type='', swap='kpt-4'),
+ 29:
+ dict(name='kpt-29', id=29, color=[255, 0, 0], type='', swap='kpt-3'),
+ 30:
+ dict(name='kpt-30', id=30, color=[255, 0, 0], type='', swap='kpt-2'),
+ 31:
+ dict(name='kpt-31', id=31, color=[255, 0, 0], type='', swap='kpt-1'),
+ 32:
+ dict(name='kpt-32', id=32, color=[255, 0, 0], type='', swap='kpt-0'),
+ 33:
+ dict(name='kpt-33', id=33, color=[255, 0, 0], type='', swap='kpt-46'),
+ 34:
+ dict(name='kpt-34', id=34, color=[255, 0, 0], type='', swap='kpt-45'),
+ 35:
+ dict(name='kpt-35', id=35, color=[255, 0, 0], type='', swap='kpt-44'),
+ 36:
+ dict(name='kpt-36', id=36, color=[255, 0, 0], type='', swap='kpt-43'),
+ 37: dict(
+ name='kpt-37', id=37, color=[255, 0, 0], type='', swap='kpt-42'),
+ 38: dict(
+ name='kpt-38', id=38, color=[255, 0, 0], type='', swap='kpt-50'),
+ 39: dict(
+ name='kpt-39', id=39, color=[255, 0, 0], type='', swap='kpt-49'),
+ 40: dict(
+ name='kpt-40', id=40, color=[255, 0, 0], type='', swap='kpt-48'),
+ 41: dict(
+ name='kpt-41', id=41, color=[255, 0, 0], type='', swap='kpt-47'),
+ 42: dict(
+ name='kpt-42', id=42, color=[255, 0, 0], type='', swap='kpt-37'),
+ 43: dict(
+ name='kpt-43', id=43, color=[255, 0, 0], type='', swap='kpt-36'),
+ 44: dict(
+ name='kpt-44', id=44, color=[255, 0, 0], type='', swap='kpt-35'),
+ 45: dict(
+ name='kpt-45', id=45, color=[255, 0, 0], type='', swap='kpt-34'),
+ 46: dict(
+ name='kpt-46', id=46, color=[255, 0, 0], type='', swap='kpt-33'),
+ 47: dict(
+ name='kpt-47', id=47, color=[255, 0, 0], type='', swap='kpt-41'),
+ 48: dict(
+ name='kpt-48', id=48, color=[255, 0, 0], type='', swap='kpt-40'),
+ 49: dict(
+ name='kpt-49', id=49, color=[255, 0, 0], type='', swap='kpt-39'),
+ 50: dict(
+ name='kpt-50', id=50, color=[255, 0, 0], type='', swap='kpt-38'),
+ 51: dict(name='kpt-51', id=51, color=[255, 0, 0], type='', swap=''),
+ 52: dict(name='kpt-52', id=52, color=[255, 0, 0], type='', swap=''),
+ 53: dict(name='kpt-53', id=53, color=[255, 0, 0], type='', swap=''),
+ 54: dict(name='kpt-54', id=54, color=[255, 0, 0], type='', swap=''),
+ 55: dict(
+ name='kpt-55', id=55, color=[255, 0, 0], type='', swap='kpt-59'),
+ 56: dict(
+ name='kpt-56', id=56, color=[255, 0, 0], type='', swap='kpt-58'),
+ 57: dict(name='kpt-57', id=57, color=[255, 0, 0], type='', swap=''),
+ 58: dict(
+ name='kpt-58', id=58, color=[255, 0, 0], type='', swap='kpt-56'),
+ 59: dict(
+ name='kpt-59', id=59, color=[255, 0, 0], type='', swap='kpt-55'),
+ 60: dict(
+ name='kpt-60', id=60, color=[255, 0, 0], type='', swap='kpt-72'),
+ 61: dict(
+ name='kpt-61', id=61, color=[255, 0, 0], type='', swap='kpt-71'),
+ 62: dict(
+ name='kpt-62', id=62, color=[255, 0, 0], type='', swap='kpt-70'),
+ 63: dict(
+ name='kpt-63', id=63, color=[255, 0, 0], type='', swap='kpt-69'),
+ 64: dict(
+ name='kpt-64', id=64, color=[255, 0, 0], type='', swap='kpt-68'),
+ 65: dict(
+ name='kpt-65', id=65, color=[255, 0, 0], type='', swap='kpt-75'),
+ 66: dict(
+ name='kpt-66', id=66, color=[255, 0, 0], type='', swap='kpt-74'),
+ 67: dict(
+ name='kpt-67', id=67, color=[255, 0, 0], type='', swap='kpt-73'),
+ 68: dict(
+ name='kpt-68', id=68, color=[255, 0, 0], type='', swap='kpt-64'),
+ 69: dict(
+ name='kpt-69', id=69, color=[255, 0, 0], type='', swap='kpt-63'),
+ 70: dict(
+ name='kpt-70', id=70, color=[255, 0, 0], type='', swap='kpt-62'),
+ 71: dict(
+ name='kpt-71', id=71, color=[255, 0, 0], type='', swap='kpt-61'),
+ 72: dict(
+ name='kpt-72', id=72, color=[255, 0, 0], type='', swap='kpt-60'),
+ 73: dict(
+ name='kpt-73', id=73, color=[255, 0, 0], type='', swap='kpt-67'),
+ 74: dict(
+ name='kpt-74', id=74, color=[255, 0, 0], type='', swap='kpt-66'),
+ 75: dict(
+ name='kpt-75', id=75, color=[255, 0, 0], type='', swap='kpt-65'),
+ 76: dict(
+ name='kpt-76', id=76, color=[255, 0, 0], type='', swap='kpt-82'),
+ 77: dict(
+ name='kpt-77', id=77, color=[255, 0, 0], type='', swap='kpt-81'),
+ 78: dict(
+ name='kpt-78', id=78, color=[255, 0, 0], type='', swap='kpt-80'),
+ 79: dict(name='kpt-79', id=79, color=[255, 0, 0], type='', swap=''),
+ 80: dict(
+ name='kpt-80', id=80, color=[255, 0, 0], type='', swap='kpt-78'),
+ 81: dict(
+ name='kpt-81', id=81, color=[255, 0, 0], type='', swap='kpt-77'),
+ 82: dict(
+ name='kpt-82', id=82, color=[255, 0, 0], type='', swap='kpt-76'),
+ 83: dict(
+ name='kpt-83', id=83, color=[255, 0, 0], type='', swap='kpt-87'),
+ 84: dict(
+ name='kpt-84', id=84, color=[255, 0, 0], type='', swap='kpt-86'),
+ 85: dict(name='kpt-85', id=85, color=[255, 0, 0], type='', swap=''),
+ 86: dict(
+ name='kpt-86', id=86, color=[255, 0, 0], type='', swap='kpt-84'),
+ 87: dict(
+ name='kpt-87', id=87, color=[255, 0, 0], type='', swap='kpt-83'),
+ 88: dict(
+ name='kpt-88', id=88, color=[255, 0, 0], type='', swap='kpt-92'),
+ 89: dict(
+ name='kpt-89', id=89, color=[255, 0, 0], type='', swap='kpt-91'),
+ 90: dict(name='kpt-90', id=90, color=[255, 0, 0], type='', swap=''),
+ 91: dict(
+ name='kpt-91', id=91, color=[255, 0, 0], type='', swap='kpt-89'),
+ 92: dict(
+ name='kpt-92', id=92, color=[255, 0, 0], type='', swap='kpt-88'),
+ 93: dict(
+ name='kpt-93', id=93, color=[255, 0, 0], type='', swap='kpt-95'),
+ 94: dict(name='kpt-94', id=94, color=[255, 0, 0], type='', swap=''),
+ 95: dict(
+ name='kpt-95', id=95, color=[255, 0, 0], type='', swap='kpt-93'),
+ 96: dict(
+ name='kpt-96', id=96, color=[255, 0, 0], type='', swap='kpt-97'),
+ 97: dict(
+ name='kpt-97', id=97, color=[255, 0, 0], type='', swap='kpt-96')
+ },
+ skeleton_info={},
+ joint_weights=[1.] * 98,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/datasets/zebra.py b/mmpose/configs/_base_/datasets/zebra.py
new file mode 100644
index 0000000000000000000000000000000000000000..eac71f796a761bbf87b123f8b7b8b4585df0c525
--- /dev/null
+++ b/mmpose/configs/_base_/datasets/zebra.py
@@ -0,0 +1,64 @@
+dataset_info = dict(
+ dataset_name='zebra',
+ paper_info=dict(
+ author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and '
+ 'Li, Liang and Koger, Benjamin and Costelloe, Blair R and '
+ 'Couzin, Iain D',
+ title='DeepPoseKit, a software toolkit for fast and robust '
+ 'animal pose estimation using deep learning',
+ container='Elife',
+ year='2019',
+ homepage='https://github.com/jgraving/DeepPoseKit-Data',
+ ),
+ keypoint_info={
+ 0:
+ dict(name='snout', id=0, color=[255, 255, 255], type='', swap=''),
+ 1:
+ dict(name='head', id=1, color=[255, 255, 255], type='', swap=''),
+ 2:
+ dict(name='neck', id=2, color=[255, 255, 255], type='', swap=''),
+ 3:
+ dict(
+ name='forelegL1',
+ id=3,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegR1'),
+ 4:
+ dict(
+ name='forelegR1',
+ id=4,
+ color=[255, 255, 255],
+ type='',
+ swap='forelegL1'),
+ 5:
+ dict(
+ name='hindlegL1',
+ id=5,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegR1'),
+ 6:
+ dict(
+ name='hindlegR1',
+ id=6,
+ color=[255, 255, 255],
+ type='',
+ swap='hindlegL1'),
+ 7:
+ dict(name='tailbase', id=7, color=[255, 255, 255], type='', swap=''),
+ 8:
+ dict(name='tailtip', id=8, color=[255, 255, 255], type='', swap='')
+ },
+ skeleton_info={
+ 0: dict(link=('head', 'snout'), id=0, color=[255, 255, 255]),
+ 1: dict(link=('neck', 'head'), id=1, color=[255, 255, 255]),
+ 2: dict(link=('forelegL1', 'neck'), id=2, color=[255, 255, 255]),
+ 3: dict(link=('forelegR1', 'neck'), id=3, color=[255, 255, 255]),
+ 4: dict(link=('hindlegL1', 'tailbase'), id=4, color=[255, 255, 255]),
+ 5: dict(link=('hindlegR1', 'tailbase'), id=5, color=[255, 255, 255]),
+ 6: dict(link=('tailbase', 'neck'), id=6, color=[255, 255, 255]),
+ 7: dict(link=('tailtip', 'tailbase'), id=7, color=[255, 255, 255])
+ },
+ joint_weights=[1.] * 9,
+ sigmas=[])
diff --git a/mmpose/configs/_base_/default_runtime.py b/mmpose/configs/_base_/default_runtime.py
new file mode 100644
index 0000000000000000000000000000000000000000..561d574fa757fa295f349394bf57047a2d8b576d
--- /dev/null
+++ b/mmpose/configs/_base_/default_runtime.py
@@ -0,0 +1,49 @@
+default_scope = 'mmpose'
+
+# hooks
+default_hooks = dict(
+ timer=dict(type='IterTimerHook'),
+ logger=dict(type='LoggerHook', interval=50),
+ param_scheduler=dict(type='ParamSchedulerHook'),
+ checkpoint=dict(type='CheckpointHook', interval=10),
+ sampler_seed=dict(type='DistSamplerSeedHook'),
+ visualization=dict(type='PoseVisualizationHook', enable=False),
+)
+
+# custom hooks
+custom_hooks = [
+ # Synchronize model buffers such as running_mean and running_var in BN
+ # at the end of each epoch
+ dict(type='SyncBuffersHook')
+]
+
+# multi-processing backend
+env_cfg = dict(
+ cudnn_benchmark=False,
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+ dist_cfg=dict(backend='nccl'),
+)
+
+# visualizer
+vis_backends = [
+ dict(type='LocalVisBackend'),
+ # dict(type='TensorboardVisBackend'),
+ # dict(type='WandbVisBackend'),
+]
+visualizer = dict(
+ type='PoseLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+
+# logger
+log_processor = dict(
+ type='LogProcessor', window_size=50, by_epoch=True, num_digits=6)
+log_level = 'INFO'
+load_from = None
+resume = False
+
+# file I/O backend
+backend_args = dict(backend='local')
+
+# training/validation/testing progress
+train_cfg = dict(by_epoch=True)
+val_cfg = dict()
+test_cfg = dict()
diff --git a/mmpose/configs/animal_2d_keypoint/README.md b/mmpose/configs/animal_2d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..efcc3841a51c20d776360c99eccfaeb94247ff0d
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/README.md
@@ -0,0 +1,20 @@
+# 2D Animal Keypoint Detection
+
+2D animal keypoint detection (animal pose estimation) aims to detect the keypoints of different species, including rats,
+dogs, macaques, and cheetahs. It provides detailed behavioral analysis for neuroscience, medical, and ecological applications.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_animal_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [DEMO](/demo/docs/en/2d_animal_demo.md) to generate fancy demos.
+
+
+
+
+
+
+
+
diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/README.md b/mmpose/configs/animal_2d_keypoint/rtmpose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbb103e36c5c9e66292904d15c8db467ce18f3b4
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/rtmpose/README.md
@@ -0,0 +1,16 @@
+# RTMPose
+
+Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet its application in the industrial community still suffers from heavy model parameters and high latency.
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose.
+Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries.
+To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deploying on the mobile device.
+
+## Results and Models
+
+### AP-10K Dataset
+
+Results on AP-10K validation set
+
+| Model | Input Size | AP | Details and Download |
+| :-------: | :--------: | :---: | :------------------------------------------: |
+| RTMPose-m | 256x256 | 0.722 | [rtmpose_ap10k.md](./ap10k/rtmpose_ap10k.md) |
diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..46dbfbef9fb42e15188d2ea8ae1763e84ff05d78
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
@@ -0,0 +1,246 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr over the second half of training (epoch 105 to 210)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'AP10KDataset'
+data_mode = 'topdown'
+data_root = 'data/ap10k/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/pose/ap10k/',
+# f'{data_root}': 's3://openmmlab/datasets/pose/ap10k/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-train-split1.json',
+ data_prefix=dict(img='data/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-val-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-test-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-val-split1.json')
+test_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-test-split1.json')
diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.md b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..4d035a372572aaac93ff980acbb367a1cc6a5efa
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.md
@@ -0,0 +1,25 @@
+
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+Results on AP-10K validation set
+
+| Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
+| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: |
+| [rtmpose-m](/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.722 | 0.939 | 0.788 | 0.569 | 0.728 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.yml b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0441d9e65faa6b0274f9152ff31ef1b66a112214
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose_ap10k.yml
@@ -0,0 +1,19 @@
+Models:
+- Config: configs/animal_2d_keypoint/rtmpose/ap10k/rtmpose-m_8xb64-210e_ap10k-256x256.py
+ In Collection: RTMPose
+ Alias: animal
+ Metadata:
+ Architecture:
+ - RTMPose
+ Training Data: AP-10K
+ Name: rtmpose-m_8xb64-210e_ap10k-256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.722
+ AP@0.5: 0.939
+ AP@0.75: 0.788
+ AP (L): 0.728
+ AP (M): 0.569
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-ap10k_pt-aic-coco_210e-256x256-7a041aa1_20230206.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b4f8e366ff378831724f970e4ef2245f9f4b4468
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/README.md
@@ -0,0 +1,54 @@
+# Top-down heatmap-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator will produce heatmaps which represent the
+likelihood of being a keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
+
+
+

+
+
+## Results and Models
+
+### Animal-Pose Dataset
+
+Results on AnimalPose validation set (1117 instances)
+
+| Model | Input Size | AP | AR | Details and Download |
+| :--------: | :--------: | :---: | :---: | :-------------------------------------------------------: |
+| HRNet-w32 | 256x256 | 0.740 | 0.780 | [hrnet_animalpose.md](./animalpose/hrnet_animalpose.md) |
+| HRNet-w48 | 256x256 | 0.738 | 0.778 | [hrnet_animalpose.md](./animalpose/hrnet_animalpose.md) |
+| ResNet-152 | 256x256 | 0.704 | 0.748 | [resnet_animalpose.md](./animalpose/resnet_animalpose.md) |
+| ResNet-101 | 256x256 | 0.696 | 0.736 | [resnet_animalpose.md](./animalpose/resnet_animalpose.md) |
+| ResNet-50 | 256x256 | 0.691 | 0.736 | [resnet_animalpose.md](./animalpose/resnet_animalpose.md) |
+
+### AP-10K Dataset
+
+Results on AP-10K validation set
+
+| Model | Input Size | AP | Details and Download |
+| :--------: | :--------: | :---: | :--------------------------------------------------: |
+| HRNet-w48 | 256x256 | 0.728 | [hrnet_ap10k.md](./ap10k/hrnet_ap10k.md) |
+| HRNet-w32 | 256x256 | 0.722 | [hrnet_ap10k.md](./ap10k/hrnet_ap10k.md) |
+| ResNet-101 | 256x256 | 0.681 | [resnet_ap10k.md](./ap10k/resnet_ap10k.md) |
+| ResNet-50 | 256x256 | 0.680 | [resnet_ap10k.md](./ap10k/resnet_ap10k.md) |
+| CSPNeXt-m | 256x256 | 0.703 | [cspnext_udp_ap10k.md](./ap10k/cspnext_udp_ap10k.md) |
+
+### Desert Locust Dataset
+
+Results on Desert Locust test set
+
+| Model | Input Size | AUC | EPE | Details and Download |
+| :--------: | :--------: | :---: | :--: | :-------------------------------------------: |
+| ResNet-152 | 160x160 | 0.925 | 1.49 | [resnet_locust.md](./locust/resnet_locust.md) |
+| ResNet-101 | 160x160 | 0.907 | 2.03 | [resnet_locust.md](./locust/resnet_locust.md) |
+| ResNet-50 | 160x160 | 0.900 | 2.27 | [resnet_locust.md](./locust/resnet_locust.md) |
+
+### Grévy’s Zebra Dataset
+
+Results on Grévy’s Zebra test set
+
+| Model | Input Size | AUC | EPE | Details and Download |
+| :--------: | :--------: | :---: | :--: | :----------------------------------------: |
+| ResNet-152 | 160x160 | 0.921 | 1.67 | [resnet_zebra.md](./zebra/resnet_zebra.md) |
+| ResNet-101 | 160x160 | 0.915 | 1.83 | [resnet_zebra.md](./zebra/resnet_zebra.md) |
+| ResNet-50 | 160x160 | 0.914 | 1.87 | [resnet_zebra.md](./zebra/resnet_zebra.md) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..58b971313fbf4a446a2c9720ac0a687fcc956513
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.md
@@ -0,0 +1,40 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+Animal-Pose (ICCV'2019)
+
+```bibtex
+@InProceedings{Cao_2019_ICCV,
+ author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
+ title = {Cross-Domain Adaptation for Animal Pose Estimation},
+ booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
+ month = {October},
+ year = {2019}
+}
+```
+
+
+
+Results on AnimalPose validation set (1117 instances)
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hrnet_w32](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.740 | 0.959 | 0.833 | 0.780 | 0.965 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256_20210426.log.json) |
+| [pose_hrnet_w48](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.738 | 0.958 | 0.831 | 0.778 | 0.962 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256_20210426.log.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..caba133370aafff30be90aa9171f8df66fefe7f4
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/hrnet_animalpose.yml
@@ -0,0 +1,34 @@
+Models:
+- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: Animal-Pose
+ Name: td-hm_hrnet-w32_8xb64-210e_animalpose-256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.740
+ AP@0.5: 0.959
+ AP@0.75: 0.833
+ AR: 0.780
+ AR@0.5: 0.965
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: Animal-Pose
+ Name: td-hm_hrnet-w48_8xb64-210e_animalpose-256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.738
+ AP@0.5: 0.958
+ AP@0.75: 0.831
+ AR: 0.778
+ AR@0.5: 0.962
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..20ddf54031e18f8bb9150fccfccff1f6cd5949bf
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.md
@@ -0,0 +1,41 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Animal-Pose (ICCV'2019)
+
+```bibtex
+@InProceedings{Cao_2019_ICCV,
+ author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
+ title = {Cross-Domain Adaptation for Animal Pose Estimation},
+ booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
+ month = {October},
+ year = {2019}
+}
+```
+
+
+
+Results on AnimalPose validation set (1117 instances)
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.691 | 0.947 | 0.770 | 0.736 | 0.955 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256_20210426.log.json) |
+| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py) | 256x256 | 0.696 | 0.948 | 0.774 | 0.736 | 0.951 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256_20210426.log.json) |
+| [pose_resnet_152](/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py) | 256x256 | 0.704 | 0.938 | 0.786 | 0.748 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256_20210426.log.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..345c13c138aafccb5ce1be0ea4136634327248c8
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/resnet_animalpose.yml
@@ -0,0 +1,51 @@
+Models:
+- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: Animal-Pose
+ Name: td-hm_res50_8xb64-210e_animalpose-256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.691
+ AP@0.5: 0.947
+ AP@0.75: 0.770
+ AR: 0.736
+ AR@0.5: 0.955
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Animal-Pose
+ Name: td-hm_res101_8xb64-210e_animalpose-256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.696
+ AP@0.5: 0.948
+ AP@0.75: 0.774
+ AR: 0.736
+ AR@0.5: 0.951
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Animal-Pose
+ Name: td-hm_res152_8xb32-210e_animalpose-256x256
+ Results:
+ - Dataset: Animal-Pose
+ Metrics:
+ AP: 0.704
+ AP@0.5: 0.938
+ AP@0.75: 0.786
+ AR: 0.748
+ AR@0.5: 0.946
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..2680fe8956e7b1cbf186b1c536204917478d721f
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w32_8xb64-210e_animalpose-256x256.py
@@ -0,0 +1,147 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=20,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'AnimalPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/animalpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d4a76d8f506c60493ef7e476cb5ed3310044ba2
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_hrnet-w48_8xb64-210e_animalpose-256x256.py
@@ -0,0 +1,147 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=20,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'AnimalPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/animalpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ffaabb06f160fb66260507db057686f4621b6b2
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res101_8xb64-210e_animalpose-256x256.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=20,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'AnimalPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/animalpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ed92929c9d42fa0caad87a5f6292f75745bd0bf
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res152_8xb32-210e_animalpose-256x256.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size for automatic LR scaling (256 = 8 x 32)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input -> 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator, ResNet-152 backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=20,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings shared by the train and val datasets below
+dataset_type = 'AnimalPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/animalpose/'
+
+# pipelines: train adds random augmentation and target generation; val does not
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the validation annotations; test reuses it
+val_evaluator = dict(
+ type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c053c8881461de72345478da49293a6ca96c1ed4
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/animalpose/td-hm_res50_8xb64-210e_animalpose-256x256.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size for automatic LR scaling (512 = 8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input -> 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator, ResNet-50 backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=20,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings shared by the train and val datasets below
+dataset_type = 'AnimalPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/animalpose/'
+
+# pipelines: train adds random augmentation and target generation; val does not
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/animalpose_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the validation annotations; test reuses it
+val_evaluator = dict(
+ type='CocoMetric', ann_file=data_root + 'annotations/animalpose_val.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa3139a71ac9dfa7e50bb742760b42b33178641d
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
@@ -0,0 +1,220 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 epochs in total; the final 30 use train_pipeline_stage2
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW; weight decay is zeroed for norm layers and biases
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning policy: 1000-iteration linear warm-up, cosine decay in second half
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # cosine-anneal the LR from epoch 105 to 210, down to 5% of base_lr
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# reference batch size for auto LR scaling; NOTE(review): 8 x 64 = 512 - confirm
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: UDP heatmaps, 256x256 input -> 64x64 heatmap, sigma=2
+codec = dict(
+ type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator, mmdet CSPNeXt backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings shared by the train, val and test datasets below
+dataset_type = 'AP10KDataset'
+data_mode = 'topdown'
+data_root = 'data/ap10k/'
+
+# pipelines: stage 1 uses wider scale/rotation ranges and always-on dropout
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train / val / test each read their own split1 annotation file
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-train-split1.json',
+ data_prefix=dict(img='data/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-val-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-test-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# hooks: track the best checkpoint by 'coco/AP'; max_keep_ckpts is set to 1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric against the val and test split1 annotation files
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-val-split1.json')
+test_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-test-split1.json')
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..fb10359685ecf9b546093b402c292c4c8a8ba0a9
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.md
@@ -0,0 +1,58 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+Results on AP-10K validation set
+
+| Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
+| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: |
+| [pose_cspnext_m](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.703 | 0.944 | 0.776 | 0.513 | 0.710 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..8fedc88374a9c027ed3f3268a42b5eed24a980f0
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext_udp_ap10k.yml
@@ -0,0 +1,19 @@
+Models:
+- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/cspnext-m_udp_8xb64-210e_ap10k-256x256.py
+ In Collection: UDP
+ Metadata:
+ Architecture: &id001
+ - UDP
+ - CSPNeXt
+ Training Data: AP-10K
+ Name: cspnext-m_udp_8xb64-210e_ap10k-256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.703
+ AP@0.5: 0.944
+ AP@0.75: 0.776
+ AP (L): 0.71
+ AP (M): 0.513
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-ap10k_pt-in1k_210e-256x256-1f2d947a_20230123.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..fbdd2cbf9f54807a8fcc00adc31f5839fcf94ea1
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.md
@@ -0,0 +1,41 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+Results on AP-10K validation set
+
+| Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
+| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: |
+| [pose_hrnet_w32](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.722 | 0.935 | 0.789 | 0.557 | 0.729 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.log.json) |
+| [pose_hrnet_w48](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.728 | 0.936 | 0.802 | 0.577 | 0.735 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.log.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..38aade8349ed34a214139574c0e83ae67b37e630
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/hrnet_ap10k.yml
@@ -0,0 +1,34 @@
+Models:
+- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: AP-10K
+ Name: td-hm_hrnet-w32_8xb64-210e_ap10k-256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.722
+ AP@0.5: 0.935
+ AP@0.75: 0.789
+ AP (L): 0.729
+ AP (M): 0.557
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: AP-10K
+ Name: td-hm_hrnet-w48_8xb64-210e_ap10k-256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.728
+ AP@0.5: 0.936
+ AP@0.75: 0.802
+ AP (L): 0.735
+ AP (M): 0.577
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..11ad6ed033516732bb921f0b32ed1d7336e6517b
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.md
@@ -0,0 +1,41 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+AP-10K (NeurIPS'2021)
+
+```bibtex
+@misc{yu2021ap10k,
+ title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
+ author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
+ year={2021},
+ eprint={2108.12617},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+Results on AP-10K validation set
+
+| Arch | Input Size | AP | AP50 | AP75 | APM | APL | ckpt | log |
+| :----------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :------------: | :------------: | :-----------------------------------------: | :----------------------------------------: |
+| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.680 | 0.926 | 0.738 | 0.552 | 0.687 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.log.json) |
+| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py) | 256x256 | 0.681 | 0.921 | 0.751 | 0.545 | 0.690 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.log.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..84cc4156b9447dacdf0554851602cc2c907814c9
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/resnet_ap10k.yml
@@ -0,0 +1,35 @@
+Models:
+- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: AP-10K
+ Name: td-hm_res50_8xb64-210e_ap10k-256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.680
+ AP@0.5: 0.926
+ AP@0.75: 0.738
+ AP (L): 0.687
+ AP (M): 0.552
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: AP-10K
+ Name: td-hm_res101_8xb64-210e_ap10k-256x256
+ Results:
+ - Dataset: AP-10K
+ Metrics:
+ AP: 0.681
+ AP@0.5: 0.921
+ AP@0.75: 0.751
+ AP (L): 0.690
+ AP (M): 0.545
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c61e6384aeea7efcca3ac2f2268fef01663e3234
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w32_8xb64-210e_ap10k-256x256.py
@@ -0,0 +1,164 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size for automatic LR scaling (512 = 8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input -> 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator, HRNet (w32 checkpoint) + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings shared by the train, val and test datasets below
+dataset_type = 'AP10KDataset'
+data_mode = 'topdown'
+data_root = 'data/ap10k/'
+
+# pipelines: train adds random augmentation and target generation; val does not
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train / val / test each read their own split1 annotation file
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-train-split1.json',
+ data_prefix=dict(img='data/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-val-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-test-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# evaluators: CocoMetric against the val and test split1 annotation files
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-val-split1.json')
+test_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-test-split1.json')
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..146114a887663a230f7a504e83f13da6fa4a2571
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_hrnet-w48_8xb64-210e_ap10k-256x256.py
@@ -0,0 +1,164 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size for automatic LR scaling (512 = 8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input -> 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator, HRNet (w48 checkpoint) + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings shared by the train, val and test datasets below
+dataset_type = 'AP10KDataset'
+data_mode = 'topdown'
+data_root = 'data/ap10k/'
+
+# pipelines: train adds random augmentation and target generation; val does not
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train / val / test each read their own split1 annotation file
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-train-split1.json',
+ data_prefix=dict(img='data/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-val-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-test-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# evaluators: CocoMetric against the val and test split1 annotation files
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-val-split1.json')
+test_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-test-split1.json')
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..be49577511584f892cc4c82797207e8ee1d6a8b4
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res101_8xb64-210e_ap10k-256x256.py
@@ -0,0 +1,135 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'AP10KDataset'
+data_mode = 'topdown'
+data_root = 'data/ap10k/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-train-split1.json',
+ data_prefix=dict(img='data/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-val-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-test-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-val-split1.json')
+test_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-test-split1.json')
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..2172cbe938506ae2faa08ed731710e51203d579f
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/ap10k/td-hm_res50_8xb64-210e_ap10k-256x256.py
@@ -0,0 +1,135 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'AP10KDataset'
+data_mode = 'topdown'
+data_root = 'data/ap10k/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-train-split1.json',
+ data_prefix=dict(img='data/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-val-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/ap10k-test-split1.json',
+ data_prefix=dict(img='data/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-val-split1.json')
+test_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/ap10k-test-split1.json')
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.md
new file mode 100644
index 0000000000000000000000000000000000000000..bb7c8374926f95a1e726973f2bddc8af04a702ed
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.md
@@ -0,0 +1,43 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Desert Locust (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
+
+Results on Desert Locust test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py) | 160x160 | 1.000 | 0.900 | 2.27 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160_20210407.log.json) |
+| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py) | 160x160 | 1.000 | 0.907 | 2.03 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160_20210407.log.json) |
+| [pose_resnet_152](/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py) | 160x160 | 1.000 | 0.925 | 1.49 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160_20210407.log.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c7d174fafc5136953beebf9b0dbc8dda5a800199
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/resnet_locust.yml
@@ -0,0 +1,45 @@
+Models:
+- Config: configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: Desert Locust
+ Name: td-hm_res50_8xb64-210e_locust-160x160
+ Results:
+ - Dataset: Desert Locust
+ Metrics:
+ AUC: 0.9
+ EPE: 2.27
+ PCK@0.2: 1
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Desert Locust
+ Name: td-hm_res101_8xb64-210e_locust-160x160
+ Results:
+ - Dataset: Desert Locust
+ Metrics:
+ AUC: 0.907
+ EPE: 2.03
+ PCK@0.2: 1
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: Desert Locust
+ Name: td-hm_res152_8xb32-210e_locust-160x160
+ Results:
+ - Dataset: Desert Locust
+ Metrics:
+ AUC: 0.925
+ EPE: 1.49
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6e6c2e39bb28913b7ba180d0ab74c71a24c6cb6
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res101_8xb64-210e_locust-160x160.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=35,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'LocustDataset'
+data_mode = 'topdown'
+data_root = 'data/locust/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.25,
+ rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/locust_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/locust_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f0a58bc88efab80a383df61137dbb45253da636
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res152_8xb32-210e_locust-160x160.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=35,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'LocustDataset'
+data_mode = 'topdown'
+data_root = 'data/locust/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.25,
+ rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/locust_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/locust_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..adbb89ee5b23f8697059f6778f1bfe13bd21432a
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/locust/td-hm_res50_8xb64-210e_locust-160x160.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=35,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'LocustDataset'
+data_mode = 'topdown'
+data_root = 'data/locust/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.25,
+ rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/locust_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/locust_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.md b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c12aed0f3407c5303f66326708ebc2d082c5a1f
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.md
@@ -0,0 +1,43 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Grévy’s Zebra (Elife'2019)
+
+```bibtex
+@article{graving2019deepposekit,
+ title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
+ author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
+ journal={Elife},
+ volume={8},
+ pages={e47994},
+ year={2019},
+ publisher={eLife Sciences Publications Limited}
+}
+```
+
+
+
+Results on Grévy’s Zebra test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_resnet_50](/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py) | 160x160 | 1.000 | 0.914 | 1.87 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160_20210407.log.json) |
+| [pose_resnet_101](/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py) | 160x160 | 1.000 | 0.915 | 1.83 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160_20210407.log.json) |
+| [pose_resnet_152](/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py) | 160x160 | 1.000 | 0.921 | 1.67 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160_20210407.log.json) |
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.yml b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3ecedc9700739b50697314df1c4f2f23416f4cfd
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/resnet_zebra.yml
@@ -0,0 +1,45 @@
+Models:
+- Config: configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: "Gr\xE9vy\u2019s Zebra"
+ Name: td-hm_res50_8xb64-210e_zebra-160x160
+ Results:
+ - Dataset: "Gr\xE9vy\u2019s Zebra"
+ Metrics:
+ AUC: 0.914
+ EPE: 1.87
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_zebra_160x160-5a104833_20210407.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: "Gr\xE9vy\u2019s Zebra"
+ Name: td-hm_res101_8xb64-210e_zebra-160x160
+ Results:
+ - Dataset: "Gr\xE9vy\u2019s Zebra"
+ Metrics:
+ AUC: 0.915
+ EPE: 1.83
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_zebra_160x160-e8cb2010_20210407.pth
+- Config: configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: "Gr\xE9vy\u2019s Zebra"
+ Name: td-hm_res152_8xb32-210e_zebra-160x160
+ Results:
+ - Dataset: "Gr\xE9vy\u2019s Zebra"
+ Metrics:
+ AUC: 0.921
+ EPE: 1.67
+ PCK@0.2: 1.0
+ Task: Animal 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_zebra_160x160-05de71dd_20210407.pth
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..68c56d80fb91b068d684ec29b5c77da3e920a71f
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res101_8xb64-210e_zebra-160x160.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=9,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'ZebraDataset'
+data_mode = 'topdown'
+data_root = 'data/zebra/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.25,
+ rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/zebra_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/zebra_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..abb14eefb84dd91912f84cf407faeabc83ec5c25
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res152_8xb32-210e_zebra-160x160.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=9,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'ZebraDataset'
+data_mode = 'topdown'
+data_root = 'data/zebra/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.25,
+ rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/zebra_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/zebra_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4d2777751d7837e7c892868f3027b145610de24
--- /dev/null
+++ b/mmpose/configs/animal_2d_keypoint/topdown_heatmap/zebra/td-hm_res50_8xb64-210e_zebra-160x160.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up + epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# auto-scale LR by the actual batch size; base 512 = 8 x 64 (cf. '8xb64')
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: save_best keeps the checkpoint with the greatest AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: 160x160 input, 40x40 MSRA heatmap targets, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(160, 160), heatmap_size=(40, 40), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=9,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'ZebraDataset'
+data_mode = 'topdown'
+data_root = 'data/zebra/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.25,
+ rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/zebra_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/zebra_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/README.md b/mmpose/configs/body_2d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d005d3fed76ccdb4260fef2f1a0f2c3466136d67
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/README.md
@@ -0,0 +1,21 @@
+# Human Body 2D Pose Estimation
+
+Multi-person human pose estimation is defined as the task of detecting the poses (or keypoints) of all people from an input image.
+
+Existing approaches can be categorized into top-down and bottom-up approaches.
+
+Top-down methods (e.g. DeepPose) divide the task into two stages: human detection and pose estimation. They perform human detection first, followed by single-person pose estimation given human bounding boxes.
+
+Bottom-up approaches (e.g. Associative Embedding) first detect all the keypoints and then group/associate them into person instances.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_body_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/en/2d_human_pose_demo.md#2d-human-pose-demo) to run demos.
+
+
+

+
diff --git a/mmpose/configs/body_2d_keypoint/associative_embedding/README.md b/mmpose/configs/body_2d_keypoint/associative_embedding/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7f5fa8ea1734e3ef121567aaaa1032fc71b862b2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/associative_embedding/README.md
@@ -0,0 +1,9 @@
+# Associative embedding: End-to-end learning for joint detection and grouping (AE)
+
+Associative Embedding is one of the most popular 2D bottom-up pose estimation approaches. It first detects all the keypoints and then groups/associates them into person instances.
+
+In order to group all the predicted keypoints into individuals, a tag is also predicted for each detected keypoint. Tags of the same person are similar, while tags of different people are different. Thus the keypoints can be grouped according to the tags.
+
+
+

+
diff --git a/mmpose/configs/body_2d_keypoint/associative_embedding/coco/ae_hrnet-w32_8xb24-300e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/associative_embedding/coco/ae_hrnet-w32_8xb24-300e_coco-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..5adc1aac1adc08cab8710ec83938d5261ebb7e7e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/associative_embedding/coco/ae_hrnet-w32_8xb24-300e_coco-512x512.py
@@ -0,0 +1,159 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 300 epochs, run validation every 10 epochs
+train_cfg = dict(max_epochs=300, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 1.5e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1.5e-3,
+))
+
+# learning policy: iteration-based linear warm-up + epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=300,
+ milestones=[200, 260],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# auto-scale LR by the actual batch size; base 192 = 8 x 24 (cf. '8xb24')
+auto_scale_lr = dict(base_batch_size=192)
+
+# hooks: checkpoint interval of 50; save_best keeps the greatest coco/AP
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', interval=50))
+
+# codec settings: 512x512 input, 128x128 heatmaps, up to 30 decoded instances
+codec = dict(
+ type='AssociativeEmbedding',
+ input_size=(512, 512),
+ heatmap_size=(128, 128),
+ sigma=2,
+ decode_keypoint_order=[
+ 0, 1, 2, 3, 4, 5, 6, 11, 12, 7, 8, 9, 10, 13, 14, 15, 16
+ ],
+ decode_max_instances=30)
+
+# model settings
+model = dict(
+ type='BottomupPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='AssociativeEmbeddingHead',
+ in_channels=32,
+ num_keypoints=17,
+ tag_dim=1,
+ tag_per_keypoint=True,
+ deconv_out_channels=None,
+ keypoint_loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ tag_loss=dict(type='AssociativeEmbeddingLoss', loss_weight=0.001),
+ # The heatmap will be resized to the input size before decoding
+ # if ``restore_heatmap_size==True``
+ decoder=dict(codec, heatmap_size=codec['input_size'])),
+ test_cfg=dict(
+ multiscale_test=False,
+ flip_test=True,
+ shift_heatmap=True,
+ restore_heatmap_size=True,
+ align_corners=False))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'bottomup'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = []
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(
+ type='BottomupResize',
+ input_size=codec['input_size'],
+ size_factor=32,
+ resize_mode='expand'),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=24,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_mode='none',
+ score_mode='keypoint',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..955293dcb1314f1d57cdb9efc4f62669cf41fabc
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py
@@ -0,0 +1,164 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 140 epochs, run validation every 10 epochs
+train_cfg = dict(max_epochs=140, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 1e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy: epoch-based step decay only (no warm-up scheduler here)
+param_scheduler = [
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=140,
+ milestones=[90, 120],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# auto-scale LR by the actual batch size; base 160 = 8 x 20 (cf. '8xb20')
+auto_scale_lr = dict(base_batch_size=160)
+
+# hooks: save_best keeps the checkpoint with the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: 512x512 input, 128x128 decoupled heatmaps
+codec = dict(
+ type='DecoupledHeatmap', input_size=(512, 512), heatmap_size=(128, 128))
+
+# model settings
+model = dict(
+ type='BottomupPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256),
+ multiscale_output=True)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='CIDHead',
+ in_channels=480,
+ num_keypoints=17,
+ gfd_channels=32,
+ coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0),
+ decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0),
+ contrastive_loss=dict(
+ type='InfoNCELoss', temperature=0.05, loss_weight=1.0),
+ decoder=codec,
+ ),
+ train_cfg=dict(max_train_instances=200),
+ test_cfg=dict(
+ multiscale_test=False,
+ flip_test=True,
+ shift_heatmap=False,
+ align_corners=False))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'bottomup'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='BottomupRandomAffine', input_size=codec['input_size']),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='BottomupGetHeatmapMask'),
+ dict(type='PackPoseInputs'),
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(
+ type='BottomupResize',
+ input_size=codec['input_size'],
+ size_factor=64,
+ resize_mode='expand'),
+ dict(
+ type='PackPoseInputs',
+ meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
+ 'img_shape', 'input_size', 'input_center', 'input_scale',
+ 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
+ 'skeleton_links'))
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=20,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=1,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_thr=0.8,
+ score_mode='keypoint',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..a114088ae217d8c8a2e0d16bab4459e163c6a129
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py
@@ -0,0 +1,164 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 140 epochs, run validation every 10 epochs
+train_cfg = dict(max_epochs=140, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 1e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy: epoch-based step decay only (no warm-up scheduler here)
+param_scheduler = [
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=140,
+ milestones=[90, 120],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# auto-scale LR by the actual batch size; base 160 = 8 x 20 (cf. '8xb20')
+auto_scale_lr = dict(base_batch_size=160)
+
+# hooks: save_best keeps the checkpoint with the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: 512x512 input, 128x128 decoupled heatmaps
+codec = dict(
+ type='DecoupledHeatmap', input_size=(512, 512), heatmap_size=(128, 128))
+
+# model settings
+model = dict(
+ type='BottomupPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384),
+ multiscale_output=True)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='CIDHead',
+ in_channels=720,
+ num_keypoints=17,
+ gfd_channels=48,
+ coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0),
+ decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0),
+ contrastive_loss=dict(
+ type='InfoNCELoss', temperature=0.05, loss_weight=1.0),
+ decoder=codec,
+ ),
+ train_cfg=dict(max_train_instances=200),
+ test_cfg=dict(
+ multiscale_test=False,
+ flip_test=True,
+ shift_heatmap=False,
+ align_corners=False))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'bottomup'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='BottomupRandomAffine', input_size=codec['input_size']),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='BottomupGetHeatmapMask'),
+ dict(type='PackPoseInputs'),
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(
+ type='BottomupResize',
+ input_size=codec['input_size'],
+ size_factor=64,
+ resize_mode='expand'),
+ dict(
+ type='PackPoseInputs',
+ meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
+ 'img_shape', 'input_size', 'input_center', 'input_scale',
+ 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
+ 'skeleton_links'))
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=20,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=1,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_thr=0.8,
+ score_mode='keypoint',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.md b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..f82cb04db0150ec1b63868ea875c5654fcb800d3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.md
@@ -0,0 +1,42 @@
+
+
+
+CID (CVPR'2022)
+
+```bibtex
+@InProceedings{Wang_2022_CVPR,
+ author = {Wang, Dongkai and Zhang, Shiliang},
+ title = {Contextual Instance Decoupling for Robust Multi-Person Pose Estimation},
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2022},
+ pages = {11060-11068}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py) | 512x512 | 0.704 | 0.894 | 0.775 | 0.753 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_42b7e6e6-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_20230207.json) |
+| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py) | 512x512 | 0.715 | 0.900 | 0.782 | 0.765 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_a36c3ecf-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_20230207.json) |
diff --git a/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.yml b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b230d20e247c24d7e3c998714eabbe6b132007dc
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/cid/coco/hrnet_coco.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: CID
+ Paper:
+ Title: Contextual Instance Decoupling for Robust Multi-Person Pose Estimation
+ URL: https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/cid.md
+Models:
+- Config: configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py
+ In Collection: CID
+ Metadata:
+ Architecture: &id001
+ - CID
+ - HRNet
+ Training Data: COCO
+ Name: cid_hrnet-w32_8xb20-140e_coco-512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.704
+ AP@0.5: 0.894
+ AP@0.75: 0.775
+ AR: 0.753
+ AR@0.5: 0.928
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_42b7e6e6-20230207.pth
+- Config: configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py
+ In Collection: CID
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: cid_hrnet-w48_8xb20-140e_coco-512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.715
+ AP@0.5: 0.9
+ AP@0.75: 0.782
+ AR: 0.765
+ AR@0.5: 0.935
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_a36c3ecf-20230207.pth
diff --git a/mmpose/configs/body_2d_keypoint/dekr/README.md b/mmpose/configs/body_2d_keypoint/dekr/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..04726421c0d67793dc4d2fc55fcf2cf491d3813e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/README.md
@@ -0,0 +1,22 @@
+# Bottom-up Human Pose Estimation via Disentangled Keypoint Regression (DEKR)
+
+
+
+
+DEKR (CVPR'2021)
+
+```bibtex
+@inproceedings{geng2021bottom,
+ title={Bottom-up human pose estimation via disentangled keypoint regression},
+ author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={14676--14686},
+ year={2021}
+}
+```
+
+
+
+DEKR is a popular 2D bottom-up pose estimation approach that simultaneously detects all the instances and regresses the offsets from the instance centers to joints.
+
+In order to predict the offsets more accurately, the offsets of different joints are regressed using separate branches with deformable convolutional layers. Thus convolution kernels with different shapes are adopted to extract features for the corresponding joint.
diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f2d03a82fcde5408b65756e73bb10769d71aec4
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py
@@ -0,0 +1,186 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 140 epochs, run validation every 10 epochs
+train_cfg = dict(max_epochs=140, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 1e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy: iteration-based linear warm-up + epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=140,
+ milestones=[90, 120],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# auto-scale LR by the actual batch size; base 80 = 8 x 10 (cf. '8xb10')
+auto_scale_lr = dict(base_batch_size=80)
+
+# hooks: save_best keeps the checkpoint with the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: 512x512 input, 128x128 heatmaps, up to 30 decoded instances
+codec = dict(
+ type='SPR',
+ input_size=(512, 512),
+ heatmap_size=(128, 128),
+ sigma=(4, 2),
+ minimal_diagonal_length=32**0.5,
+ generate_keypoint_heatmaps=True,
+ decode_max_instances=30)
+
+# model settings
+model = dict(
+ type='BottomupPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256),
+ multiscale_output=True)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='DEKRHead',
+ in_channels=480,
+ num_keypoints=17,
+ heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ displacement_loss=dict(
+ type='SoftWeightSmoothL1Loss',
+ use_target_weight=True,
+ supervise_empty=False,
+ beta=1 / 9,
+ loss_weight=0.002,
+ ),
+ decoder=codec,
+ rescore_cfg=dict(
+ in_channels=74,
+ norm_indexes=(5, 6),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/kpt_rescore_coco-33d58c5c.pth')),
+ ),
+ test_cfg=dict(
+ multiscale_test=False,
+ flip_test=True,
+ nms_dist_thr=0.05,
+ shift_heatmap=True,
+ align_corners=False))
+
+# enable DDP training when rescore net is used
+find_unused_parameters = True
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'bottomup'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='BottomupRandomAffine', input_size=codec['input_size']),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='BottomupGetHeatmapMask'),
+ dict(type='PackPoseInputs'),
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(
+ type='BottomupResize',
+ input_size=codec['input_size'],
+ size_factor=32,
+ resize_mode='expand'),
+ dict(
+ type='PackPoseInputs',
+ meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
+ 'img_shape', 'input_size', 'input_center', 'input_scale',
+ 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
+ 'skeleton_links'))
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=10,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=1,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_mode='none',
+ score_mode='keypoint',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..776a6bb0394f5e24d87935f524a788828cd563ea
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py
@@ -0,0 +1,187 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 140 epochs, run validation every 10 epochs
+train_cfg = dict(max_epochs=140, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 1e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy: iteration-based linear warm-up + epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=140,
+ milestones=[90, 120],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# auto-scale LR by the actual batch size; base 80 = 8 x 10 (cf. '8xb10')
+auto_scale_lr = dict(base_batch_size=80)
+
+# hooks: save_best keeps the checkpoint with the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: 640x640 input, 160x160 heatmaps, up to 30 decoded instances
+codec = dict(
+ type='SPR',
+ input_size=(640, 640),
+ heatmap_size=(160, 160),
+ sigma=(4, 2),
+ minimal_diagonal_length=32**0.5,
+ generate_keypoint_heatmaps=True,
+ decode_max_instances=30)
+
+# model settings
+model = dict(
+ type='BottomupPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384),
+ multiscale_output=True)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='DEKRHead',
+ in_channels=720,
+ num_keypoints=17,
+ num_heatmap_filters=48,
+ heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ displacement_loss=dict(
+ type='SoftWeightSmoothL1Loss',
+ use_target_weight=True,
+ supervise_empty=False,
+ beta=1 / 9,
+ loss_weight=0.002,
+ ),
+ decoder=codec,
+ rescore_cfg=dict(
+ in_channels=74,
+ norm_indexes=(5, 6),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/kpt_rescore_coco-33d58c5c.pth')),
+ ),
+ test_cfg=dict(
+ multiscale_test=False,
+ flip_test=True,
+ nms_dist_thr=0.05,
+ shift_heatmap=True,
+ align_corners=False))
+
+# enable DDP training when rescore net is used
+find_unused_parameters = True
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'bottomup'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='BottomupRandomAffine', input_size=codec['input_size']),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='BottomupGetHeatmapMask'),
+ dict(type='PackPoseInputs'),
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(
+ type='BottomupResize',
+ input_size=codec['input_size'],
+ size_factor=32,
+ resize_mode='expand'),
+ dict(
+ type='PackPoseInputs',
+ meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
+ 'img_shape', 'input_size', 'input_center', 'input_scale',
+ 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
+ 'skeleton_links'))
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=10,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=1,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_mode='none',
+ score_mode='keypoint',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.md b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..648b9bc735eea503707402c9f90f837288872f50
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.md
@@ -0,0 +1,58 @@
+
+
+
+DEKR (CVPR'2021)
+
+```bibtex
+@inproceedings{geng2021bottom,
+ title={Bottom-up human pose estimation via disentangled keypoint regression},
+ author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={14676--14686},
+ year={2021}
+}
+```
+
+
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [HRNet-w32](/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py) | 512x512 | 0.686 | 0.868 | 0.750 | 0.735 | 0.898 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_ac7c17bf-20221228.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_20221228.json) |
+| [HRNet-w48](/configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py) | 640x640 | 0.714 | 0.883 | 0.777 | 0.762 | 0.915 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_74796c32-20230124.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_20230124.json) |
diff --git a/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.yml b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0246b0723b93e8c04f61324de7add7f1e569ebce
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/coco/hrnet_coco.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: DEKR
+ Paper:
+ Title: Bottom-up human pose estimation via disentangled keypoint regression
+ URL: https://arxiv.org/abs/2104.02300
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/dekr.md
+Models:
+- Config: configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512.py
+ In Collection: DEKR
+ Metadata:
+ Architecture: &id001
+ - DEKR
+ - HRNet
+ Training Data: COCO
+ Name: dekr_hrnet-w32_8xb10-140e_coco-512x512
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.686
+ AP@0.5: 0.868
+ AP@0.75: 0.750
+ AR: 0.735
+ AR@0.5: 0.898
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w32_8xb10-140e_coco-512x512_ac7c17bf-20221228.pth
+- Config: configs/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640.py
+ In Collection: DEKR
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: dekr_hrnet-w48_8xb10-140e_coco-640x640
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.714
+ AP@0.5: 0.883
+ AP@0.75: 0.777
+ AR: 0.762
+ AR@0.5: 0.915
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/coco/dekr_hrnet-w48_8xb10-140e_coco-640x640_74796c32-20230124.pth
diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py
new file mode 100644
index 0000000000000000000000000000000000000000..c00f0459de9e2bd47ea2ee793315484e96685cbc
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py
@@ -0,0 +1,187 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=300, val_interval=20)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=300,
+ milestones=[200, 260],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=80)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='SPR',
+ input_size=(512, 512),
+ heatmap_size=(128, 128),
+ sigma=(4, 2),
+ minimal_diagonal_length=32**0.5,
+ generate_keypoint_heatmaps=True,
+ decode_max_instances=30)
+
+# model settings
+model = dict(
+ type='BottomupPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256),
+ multiscale_output=True)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='DEKRHead',
+ in_channels=480,
+ num_keypoints=14,
+ heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ displacement_loss=dict(
+ type='SoftWeightSmoothL1Loss',
+ use_target_weight=True,
+ supervise_empty=False,
+ beta=1 / 9,
+ loss_weight=0.004,
+ ),
+ decoder=codec,
+ rescore_cfg=dict(
+ in_channels=59,
+ norm_indexes=(0, 1),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/kpt_rescore_crowdpose-300c7efe.pth')),
+ ),
+ test_cfg=dict(
+ multiscale_test=False,
+ flip_test=True,
+ nms_dist_thr=0.05,
+ shift_heatmap=True,
+ align_corners=False))
+
+# enable DDP training when rescore net is used
+find_unused_parameters = True
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'bottomup'
+data_root = 'data/crowdpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='BottomupRandomAffine', input_size=codec['input_size']),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs'),
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(
+ type='BottomupResize',
+ input_size=codec['input_size'],
+ size_factor=32,
+ resize_mode='expand'),
+ dict(
+ type='PackPoseInputs',
+ meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
+ 'img_shape', 'input_size', 'input_center', 'input_scale',
+ 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
+ 'skeleton_links'))
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=10,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=1,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
+ nms_mode='none',
+ score_mode='keypoint',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py
new file mode 100644
index 0000000000000000000000000000000000000000..31d637299a81876481455df33013b4262387a36e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py
@@ -0,0 +1,188 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=300, val_interval=20)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=300,
+ milestones=[200, 260],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=40)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='SPR',
+ input_size=(640, 640),
+ heatmap_size=(160, 160),
+ sigma=(4, 2),
+ minimal_diagonal_length=32**0.5,
+ generate_keypoint_heatmaps=True,
+ decode_max_instances=30)
+
+# model settings
+model = dict(
+ type='BottomupPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384),
+ multiscale_output=True)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='DEKRHead',
+ in_channels=720,
+ num_keypoints=14,
+ num_heatmap_filters=48,
+ heatmap_loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ displacement_loss=dict(
+ type='SoftWeightSmoothL1Loss',
+ use_target_weight=True,
+ supervise_empty=False,
+ beta=1 / 9,
+ loss_weight=0.004,
+ ),
+ decoder=codec,
+ rescore_cfg=dict(
+ in_channels=59,
+ norm_indexes=(0, 1),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/kpt_rescore_crowdpose-300c7efe.pth')),
+ ),
+ test_cfg=dict(
+ multiscale_test=False,
+ flip_test=True,
+ nms_dist_thr=0.05,
+ shift_heatmap=True,
+ align_corners=False))
+
+# enable DDP training when rescore net is used
+find_unused_parameters = True
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'bottomup'
+data_root = 'data/crowdpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='BottomupRandomAffine', input_size=codec['input_size']),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs'),
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(
+ type='BottomupResize',
+ input_size=codec['input_size'],
+ size_factor=32,
+ resize_mode='expand'),
+ dict(
+ type='PackPoseInputs',
+ meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
+ 'img_shape', 'input_size', 'input_center', 'input_scale',
+ 'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
+ 'skeleton_links'))
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=5,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=1,
+ num_workers=1,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
+ nms_mode='none',
+ score_mode='keypoint',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.md b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..ea58d95b7f9a2323c68216baf04c4ee1afc2447b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.md
@@ -0,0 +1,56 @@
+
+
+
+DEKR (CVPR'2021)
+
+```bibtex
+@inproceedings{geng2021bottom,
+ title={Bottom-up human pose estimation via disentangled keypoint regression},
+ author={Geng, Zigang and Sun, Ke and Xiao, Bin and Zhang, Zhaoxiang and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={14676--14686},
+ year={2021}
+}
+```
+
+
+
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+Results on CrowdPose test without multi-scale test
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: |
+| [HRNet-w32](/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py) | 512x512 | 0.663 | 0.857 | 0.714 | 0.740 | 0.671 | 0.576 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-140e_crowdpose-512x512_147bae97-20221228.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-140e_crowdpose-512x512_20221228.json) |
+| [HRNet-w48](/configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py) | 640x640 | 0.679 | 0.869 | 0.731 | 0.753 | 0.688 | 0.593 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_4ea6031e-20230128.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_20230128.json) |
diff --git a/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.yml b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..02312e8cbacc6f8348184e50390cb2ab951965b3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/dekr/crowdpose/hrnet_crowdpose.yml
@@ -0,0 +1,37 @@
+Models:
+- Config: configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-300e_crowdpose-512x512.py
+ In Collection: DEKR
+ Metadata:
+ Architecture: &id001
+ - DEKR
+ - HRNet
+ Training Data: CrowdPose
+ Name: dekr_hrnet-w32_8xb10-300e_crowdpose-512x512
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.663
+ AP@0.5: 0.857
+ AP@0.75: 0.714
+ AP (E): 0.74
+ AP (M): 0.671
+ AP (H): 0.576
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w32_8xb10-140e_crowdpose-512x512_147bae97-20221228.pth
+- Config: configs/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640.py
+ In Collection: DEKR
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: dekr_hrnet-w48_8xb5-300e_crowdpose-640x640
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.679
+ AP@0.5: 0.869
+ AP@0.75: 0.731
+ AP (E): 0.753
+ AP (M): 0.688
+ AP (H): 0.593
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/dekr/crowdpose/dekr_hrnet-w48_8xb5-300e_crowdpose-640x640_4ea6031e-20230128.pth
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/README.md b/mmpose/configs/body_2d_keypoint/integral_regression/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d60eaa1a575686006139a8584851e994b7b29e60
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/README.md
@@ -0,0 +1,15 @@
+# Top-down integral-regression-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. In the second stage, integral-regression-based methods use a simple integral operation that relates and unifies heatmap and joint regression in a differentiable way, thereby obtaining the keypoint coordinates from the features extracted from the bounding box area, following the paradigm introduced in [Integral Human Pose Regression](https://arxiv.org/abs/1711.08229).
+
+## Results and Models
+
+### COCO Dataset
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Model | Input Size | AP | AR | Details and Download |
+| :------------------: | :--------: | :---: | :---: | :---------------------------------------------------: |
+| ResNet-50+Debias-IPR | 256x256 | 0.675 | 0.765 | [resnet_debias_coco.md](./coco/resnet_debias_coco.md) |
+| ResNet-50+DSNT | 256x256 | 0.674 | 0.764 | [resnet_dsnt_coco.md](./coco/resnet_dsnt_coco.md) |
+| ResNet-50+IPR | 256x256 | 0.633 | 0.730 | [resnet_ipr_coco.md](./coco/resnet_ipr_coco.md) |
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3dfaeeda8b850fa361eebbf5342ec64842d858e8
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py
@@ -0,0 +1,134 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='IntegralRegressionLabel',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2.0,
+ normalize=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ ),
+ head=dict(
+ type='DSNTHead',
+ in_channels=2048,
+ in_featuremap_size=(8, 8),
+ num_joints=17,
+ loss=dict(
+ type='MultipleLossWrapper',
+ losses=[
+ dict(type='SmoothL1Loss', use_target_weight=True),
+ dict(type='KeypointMSELoss', use_target_weight=True)
+ ]),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ shift_heatmap=True,
+ ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth'))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+test_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..9618c810ea20b0d147f71930034b616f6bed3a97
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py
@@ -0,0 +1,136 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='IntegralRegressionLabel',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2.0,
+ normalize=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ ),
+ head=dict(
+ type='DSNTHead',
+ in_channels=2048,
+ in_featuremap_size=(8, 8),
+ num_joints=17,
+ debias=True,
+ beta=10.,
+ loss=dict(
+ type='MultipleLossWrapper',
+ losses=[
+ dict(type='SmoothL1Loss', use_target_weight=True),
+ dict(type='JSDiscretLoss', use_target_weight=True)
+ ]),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ shift_heatmap=True,
+ ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth'))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+test_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c3897fce1acd0deabaedacea3b38b08b9138330
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py
@@ -0,0 +1,134 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='IntegralRegressionLabel',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2.0,
+ normalize=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ ),
+ head=dict(
+ type='DSNTHead',
+ in_channels=2048,
+ in_featuremap_size=(8, 8),
+ num_joints=17,
+ loss=dict(
+ type='MultipleLossWrapper',
+ losses=[
+ dict(type='SmoothL1Loss', use_target_weight=True),
+ dict(type='JSDiscretLoss', use_target_weight=True)
+ ]),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ shift_heatmap=True,
+ ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth'))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+test_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..40e3660e4f24f0d67d680d0f0ddc6c9f6c6c1014
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.md
@@ -0,0 +1,57 @@
+
+
+
+Debias IPR (ICCV'2021)
+
+```bibtex
+@inproceedings{gu2021removing,
+ title={Removing the Bias of Integral Pose Regression},
+ author={Gu, Kerui and Yang, Linlin and Yao, Angela},
+ booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
+ pages={11067--11076},
+ year={2021}
+ }
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [debias-ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py) | 256x256 | 0.675 | 0.872 | 0.740 | 0.765 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.yml b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b965238a5d482ec7212f8f95d0e4abadfe6773ce
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_debias_coco.yml
@@ -0,0 +1,25 @@
+Collections:
+- Name: DebiasIPR
+ Paper:
+ Title: Removing the Bias of Integral Pose Regression
+ URL: https://openaccess.thecvf.com/content/ICCV2021/papers/Gu_Removing_the_Bias_of_Integral_Pose_Regression_ICCV_2021_paper.pdf
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/debias_ipr.md
+Models:
+- Config: configs/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256.py
+ In Collection: DebiasIPR
+ Metadata:
+ Architecture: &id001
+ - Debias
+ - ResNet
+ Training Data: COCO
+ Name: ipr_res50_debias-8xb64-210e_coco-256x256
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.675
+ AP@0.5: 0.872
+ AP@0.75: 0.74
+ AR: 0.765
+ AR@0.5: 0.928
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_debias-8xb64-210e_coco-256x256-055a7699_20220913.pth
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..608974ae82e6f05913c98c35fb8fff3e5220b83a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.md
@@ -0,0 +1,56 @@
+
+
+
+DSNT (2018)
+
+```bibtex
+@article{nibali2018numerical,
+ title={Numerical Coordinate Regression with Convolutional Neural Networks},
+ author={Nibali, Aiden and He, Zhen and Morgan, Stuart and Prendergast, Luke},
+ journal={arXiv preprint arXiv:1801.07372},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ipr_resnet_50_dsnt](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py) | 256x256 | 0.674 | 0.870 | 0.744 | 0.764 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.yml b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f34e839c105c514b63f1b434207d56cfa1d57726
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_dsnt_coco.yml
@@ -0,0 +1,25 @@
+Collections:
+- Name: DSNT
+ Paper:
+ Title: Numerical Coordinate Regression with Convolutional Neural Networks
+ URL: https://arxiv.org/abs/1801.07372v2
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/dsnt.md
+Models:
+- Config: configs/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256.py
+ In Collection: DSNT
+ Metadata:
+ Architecture: &id001
+ - DSNT
+ - ResNet
+ Training Data: COCO
+ Name: ipr_res50_dsnt-8xb64-210e_coco-256x256
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.674
+ AP@0.5: 0.87
+ AP@0.75: 0.744
+ AR: 0.764
+ AR@0.5: 0.928
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_dsnt-8xb64-210e_coco-256x256-441eedc0_20220913.pth
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..ce4fbae5011b451b4fd639ac896de1b150b2592a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.md
@@ -0,0 +1,57 @@
+
+
+
+IPR (ECCV'2018)
+
+```bibtex
+@inproceedings{sun2018integral,
+ title={Integral human pose regression},
+ author={Sun, Xiao and Xiao, Bin and Wei, Fangyin and Liang, Shuang and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={529--545},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ipr_resnet_50](/configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py) | 256x256 | 0.633 | 0.860 | 0.703 | 0.730 | 0.919 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.yml b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fa22133f3eb3fd4020f4c17ead813fc1908b23cb
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/integral_regression/coco/resnet_ipr_coco.yml
@@ -0,0 +1,25 @@
+Collections:
+- Name: IPR
+ Paper:
+ Title: Integral human pose regression
+ URL: https://arxiv.org/abs/1711.08229
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/ipr.md
+Models:
+- Config: configs/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256.py
+ In Collection: IPR
+ Metadata:
+ Architecture: &id001
+ - IPR
+ - ResNet
+ Training Data: COCO
+ Name: ipr_res50_8xb64-210e_coco-256x256
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.633
+ AP@0.5: 0.86
+ AP@0.75: 0.703
+ AR: 0.73
+ AR@0.5: 0.919
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/integral_regression/coco/ipr_res50_8xb64-210e_coco-256x256-a3898a33_20220913.pth
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/README.md b/mmpose/configs/body_2d_keypoint/rtmpose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..303797491770ccaf6de7945035253b23f1e78e74
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/README.md
@@ -0,0 +1,39 @@
+# RTMPose
+
+Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet its application in the industrial community still suffers from heavy model parameters and high latency.
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose.
+Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries.
+To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deploying on the mobile device.
+
+## Results and Models
+
+### COCO Dataset
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Model | Input Size | AP | AR | Details and Download |
+| :----------------: | :--------: | :---: | :---: | :---------------------------------------: |
+| RTMPose-t | 256x192 | 0.682 | 0.736 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-s | 256x192 | 0.716 | 0.768 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-m | 256x192 | 0.746 | 0.795 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-l | 256x192 | 0.758 | 0.806 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-t-aic-coco | 256x192 | 0.685 | 0.738 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-s-aic-coco | 256x192 | 0.722 | 0.772 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-m-aic-coco | 256x192 | 0.758 | 0.806 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-l-aic-coco | 256x192 | 0.765 | 0.813 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-m-aic-coco | 384x288 | 0.770 | 0.816 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+| RTMPose-l-aic-coco | 384x288 | 0.773 | 0.819 | [rtmpose_coco.md](./coco/rtmpose_coco.md) |
+
+### MPII Dataset
+
+| Model | Input Size | PCKh@0.5 | PCKh@0.1 | Details and Download |
+| :-------: | :--------: | :------: | :------: | :---------------------------------------: |
+| RTMPose-m | 256x256 | 0.907 | 0.348 | [rtmpose_mpii.md](./mpii/rtmpose_mpii.md) |
+
+### CrowdPose Dataset
+
+Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Model | Input Size | AP | AR | Details and Download |
+| :-------: | :--------: | :---: | :---: | :------------------------------------------------------: |
+| RTMPose-m | 256x192 | 0.706 | 0.788 | [rtmpose_crowdpose.md](./crowdpose/rtmpose_crowdpose.md) |
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..dec6a3615b63ae68e6cdf3d44c28e506fa1755f5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py
@@ -0,0 +1,553 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30  # last epochs use train_pipeline_stage2 (custom_hooks)
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up, then cosine annealing in the second half
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to 210 (second half of max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-l_udp-body7_210e-256x192-5e9558ef_20230504.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(type='PhotometricDistortion'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.5, 1.5],
+ rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# mapping
+aic_coco = [
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+]
+
+crowdpose_coco = [
+ (0, 5),
+ (1, 6),
+ (2, 7),
+ (3, 8),
+ (4, 9),
+ (5, 10),
+ (6, 11),
+ (7, 12),
+ (8, 13),
+ (9, 14),
+ (10, 15),
+ (11, 16),
+]
+
+mpii_coco = [
+ (0, 16),
+ (1, 14),
+ (2, 12),
+ (3, 11),
+ (4, 13),
+ (5, 15),
+ (10, 10),
+ (11, 8),
+ (12, 6),
+ (13, 5),
+ (14, 7),
+ (15, 9),
+]
+
+jhmdb_coco = [
+ (3, 6),
+ (4, 5),
+ (5, 12),
+ (6, 11),
+ (7, 8),
+ (8, 7),
+ (9, 14),
+ (10, 13),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+]
+
+halpe_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+ochuman_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+posetrack_coco = [
+ (0, 0),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+dataset_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+dataset_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_train.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+dataset_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_train.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+dataset_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+dataset_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_train.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ dataset_coco,
+ dataset_aic,
+ dataset_crowdpose,
+ dataset_mpii,
+ dataset_jhmdb,
+ dataset_halpe,
+ dataset_posetrack,
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+
+# val datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_val.json',
+ data_prefix=dict(
+ img='pose/ai_challenge/ai_challenger_keypoint'
+ '_validation_20170911/keypoint_validation_images_20170911/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+val_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+val_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_val.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+val_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_test.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+val_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+val_ochuman = dict(
+ type='OCHumanDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='ochuman/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ data_prefix=dict(img='pose/OCHuman/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco)
+ ],
+)
+
+val_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_val.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+test_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ val_coco,
+ val_aic,
+ val_crowdpose,
+ val_mpii,
+ val_jhmdb,
+ val_halpe,
+ val_ochuman,
+ val_posetrack,
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+# default_hooks = dict(
+# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric for validation; PCK / AUC / EPE for testing
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = [
+ dict(type='PCKAccuracy', thr=0.1),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..b5c83dd50dd28a0c7775388dc7a3cbab7b0b1b41
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py
@@ -0,0 +1,553 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30  # last epochs use train_pipeline_stage2 (custom_hooks)
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up, then cosine annealing in the second half
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to 210 (second half of max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(288, 384),
+ sigma=(6., 6.93),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-l_udp-body7_210e-384x288-b15bc30d_20230504.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(9, 12),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(type='PhotometricDistortion'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.5, 1.5],
+ rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# mapping
+aic_coco = [
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+]
+
+crowdpose_coco = [
+ (0, 5),
+ (1, 6),
+ (2, 7),
+ (3, 8),
+ (4, 9),
+ (5, 10),
+ (6, 11),
+ (7, 12),
+ (8, 13),
+ (9, 14),
+ (10, 15),
+ (11, 16),
+]
+
+mpii_coco = [
+ (0, 16),
+ (1, 14),
+ (2, 12),
+ (3, 11),
+ (4, 13),
+ (5, 15),
+ (10, 10),
+ (11, 8),
+ (12, 6),
+ (13, 5),
+ (14, 7),
+ (15, 9),
+]
+
+jhmdb_coco = [
+ (3, 6),
+ (4, 5),
+ (5, 12),
+ (6, 11),
+ (7, 8),
+ (8, 7),
+ (9, 14),
+ (10, 13),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+]
+
+halpe_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+ochuman_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+posetrack_coco = [
+ (0, 0),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+dataset_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+dataset_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_train.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+dataset_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_train.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+dataset_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+dataset_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_train.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ dataset_coco,
+ dataset_aic,
+ dataset_crowdpose,
+ dataset_mpii,
+ dataset_jhmdb,
+ dataset_halpe,
+ dataset_posetrack,
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+
+# val datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_val.json',
+ data_prefix=dict(
+ img='pose/ai_challenge/ai_challenger_keypoint'
+ '_validation_20170911/keypoint_validation_images_20170911/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+val_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+val_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_val.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+val_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_test.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+val_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+val_ochuman = dict(
+ type='OCHumanDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='ochuman/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ data_prefix=dict(img='pose/OCHuman/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco)
+ ],
+)
+
+val_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_val.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+test_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ val_coco,
+ val_aic,
+ val_crowdpose,
+ val_mpii,
+ val_jhmdb,
+ val_halpe,
+ val_ochuman,
+ val_posetrack,
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+# default_hooks = dict(
+# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric for validation; PCK / AUC / EPE for testing
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = [
+ dict(type='PCKAccuracy', thr=0.1),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..39621ceb70429f4cbba45c81bc8c9ea05acd4fc5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py
@@ -0,0 +1,553 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr over the second half of training (epoch 105 to 210)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-body7_210e-256x192-e0c9327b_20230504.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.0,
+ drop_path=0.0,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(type='PhotometricDistortion'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.5, 1.5],
+ rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# mapping
+aic_coco = [
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+]
+
+crowdpose_coco = [
+ (0, 5),
+ (1, 6),
+ (2, 7),
+ (3, 8),
+ (4, 9),
+ (5, 10),
+ (6, 11),
+ (7, 12),
+ (8, 13),
+ (9, 14),
+ (10, 15),
+ (11, 16),
+]
+
+mpii_coco = [
+ (0, 16),
+ (1, 14),
+ (2, 12),
+ (3, 11),
+ (4, 13),
+ (5, 15),
+ (10, 10),
+ (11, 8),
+ (12, 6),
+ (13, 5),
+ (14, 7),
+ (15, 9),
+]
+
+jhmdb_coco = [
+ (3, 6),
+ (4, 5),
+ (5, 12),
+ (6, 11),
+ (7, 8),
+ (8, 7),
+ (9, 14),
+ (10, 13),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+]
+
+halpe_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+ochuman_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+posetrack_coco = [
+ (0, 0),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+dataset_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+dataset_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_train.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+dataset_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_train.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+dataset_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+dataset_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_train.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ dataset_coco,
+ dataset_aic,
+ dataset_crowdpose,
+ dataset_mpii,
+ dataset_jhmdb,
+ dataset_halpe,
+ dataset_posetrack,
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+
+# val datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_val.json',
+ data_prefix=dict(
+ img='pose/ai_challenge/ai_challenger_keypoint'
+ '_validation_20170911/keypoint_validation_images_20170911/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+val_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+val_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_val.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+val_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_test.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+val_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+val_ochuman = dict(
+ type='OCHumanDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='ochuman/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ data_prefix=dict(img='pose/OCHuman/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco)
+ ],
+)
+
+val_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_val.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+test_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ val_coco,
+ val_aic,
+ val_crowdpose,
+ val_mpii,
+ val_jhmdb,
+ val_halpe,
+ val_ochuman,
+ val_posetrack,
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+# default_hooks = dict(
+# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = [
+ dict(type='PCKAccuracy', thr=0.1),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..42a9355c6a3531d4ab4d013cc74cf523fbb720d9
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py
@@ -0,0 +1,553 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr over the second half of training (epoch 105 to 210)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(288, 384),
+ sigma=(6., 6.93),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-body7_210e-384x288-b9bc2b57_20230504.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(9, 12),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.0,
+ drop_path=0.0,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(type='PhotometricDistortion'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.5, 1.5],
+ rotate_factor=90),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# mapping
+aic_coco = [
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+]
+
+crowdpose_coco = [
+ (0, 5),
+ (1, 6),
+ (2, 7),
+ (3, 8),
+ (4, 9),
+ (5, 10),
+ (6, 11),
+ (7, 12),
+ (8, 13),
+ (9, 14),
+ (10, 15),
+ (11, 16),
+]
+
+mpii_coco = [
+ (0, 16),
+ (1, 14),
+ (2, 12),
+ (3, 11),
+ (4, 13),
+ (5, 15),
+ (10, 10),
+ (11, 8),
+ (12, 6),
+ (13, 5),
+ (14, 7),
+ (15, 9),
+]
+
+jhmdb_coco = [
+ (3, 6),
+ (4, 5),
+ (5, 12),
+ (6, 11),
+ (7, 8),
+ (8, 7),
+ (9, 14),
+ (10, 13),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+]
+
+halpe_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+ochuman_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+posetrack_coco = [
+ (0, 0),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+dataset_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+dataset_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_train.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+dataset_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_train.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+dataset_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+dataset_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_train.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ dataset_coco,
+ dataset_aic,
+ dataset_crowdpose,
+ dataset_mpii,
+ dataset_jhmdb,
+ dataset_halpe,
+ dataset_posetrack,
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+
+# val datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_val.json',
+ data_prefix=dict(
+ img='pose/ai_challenge/ai_challenger_keypoint'
+ '_validation_20170911/keypoint_validation_images_20170911/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+val_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+val_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_val.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+val_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_test.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+val_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+val_ochuman = dict(
+ type='OCHumanDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='ochuman/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ data_prefix=dict(img='pose/OCHuman/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco)
+ ],
+)
+
+val_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_val.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+test_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ val_coco,
+ val_aic,
+ val_crowdpose,
+ val_mpii,
+ val_jhmdb,
+ val_halpe,
+ val_ochuman,
+ val_posetrack,
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+# default_hooks = dict(
+# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = [
+ dict(type='PCKAccuracy', thr=0.1),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e82f07cf6b465e1b19250a96117e4d439061448
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py
@@ -0,0 +1,553 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr over the second half of training (epoch 105 to 210)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.33,
+ widen_factor=0.5,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-s_udp-body7_210e-256x192-8c9ccbdb_20230504.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=512,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(type='PhotometricDistortion'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# mapping
+aic_coco = [
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+]
+
+crowdpose_coco = [
+ (0, 5),
+ (1, 6),
+ (2, 7),
+ (3, 8),
+ (4, 9),
+ (5, 10),
+ (6, 11),
+ (7, 12),
+ (8, 13),
+ (9, 14),
+ (10, 15),
+ (11, 16),
+]
+
+mpii_coco = [
+ (0, 16),
+ (1, 14),
+ (2, 12),
+ (3, 11),
+ (4, 13),
+ (5, 15),
+ (10, 10),
+ (11, 8),
+ (12, 6),
+ (13, 5),
+ (14, 7),
+ (15, 9),
+]
+
+jhmdb_coco = [
+ (3, 6),
+ (4, 5),
+ (5, 12),
+ (6, 11),
+ (7, 8),
+ (8, 7),
+ (9, 14),
+ (10, 13),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+]
+
+halpe_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+ochuman_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+posetrack_coco = [
+ (0, 0),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+dataset_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+dataset_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_train.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+dataset_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_train.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+dataset_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+dataset_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_train.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ dataset_coco,
+ dataset_aic,
+ dataset_crowdpose,
+ dataset_mpii,
+ dataset_jhmdb,
+ dataset_halpe,
+ dataset_posetrack,
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+
+# val datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_val.json',
+ data_prefix=dict(
+ img='pose/ai_challenge/ai_challenger_keypoint'
+ '_validation_20170911/keypoint_validation_images_20170911/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+val_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+val_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_val.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+val_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_test.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+val_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+val_ochuman = dict(
+ type='OCHumanDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='ochuman/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ data_prefix=dict(img='pose/OCHuman/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco)
+ ],
+)
+
+val_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_val.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+test_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ val_coco,
+ val_aic,
+ val_crowdpose,
+ val_mpii,
+ val_jhmdb,
+ val_halpe,
+ val_ochuman,
+ val_posetrack,
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+# default_hooks = dict(
+# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = [
+ dict(type='PCKAccuracy', thr=0.1),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fd705251db31acc77d9307aadeb5899688882182
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py
@@ -0,0 +1,554 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 105 to 210 epoch (second half of training)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.167,
+ widen_factor=0.375,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-tiny_udp-body7_210e-256x192-a3775292_20230504.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=384,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(type='PhotometricDistortion'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# mapping
+aic_coco = [
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+]
+
+crowdpose_coco = [
+ (0, 5),
+ (1, 6),
+ (2, 7),
+ (3, 8),
+ (4, 9),
+ (5, 10),
+ (6, 11),
+ (7, 12),
+ (8, 13),
+ (9, 14),
+ (10, 15),
+ (11, 16),
+]
+
+mpii_coco = [
+ (0, 16),
+ (1, 14),
+ (2, 12),
+ (3, 11),
+ (4, 13),
+ (5, 15),
+ (10, 10),
+ (11, 8),
+ (12, 6),
+ (13, 5),
+ (14, 7),
+ (15, 9),
+]
+
+jhmdb_coco = [
+ (3, 6),
+ (4, 5),
+ (5, 12),
+ (6, 11),
+ (7, 8),
+ (8, 7),
+ (9, 14),
+ (10, 13),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+]
+
+halpe_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+ochuman_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+posetrack_coco = [
+ (0, 0),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+dataset_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+dataset_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_train.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+dataset_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_train.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+dataset_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+dataset_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_train.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ dataset_coco,
+ dataset_aic,
+ dataset_crowdpose,
+ dataset_mpii,
+ dataset_jhmdb,
+ dataset_halpe,
+ dataset_posetrack,
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+
+# val datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_val.json',
+ data_prefix=dict(
+ img='pose/ai_challenge/ai_challenger_keypoint'
+ '_validation_20170911/keypoint_validation_images_20170911/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=aic_coco)
+ ],
+)
+
+val_crowdpose = dict(
+ type='CrowdPoseDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=crowdpose_coco)
+ ],
+)
+
+val_mpii = dict(
+ type='MpiiDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='mpii/annotations/mpii_val.json',
+ data_prefix=dict(img='pose/MPI/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=mpii_coco)
+ ],
+)
+
+val_jhmdb = dict(
+ type='JhmdbDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='jhmdb/annotations/Sub1_test.json',
+ data_prefix=dict(img='pose/JHMDB/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=jhmdb_coco)
+ ],
+)
+
+val_halpe = dict(
+ type='HalpeDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=halpe_coco)
+ ],
+)
+
+val_ochuman = dict(
+ type='OCHumanDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='ochuman/annotations/'
+ 'ochuman_coco_format_val_range_0.00_1.00.json',
+ data_prefix=dict(img='pose/OCHuman/images/'),
+ pipeline=[
+ dict(type='KeypointConverter', num_keypoints=17, mapping=ochuman_coco)
+ ],
+)
+
+val_posetrack = dict(
+ type='PoseTrack18Dataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='posetrack18/annotations/posetrack18_val.json',
+ data_prefix=dict(img='pose/PoseChallenge2018/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter', num_keypoints=17, mapping=posetrack_coco)
+ ],
+)
+
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+test_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[
+ val_coco,
+ val_aic,
+ val_crowdpose,
+ val_mpii,
+ val_jhmdb,
+ val_halpe,
+ val_ochuman,
+ val_posetrack,
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+# default_hooks = dict(
+# checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ # dict(
+ # type='EMAHook',
+ # ema_type='ExpMomentumEMA',
+ # momentum=0.0002,
+ # update_buffers=True,
+ # priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+
+test_evaluator = [
+ dict(type='PCKAccuracy', thr=0.1),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.md b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.md
new file mode 100644
index 0000000000000000000000000000000000000000..a294be844ec1ee9d1e9c72ffba4b375af97efe07
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.md
@@ -0,0 +1,76 @@
+
+
+
+RTMPose (arXiv'2023)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+
+
+
+
+
+RTMDet (arXiv'2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+- Results on COCO val2017, using a person detector with human AP of 56.4 on the COCO val2017 dataset.
+- `*` denotes a model trained on 7 public datasets:
+ - [AI Challenger](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#aic)
+ - [MS COCO](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#coco)
+ - [CrowdPose](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#crowdpose)
+ - [MPII](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#mpii)
+ - [sub-JHMDB](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#sub-jhmdb-dataset)
+ - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+ - [PoseTrack18](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#posetrack18)
+- `Body8` denotes the 7 datasets mentioned above plus the [OCHuman](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_body_keypoint.html#ochuman) dataset, which together are used for evaluation.
+
+| Config | Input Size | AP<br>(COCO) | PCK@0.1<br>(Body8) | AUC<br>(Body8) | EPE<br>(Body8) | Params(M) | FLOPS(G) | Download |
+| :--------------------------------------------: | :--------: | :---------------: | :---------------------: | :-----------------: | :-----------------: | :-------: | :------: | :-----------------------------------------------: |
+| [RTMPose-t\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py) | 256x192 | 65.9 | 91.44 | 63.18 | 19.45 | 3.34 | 0.36 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-t_simcc-body7_pt-body7_420e-256x192-026a1439_20230504.pth) |
+| [RTMPose-s\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py) | 256x192 | 69.7 | 92.45 | 65.15 | 17.85 | 5.47 | 0.68 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-body7_pt-body7_420e-256x192-acd4a1ef_20230504.pth) |
+| [RTMPose-m\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py) | 256x192 | 74.9 | 94.25 | 68.59 | 15.12 | 13.59 | 1.93 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-256x192-e48f03d0_20230504.pth) |
+| [RTMPose-l\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py) | 256x192 | 76.7 | 95.08 | 70.14 | 13.79 | 27.66 | 4.16 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-256x192-4dba18fc_20230504.pth) |
+| [RTMPose-m\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py) | 384x288 | 76.6 | 94.64 | 70.38 | 13.98 | 13.72 | 4.33 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth) |
+| [RTMPose-l\*](/configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py) | 384x288 | 78.3 | 95.36 | 71.58 | 13.08 | 27.79 | 9.35 | [Model](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth) |
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.yml b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c0f5a90863edbab45cf791a1ca478610d1eb8d55
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/body8/rtmpose_body8.yml
@@ -0,0 +1,93 @@
+Collections:
+- Name: RTMPose
+ Paper:
+ Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose"
+ URL: https://arxiv.org/abs/2303.07399
+ README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md
+Models:
+- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-t_8xb256-210e_body8-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: &id001
+ - RTMPose
+ Training Data: &id002
+ - AI Challenger
+ - COCO
+ - CrowdPose
+ - MPII
+ - sub-JHMDB
+ - Halpe
+ - PoseTrack18
+ Name: rtmpose-t_8xb256-210e_body8-256x192
+ Results:
+ - Dataset: Body8
+ Metrics:
+ AP: 0.659
+ Mean@0.1: 0.914
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-t_simcc-body7_pt-body7_420e-256x192-026a1439_20230504.pth
+- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-s_8xb256-210e_body8-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-s_8xb256-210e_body8-256x192
+ Results:
+ - Dataset: Body8
+ Metrics:
+ AP: 0.697
+ Mean@0.1: 0.925
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-body7_pt-body7_420e-256x192-acd4a1ef_20230504.pth
+- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-m_8xb256-210e_body8-256x192
+ Results:
+ - Dataset: Body8
+ Metrics:
+ AP: 0.749
+ Mean@0.1: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-256x192-e48f03d0_20230504.pth
+- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-l_8xb256-210e_body8-256x192
+ Results:
+ - Dataset: Body8
+ Metrics:
+ AP: 0.767
+ Mean@0.1: 0.951
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-256x192-4dba18fc_20230504.pth
+- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-m_8xb256-210e_body8-384x288.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-m_8xb256-210e_body8-384x288
+ Results:
+ - Dataset: Body8
+ Metrics:
+ AP: 0.766
+ Mean@0.1: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-body7_pt-body7_420e-384x288-65e718c4_20230504.pth
+- Config: configs/body_2d_keypoint/rtmpose/body8/rtmpose-l_8xb256-210e_body8-384x288.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-l_8xb256-210e_body8-384x288
+ Results:
+ - Dataset: Body8
+ Metrics:
+ AP: 0.783
+ Mean@0.1: 0.954
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-body7_pt-body7_420e-384x288-3f5a1437_20230504.pth
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..5af4dd04a26d2c9bd832bad3dc54d405410d32b9
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
@@ -0,0 +1,272 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=17,
+ mapping=[
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ ])
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..015c46b70b2e921ef942cd30bc3068e41c99145f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
@@ -0,0 +1,272 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine LR annealing from epoch 210 to epoch 420
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(288, 384),
+ sigma=(6., 6.93),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(9, 12),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=17,
+ mapping=[
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ ])
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2459b3417e6e104a5d8fc0a2ca5eb01724b5864e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine LR annealing from epoch 210 to epoch 420
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file=f'{data_root}person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..151197d1f11f58ae63394499fbdc02d1bb7791a0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
@@ -0,0 +1,272 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine LR annealing from epoch 210 to epoch 420
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=17,
+ mapping=[
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ ])
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=128 * 2,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..6507ba247b737877293640ba89e23bc238625c24
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
@@ -0,0 +1,272 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine LR annealing from epoch 210 to epoch 420
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(288, 384),
+ sigma=(6., 6.93),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(9, 12),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=17,
+ mapping=[
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ ])
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=128 * 2,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..7438820418f715683513317a6e4c7f0c0c785ab1
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine LR annealing from epoch 210 to epoch 420
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file=f'{data_root}person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..29c82d78ed8a276125aed42f80aea69dcc5f5b24
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
@@ -0,0 +1,272 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.33,
+ widen_factor=0.5,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=512,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'./{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=17,
+ mapping=[
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ ])
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=128 * 2,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a2f57c95ca8969d98664947951af4a255bbc931
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.33,
+ widen_factor=0.5,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=512,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'./{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file=f'{data_root}person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd375b06bfd029e61744e48f3ce521369fe48cc5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py
@@ -0,0 +1,273 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.167,
+ widen_factor=0.375,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=384,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'./{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=17,
+ mapping=[
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ ])
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ # Turn off EMA while training the tiny model
+ # dict(
+ # type='EMAHook',
+ # ema_type='ExpMomentumEMA',
+ # momentum=0.0002,
+ # update_buffers=True,
+ # priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..4180d6c7b15295bde1d85d97dee5134c5ef1c807
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py
@@ -0,0 +1,233 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 420
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 210 to 420 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.167,
+ widen_factor=0.375,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=384,
+ out_channels=17,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'./{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file=f'{data_root}person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ # Turn off EMA while training the tiny model
+ # dict(
+ # type='EMAHook',
+ # ema_type='ExpMomentumEMA',
+ # momentum=0.0002,
+ # update_buffers=True,
+ # priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.md b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..d3cc9298df5e723f77cf26a4184c2efc7ca4469b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.md
@@ -0,0 +1,71 @@
+<!-- [ALGORITHM] -->
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2303.07399">RTMPose (arXiv'2023)</a></summary>
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+</details>
+
+<!-- [BACKBONE] -->
+
+<details>
+<summary align="right"><a href="https://arxiv.org/abs/2212.07784">RTMDet (arXiv'2022)</a></summary>
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+</details>
+
+<!-- [DATASET] -->
+
+<details>
+<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+</details>
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [rtmpose-t](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py) | 256x192 | 0.682 | 0.883 | 0.759 | 0.736 | 0.920 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-coco_pt-aic-coco_420e-256x192-e613ba3f_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-coco_pt-aic-coco_420e-256x192-e613ba3f_20230127.json) |
+| [rtmpose-s](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py) | 256x192 | 0.716 | 0.892 | 0.789 | 0.768 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.json) |
+| [rtmpose-m](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py) | 256x192 | 0.746 | 0.899 | 0.817 | 0.795 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.json) |
+| [rtmpose-l](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py) | 256x192 | 0.758 | 0.906 | 0.826 | 0.806 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.json) |
+| [rtmpose-t-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.685 | 0.880 | 0.761 | 0.738 | 0.918 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.json) |
+| [rtmpose-s-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.722 | 0.892 | 0.794 | 0.772 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.json) |
+| [rtmpose-m-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.758 | 0.903 | 0.826 | 0.806 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.json) |
+| [rtmpose-l-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py) | 256x192 | 0.765 | 0.906 | 0.835 | 0.813 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.json) |
+| [rtmpose-m-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py) | 384x288 | 0.770 | 0.908 | 0.833 | 0.816 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.json) |
+| [rtmpose-l-aic-coco](/configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py) | 384x288 | 0.773 | 0.907 | 0.835 | 0.819 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.json) |
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.yml b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bebe64b3b7a1ce4038e58366d0c5e9998cec7b20
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/coco/rtmpose_coco.yml
@@ -0,0 +1,171 @@
+Collections:
+- Name: RTMPose
+ Paper:
+ Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose"
+ URL: https://arxiv.org/abs/2303.07399
+ README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md
+Models:
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_coco-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: &id001
+ - RTMPose
+ Training Data: COCO
+ Name: rtmpose-t_8xb256-420e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.682
+ AP@0.5: 0.883
+ AP@0.75: 0.759
+ AR: 0.736
+ AR@0.5: 0.92
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-coco_pt-aic-coco_420e-256x192-e613ba3f_20230127.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_coco-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: rtmpose-s_8xb256-420e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.716
+ AP@0.5: 0.892
+ AP@0.75: 0.789
+ AR: 0.768
+ AR@0.5: 0.929
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-coco_pt-aic-coco_420e-256x192-8edcf0d7_20230127.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_coco-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: rtmpose-m_8xb256-420e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.746
+ AP@0.5: 0.899
+ AP@0.75: 0.817
+ AR: 0.795
+ AR@0.5: 0.935
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco_pt-aic-coco_420e-256x192-d8dd5ca4_20230127.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_coco-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: rtmpose-l_8xb256-420e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.758
+ AP@0.5: 0.906
+ AP@0.75: 0.826
+ AR: 0.806
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco_pt-aic-coco_420e-256x192-1352a4d2_20230127.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-t_8xb256-420e_aic-coco-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: &id002
+ - COCO
+ - AI Challenger
+ Name: rtmpose-t_8xb256-420e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.685
+ AP@0.5: 0.88
+ AP@0.75: 0.761
+ AR: 0.738
+ AR@0.5: 0.918
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-tiny_simcc-aic-coco_pt-aic-coco_420e-256x192-cfc8f33d_20230126.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-s_8xb256-420e_aic-coco-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-s_8xb256-420e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.722
+ AP@0.5: 0.892
+ AP@0.75: 0.794
+ AR: 0.772
+ AR@0.5: 0.929
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-s_simcc-aic-coco_pt-aic-coco_420e-256x192-fcb2599b_20230126.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-256x192.py
+ In Collection: RTMPose
+ Alias: human
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-m_8xb256-420e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.758
+ AP@0.5: 0.903
+ AP@0.75: 0.826
+ AR: 0.806
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-256x192-63eb25f7_20230126.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-l_8xb256-420e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.765
+ AP@0.5: 0.906
+ AP@0.75: 0.835
+ AR: 0.813
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-256x192-f016ffe0_20230126.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-m_8xb256-420e_aic-coco-384x288.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-m_8xb256-420e_aic-coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.770
+ AP@0.5: 0.908
+ AP@0.75: 0.833
+ AR: 0.816
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-aic-coco_pt-aic-coco_420e-384x288-a62a0b32_20230228.pth
+- Config: configs/body_2d_keypoint/rtmpose/coco/rtmpose-l_8xb256-420e_aic-coco-384x288.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: *id002
+ Name: rtmpose-l_8xb256-420e_aic-coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.773
+ AP@0.5: 0.907
+ AP@0.75: 0.835
+ AR: 0.819
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-aic-coco_pt-aic-coco_420e-384x288-97d6cb0f_20230228.pth
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3660fb43b787b572856fa61c34417008c0c19451
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
@@ -0,0 +1,235 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30  # custom_hooks switches to train_pipeline_stage2 for these
+base_lr = 5e-4
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warmup, then cosine decay over the second half
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,  # begin/end of this warmup are counted in iterations
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to 210 (max_epochs // 2 to max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings (shared by the head decoder and the GenerateTarget encoder)
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),  # presumably (width, height), cf. 256x192 in the file name
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',  # CSPNeXt is taken from mmdet rather than mmpose
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=14,  # presumably one channel per CrowdPose keypoint; confirm
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),  # equals input_size / 32 on each axis
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/'  # trailing slash is relied on by val_evaluator's string concat
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [  # stage 1: the stronger augmentation of the two train pipelines
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random augmentation and no target generation
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # milder variant, installed by mmdet.PipelineSwitchHook
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (train on the trainval annotations, validate/test on the test split)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json',  # written with the 'data/' prefix, unlike ann_file
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader as-is
+
+# hooks: track the best checkpoint by 'crowdpose/AP' (higher is better)
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='crowdpose/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'crowdpose/annotations/mmpose_crowdpose_test.json',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')  # presumably yields the 'crowdpose/AP' key used by save_best
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.md b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..42bcf0f65f3b76b453d82e8e24bc040cd14bcb0b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.md
@@ -0,0 +1,60 @@
+
+
+
+RTMPose (arXiv'2023)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+
+
+
+
+
+RTMDet (arXiv'2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: |
+| [rtmpose-m](/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.706 | 0.841 | 0.765 | 0.799 | 0.719 | 0.582 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-crowdpose_pt-aic-coco_210e-256x192-e6192cac_20230224.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-crowdpose_pt-aic-coco_210e-256x192-e6192cac_20230224.json) |
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.yml b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5fb842f56355267e7b22509c76717c97223b2721
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose_crowdpose.yml
@@ -0,0 +1,19 @@
+Models:
+- Config: configs/body_2d_keypoint/rtmpose/crowdpose/rtmpose-m_8xb64-210e_crowdpose-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture:
+ - RTMPose
+ Training Data: CrowdPose
+ Name: rtmpose-m_8xb64-210e_crowdpose-256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.706
+ AP@0.5: 0.841
+ AP@0.75: 0.765
+ AP (E): 0.799
+ AP (M): 0.719
+ AP (H): 0.582
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-crowdpose_pt-aic-coco_210e-256x192-e6192cac_20230224.pth
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d3ee02f215a3ba595f0af1882cea386ef6daa029
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,228 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30  # custom_hooks switches to train_pipeline_stage2 for these
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning policy: linear warmup, then cosine decay over the second half
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,  # begin/end of this warmup are counted in iterations
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to 210 (max_epochs // 2 to max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)  # NOTE(review): file name says 8xb64 (= 512); confirm
+
+# codec settings (shared by the head decoder and the GenerateTarget encoder)
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),  # square input, cf. 256x256 in the file name
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',  # CSPNeXt is taken from mmdet rather than mmpose
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=16,  # presumably one channel per MPII keypoint; confirm
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),  # equals input_size / 32 on each axis
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'  # already ends with '/'; headbox_file below adds another
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/',
+# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/'
+# }))
+
+# pipelines
+train_pipeline = [  # stage 1: the stronger augmentation of the two train pipelines
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random augmentation and no target generation
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # milder variant, installed by mmdet.PipelineSwitchHook
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (train on mpii_train.json, validate/test on mpii_val.json)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file=f'{data_root}/annotations/mpii_gt_val.mat',  # expands to 'data/mpii//annotations/...' (redundant '/')
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader as-is
+
+# hooks: track the best checkpoint by 'PCK' (higher is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')  # presumably reports the 'PCK' key used by save_best; confirm
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.md b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..990edb45eb2b882e6ddfe14253562dce5a5adba9
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.md
@@ -0,0 +1,43 @@
+
+
+
+RTMPose (arXiv'2023)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean / w. flip | Mean@0.1 | ckpt | log |
+| :------------------------------------------------------- | :--------: | :------------: | :------: | :------------------------------------------------------: | :------------------------------------------------------: |
+| [rtmpose-m](/configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py) | 256x256 | 0.907 | 0.348 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-mpii_pt-aic-coco_210e-256x256-ec4dbec8_20230206.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-mpii_pt-aic-coco_210e-256x256-ec4dbec8_20230206.json) |
diff --git a/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.yml b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2e1eb28659f49681496cacdbd5bb4f2062e5358b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/rtmpose/mpii/rtmpose_mpii.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/body_2d_keypoint/rtmpose/mpii/rtmpose-m_8xb64-210e_mpii-256x256.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture:
+ - RTMPose
+ Training Data: MPII
+ Name: rtmpose-m_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.907
+ Mean@0.1: 0.348
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-mpii_pt-aic-coco_210e-256x256-ec4dbec8_20230206.pth
diff --git a/mmpose/configs/body_2d_keypoint/simcc/README.md b/mmpose/configs/body_2d_keypoint/simcc/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6148c18bf5061743de2eacf531554f567fa50516
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/README.md
@@ -0,0 +1,20 @@
+# Top-down SimCC-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. In the second stage, SimCC-based methods reformulate human pose estimation as two classification tasks, one for the horizontal and one for the vertical coordinate, uniformly dividing each pixel into several bins, and thus obtain the keypoint coordinates from the features extracted from the bounding box area, following the paradigm introduced in [SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation](https://arxiv.org/abs/2107.03332).
+
+
+

+
+
+## Results and Models
+
+### COCO Dataset
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Model | Input Size | AP | AR | Details and Download |
+| :---------------------------: | :--------: | :---: | :---: | :-----------------------------------------------: |
+| ResNet-50+SimCC | 384x288 | 0.735 | 0.790 | [resnet_coco.md](./coco/resnet_coco.md) |
+| ResNet-50+SimCC | 256x192 | 0.721 | 0.781 | [resnet_coco.md](./coco/resnet_coco.md) |
+| S-ViPNAS-MobileNet-V3+SimCC | 256x192 | 0.695 | 0.755 | [vipnas_coco.md](./coco/vipnas_coco.md) |
+| MobileNet-V2+SimCC(wo/deconv) | 256x192 | 0.620 | 0.678 | [mobilenetv2_coco.md](./coco/mobilenetv2_coco.md) |
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..42438774bade657bd5c927d08d99353acbcf7f82
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.md
@@ -0,0 +1,55 @@
+
+
+
+SimCC (ECCV'2022)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2107.03332,
+ title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation},
+ author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
+ year={2021}
+}
+```
+
+
+
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [simcc_mobilenetv2_wo_deconv](/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py) | 256x192 | 0.620 | 0.855 | 0.697 | 0.678 | 0.902 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-4b0703bb_20221010.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-4b0703bb_20221010.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.yml b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..00ef5aaecd3bfde35036e309d7a438fd2d9ea219
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/mobilenetv2_coco.yml
@@ -0,0 +1,19 @@
+Models:
+- Config: configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py
+ In Collection: SimCC
+ Metadata:
+ Architecture: &id001
+ - SimCC
+ - MobilenetV2
+ Training Data: COCO
+ Name: simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.62
+ AP@0.5: 0.855
+ AP@0.75: 0.697
+ AR: 0.678
+ AR@0.5: 0.902
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192-4b0703bb_20221010.pth
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.md b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..80592b4db38a306c297b952264dc0e0b51d64fde
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.md
@@ -0,0 +1,56 @@
+
+
+
+SimCC (ECCV'2022)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2107.03332,
+ title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation},
+ author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
+ year={2021}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.721 | 0.897 | 0.798 | 0.781 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.log.json) |
+| [simcc_resnet_50](/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py) | 384x288 | 0.735 | 0.899 | 0.800 | 0.790 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1e56c9e477f36576909633a4107f24db5872bbfe
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/resnet_coco.yml
@@ -0,0 +1,41 @@
+Collections:
+- Name: SimCC
+ Paper:
+ Title: A Simple Coordinate Classification Perspective for Human Pose Estimation
+ URL: https://arxiv.org/abs/2107.03332
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/simcc.md
+Models:
+- Config: configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py
+ In Collection: SimCC
+ Metadata:
+ Architecture: &id001
+ - SimCC
+ - ResNet
+ Training Data: COCO
+ Name: simcc_res50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.721
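+ AP@0.5: 0.900  # NOTE(review): the accompanying results table lists AP50 = 0.897 for this model; confirm which value is correct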
+ AP@0.75: 0.798
+ AR: 0.781
+ AR@0.5: 0.937
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192-8e0f5b59_20220919.pth
+- Config: configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py
+ In Collection: SimCC
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: simcc_res50_8xb32-140e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.735
+ AP@0.5: 0.899
+ AP@0.75: 0.800
+ AR: 0.790
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288-45c3ba34_20220913.pth
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..65101ada88bbc10603d6931848e92be532d1768c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_mobilenetv2_wo-deconv-8xb64-210e_coco-256x192.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: SimCC + MobileNetV2 on COCO; 210 training epochs, val_interval=10 (presumably epochs; confirm)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False),  # warm-up: begin=0, end=500 counted in iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # presumably 8 GPUs x 64 per GPU, per "8xb64" in the config name
+
+# codec settings (this dict is reused below as the head's decoder and as the target encoder)
+codec = dict(
+ type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0)  # input_size presumably (w, h); confirm
+
+# model settings: top-down estimator = data preprocessor + MobileNetV2 backbone + SimCC head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='mmcls://mobilenet_v2',
+ )),
+ head=dict(
+ type='SimCCHead',
+ in_channels=1280,
+ out_channels=17,  # presumably one output per COCO keypoint; confirm
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),  # equals input_size (192, 256) divided by 32
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ deconv_out_channels=None,  # presumably disables the deconv layers ("wo-deconv" in the config name)
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings (data_root ends with '/', which the path concatenations below rely on)
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds flip / half-body / bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 with a detection-results bbox file for evaluation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # adjacent literals concatenate into one path
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader config (same dict object)
+
+# hooks: checkpoint hook tracks 'coco/AP' with rule='greater' (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators: CocoMetric against the val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ed9586bfb1f733c964b2ad093d0b862d6f3a07a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb32-140e_coco-384x288.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: SimCC + ResNet-50 on COCO; 140 training epochs, val_interval=10 (presumably epochs; confirm)
+train_cfg = dict(max_epochs=140, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 1e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy: iteration-based linear warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False),  # warm-up: begin=0, end=500 counted in iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[90, 120],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # NOTE(review): "8xb32" in the config name suggests 8 x 32 = 256; confirm 512 is intended
+
+# codec settings (this dict is reused below as the head's decoder and as the target encoder)
+codec = dict(
+ type='SimCCLabel', input_size=(288, 384), sigma=6.0, simcc_split_ratio=2.0)  # input_size presumably (w, h); confirm
+
+# model settings: top-down estimator = data preprocessor + ResNet-50 backbone + SimCC head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='SimCCHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one output per COCO keypoint; confirm
+ input_size=codec['input_size'],
+ in_featuremap_size=(9, 12),  # equals input_size (288, 384) divided by 32
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings (data_root ends with '/', which the path concatenations below rely on)
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds flip / half-body / bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+test_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 with a detection-results bbox file for evaluation
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # adjacent literals concatenate into one path
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=test_pipeline,  # validation uses the pipeline named test_pipeline in this config
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader config (same dict object)
+
+# hooks: checkpoint hook tracks 'coco/AP' with rule='greater' (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators: CocoMetric against the val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1e1fe440d183fc9c535d45bba7cbb6856429c760
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_res50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,114 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: SimCC + ResNet-50 on COCO; 210 training epochs, val_interval=10 (presumably epochs; confirm)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 1e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy: iteration-based linear warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False),  # warm-up: begin=0, end=500 counted in iterations (by_epoch=False)
+ dict(type='MultiStepLR', milestones=[170, 200], gamma=0.1, by_epoch=True)  # begin/end not set: scheduler defaults apply
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # presumably 8 GPUs x 64 per GPU, per "8xb64" in the config name
+
+# codec settings (this dict is reused below as the head's decoder and as the target encoder)
+codec = dict(
+ type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0)  # input_size presumably (w, h); confirm
+
+# model settings: top-down estimator = data preprocessor + ResNet-50 backbone + SimCC head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='SimCCHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one output per COCO keypoint; confirm
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),  # equals input_size (192, 256) divided by 32
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True))
+
+# base dataset settings (data_root ends with '/', which the path concatenations below rely on)
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds flip / half-body / bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+test_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 with a detection-results bbox file for evaluation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # adjacent literals concatenate into one path
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=test_pipeline,  # validation uses the pipeline named test_pipeline in this config
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader config (same dict object)
+
+# hooks: checkpoint hook tracks 'coco/AP' with rule='greater' (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators: CocoMetric against the val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea61b0fb4fc49801254ded96d93f37e155796f5d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py
@@ -0,0 +1,119 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: SimCC + ViPNAS-MobileNetV3 on COCO; 210 training epochs, val_interval=10 (presumably epochs; confirm)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False),  # warm-up: begin=0, end=500 counted in iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # presumably 8 GPUs x 64 per GPU, per "8xb64" in the config name
+
+# codec settings (this dict is reused below as the head's decoder and as the target encoder)
+codec = dict(
+ type='SimCCLabel', input_size=(192, 256), sigma=6.0, simcc_split_ratio=2.0)  # input_size presumably (w, h); confirm
+
+# model settings: top-down estimator = data preprocessor + ViPNAS MobileNetV3 backbone + SimCC head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ViPNAS_MobileNetV3'),  # no init_cfg given here, so no pretrained checkpoint is configured in this file
+ head=dict(
+ type='SimCCHead',
+ in_channels=160,
+ out_channels=17,  # presumably one output per COCO keypoint; confirm
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),  # equals input_size (192, 256) divided by 32
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ deconv_type='vipnas',
+ deconv_out_channels=(160, 160, 160),  # three deconv stages of 160 channels each
+ deconv_num_groups=(160, 160, 160),  # groups equal channels (presumably depthwise deconvs; confirm)
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings (data_root ends with '/', which the path concatenations below rely on)
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds flip / half-body / bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 with a detection-results bbox file for evaluation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=data_root + 'person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # the two literals join first, then data_root is prepended
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader config (same dict object)
+
+# hooks: checkpoint hook tracks 'coco/AP' with rule='greater' (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators: CocoMetric against the val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..a9d8b98fc3a9dbae4128ac9ce3c09a24de0474eb
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.md
@@ -0,0 +1,54 @@
+
+
+
+SimCC (ECCV'2022)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2107.03332,
+ title={SimCC: a Simple Coordinate Classification Perspective for Human Pose Estimation},
+ author={Li, Yanjie and Yang, Sen and Liu, Peidong and Zhang, Shoukui and Wang, Yunxiao and Wang, Zhicheng and Yang, Wankou and Xia, Shu-Tao},
+ year={2021}
+}
+```
+
+
+
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@article{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [simcc_S-ViPNAS-MobileNetV3](/configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py) | 256x192 | 0.695 | 0.883 | 0.772 | 0.755 | 0.927 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.yml b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..95077c05c658a42e767833d2102b2e0603288f72
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/coco/vipnas_coco.yml
@@ -0,0 +1,19 @@
+Models:
+- Config: configs/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192.py
+ In Collection: SimCC
+ Metadata:
+ Architecture: &id001
+ - SimCC
+ - ViPNAS
+ Training Data: COCO
+ Name: simcc_vipnas-mbv3_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.695
+ AP@0.5: 0.883
+ AP@0.75: 0.772
+ AR: 0.755
+ AR@0.5: 0.927
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/simcc/coco/simcc_vipnas-mbv3_8xb64-210e_coco-256x192-719f3489_20220922.pth
diff --git a/mmpose/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..965fda71e6a01b00f86669e1b7ba1b8bc57f2831
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/simcc/mpii/simcc_res50_wo-deconv-8xb64-210e_mpii-256x256.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: SimCC + ResNet-50 on MPII; 210 training epochs, val_interval=10 (presumably epochs; confirm)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False),  # warm-up: begin=0, end=500 counted in iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # presumably 8 GPUs x 64 per GPU, per "8xb64" in the config name
+
+# codec settings (this dict is reused below as the head's decoder and as the target encoder)
+codec = dict(
+ type='SimCCLabel', input_size=(256, 256), sigma=6.0, simcc_split_ratio=2.0)
+
+# model settings: top-down estimator = data preprocessor + ResNet-50 backbone + SimCC head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='SimCCHead',
+ in_channels=2048,
+ out_channels=16,  # presumably one output per MPII keypoint; confirm
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),  # equals input_size (256, 256) divided by 32
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ deconv_out_channels=None,  # presumably disables the deconv layers ("wo-deconv" in the config name)
+ loss=dict(type='KLDiscretLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings (data_root ends with '/')
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training adds flip / bbox augmentation and target generation (no RandomHalfBody step here)
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),  # shift_prob=0: presumably turns off random bbox shifting; confirm
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: MPII training split, shuffled, 64 samples per batch
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file=f'{data_root}/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks: checkpoint hook tracks 'PCK' with rule='greater' (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# evaluators: MPII PCK accuracy for validation; test reuses the same evaluator config
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9e23b874bc6211eb51a7305c7913f8d1b46e3e4e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/README.md
@@ -0,0 +1,117 @@
+# Top-down heatmap-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator will produce heatmaps which represent the likelihood of being a keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
+
+
+

+
+
+## Results and Models
+
+### COCO Dataset
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Model | Input Size | AP | AR | Details and Download |
+| :-------------: | :--------: | :---: | :---: | :-------------------------------------------------: |
+| ViTPose-h | 256x192 | 0.790 | 0.840 | [vitpose_coco.md](./coco/vitpose_coco.md) |
+| HRNet-w48+UDP | 256x192 | 0.768 | 0.817 | [hrnet_udp_coco.md](./coco/hrnet_udp_coco.md) |
+| MSPN 4-stg | 256x192 | 0.765 | 0.826 | [mspn_coco.md](./coco/mspn_coco.md) |
+| HRNet-w48+Dark | 256x192 | 0.764 | 0.814 | [hrnet_dark_coco.md](./coco/hrnet_dark_coco.md) |
+| HRNet-w48 | 256x192 | 0.756 | 0.809 | [hrnet_coco.md](./coco/hrnet_coco.md) |
+| HRFormer-B | 256x192 | 0.754 | 0.807 | [hrformer_coco.md](./coco/hrformer_coco.md) |
+| RSN-50-3x | 256x192 | 0.750 | 0.814 | [rsn_coco.md](./coco/rsn_coco.md) |
+| CSPNeXt-l | 256x192 | 0.750 | 0.800 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) |
+| HRNet-w32 | 256x192 | 0.749 | 0.804 | [hrnet_coco.md](./coco/hrnet_coco.md) |
+| Swin-L | 256x192 | 0.743 | 0.798 | [swin_coco.md](./coco/swin_coco.md) |
+| ViTPose-s | 256x192 | 0.739 | 0.792 | [vitpose_coco.md](./coco/vitpose_coco.md) |
+| HRFormer-S | 256x192 | 0.738 | 0.793 | [hrformer_coco.md](./coco/hrformer_coco.md) |
+| Swin-B | 256x192 | 0.737 | 0.794 | [swin_coco.md](./coco/swin_coco.md) |
+| SEResNet-101 | 256x192 | 0.734 | 0.790 | [seresnet_coco.md](./coco/seresnet_coco.md) |
+| SCNet-101 | 256x192 | 0.733 | 0.789 | [scnet_coco.md](./coco/scnet_coco.md) |
+| ResNet-101+Dark | 256x192 | 0.733 | 0.786 | [resnet_dark_coco.md](./coco/resnet_dark_coco.md) |
+| CSPNeXt-m | 256x192 | 0.732 | 0.785 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) |
+| ResNetV1d-101 | 256x192 | 0.732 | 0.785 | [resnetv1d_coco.md](./coco/resnetv1d_coco.md) |
+| SEResNet-50 | 256x192 | 0.729 | 0.784 | [seresnet_coco.md](./coco/seresnet_coco.md) |
+| SCNet-50 | 256x192 | 0.728 | 0.784 | [scnet_coco.md](./coco/scnet_coco.md) |
+| ResNet-101 | 256x192 | 0.726 | 0.783 | [resnet_coco.md](./coco/resnet_coco.md) |
+| ResNeXt-101 | 256x192 | 0.726 | 0.781 | [resnext_coco.md](./coco/resnext_coco.md) |
+| HourglassNet | 256x256 | 0.726 | 0.780 | [hourglass_coco.md](./coco/hourglass_coco.md) |
+| ResNeSt-101 | 256x192 | 0.725 | 0.781 | [resnest_coco.md](./coco/resnest_coco.md) |
+| RSN-50 | 256x192 | 0.724 | 0.790 | [rsn_coco.md](./coco/rsn_coco.md) |
+| Swin-T | 256x192 | 0.724 | 0.782 | [swin_coco.md](./coco/swin_coco.md) |
+| MSPN 1-stg | 256x192 | 0.723 | 0.788 | [mspn_coco.md](./coco/mspn_coco.md) |
+| ResNetV1d-50 | 256x192 | 0.722 | 0.777 | [resnetv1d_coco.md](./coco/resnetv1d_coco.md) |
+| ResNeSt-50 | 256x192 | 0.720 | 0.775 | [resnest_coco.md](./coco/resnest_coco.md) |
+| ResNet-50 | 256x192 | 0.718 | 0.774 | [resnet_coco.md](./coco/resnet_coco.md) |
+| ResNeXt-50 | 256x192 | 0.715 | 0.771 | [resnext_coco.md](./coco/resnext_coco.md) |
+| PVT-S | 256x192 | 0.714 | 0.773 | [pvt_coco.md](./coco/pvt_coco.md) |
+| CSPNeXt-s | 256x192 | 0.697 | 0.753 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) |
+| LiteHRNet-30 | 256x192 | 0.676 | 0.736 | [litehrnet_coco.md](./coco/litehrnet_coco.md) |
+| CSPNeXt-tiny | 256x192 | 0.665 | 0.723 | [cspnext_udp_coco.md](./coco/cspnext_udp_coco.md) |
+| MobileNet-v2 | 256x192 | 0.648 | 0.709 | [mobilenetv2_coco.md](./coco/mobilenetv2_coco.md) |
+| LiteHRNet-18 | 256x192 | 0.642 | 0.705 | [litehrnet_coco.md](./coco/litehrnet_coco.md) |
+| CPM | 256x192 | 0.627 | 0.689 | [cpm_coco.md](./coco/cpm_coco.md) |
+| ShuffleNet-v2 | 256x192 | 0.602 | 0.668 | [shufflenetv2_coco.md](./coco/shufflenetv2_coco.md) |
+| ShuffleNet-v1 | 256x192 | 0.587 | 0.654 | [shufflenetv1_coco.md](./coco/shufflenetv1_coco.md) |
+| AlexNet | 256x192 | 0.448 | 0.521 | [alexnet_coco.md](./coco/alexnet_coco.md) |
+
+### MPII Dataset
+
+| Model | Input Size | PCKh@0.5 | PCKh@0.1 | Details and Download |
+| :------------: | :--------: | :------: | :------: | :-------------------------------------------------: |
+| HRNet-w48+Dark | 256x256 | 0.905 | 0.360 | [hrnet_dark_mpii.md](./mpii/hrnet_dark_mpii.md) |
+| CSPNeXt-m      |  256x256   |  0.902   |  0.303   |  [cspnext_udp_mpii.md](./mpii/cspnext_udp_mpii.md)  |
+| HRNet-w48 | 256x256 | 0.901 | 0.337 | [hrnet_mpii.md](./mpii/hrnet_mpii.md) |
+| HRNet-w32 | 256x256 | 0.900 | 0.334 | [hrnet_mpii.md](./mpii/hrnet_mpii.md) |
+| HourglassNet | 256x256 | 0.889 | 0.317 | [hourglass_mpii.md](./mpii/hourglass_mpii.md) |
+| ResNet-152 | 256x256 | 0.889 | 0.303 | [resnet_mpii.md](./mpii/resnet_mpii.md) |
+| ResNetV1d-152 | 256x256 | 0.888 | 0.300 | [resnetv1d_mpii.md](./mpii/resnetv1d_mpii.md) |
+| SCNet-50 | 256x256 | 0.888 | 0.290 | [scnet_mpii.md](./mpii/scnet_mpii.md) |
+| ResNeXt-152 | 256x256 | 0.887 | 0.294 | [resnext_mpii.md](./mpii/resnext_mpii.md) |
+| SEResNet-50 | 256x256 | 0.884 | 0.292 | [seresnet_mpii.md](./mpii/seresnet_mpii.md) |
+| ResNet-50 | 256x256 | 0.882 | 0.286 | [resnet_mpii.md](./mpii/resnet_mpii.md) |
+| ResNetV1d-50 | 256x256 | 0.881 | 0.290 | [resnetv1d_mpii.md](./mpii/resnetv1d_mpii.md) |
+| CPM | 368x368\* | 0.876 | 0.285 | [cpm_mpii.md](./mpii/cpm_mpii.md) |
+| LiteHRNet-30 | 256x256 | 0.869 | 0.271 | [litehrnet_mpii.md](./mpii/litehrnet_mpii.md) |
+| LiteHRNet-18 | 256x256 | 0.859 | 0.260 | [litehrnet_mpii.md](./mpii/litehrnet_mpii.md) |
+| MobileNet-v2 | 256x256 | 0.854 | 0.234 | [mobilenetv2_mpii.md](./mpii/mobilenetv2_mpii.md) |
+| ShuffleNet-v2 | 256x256 | 0.828 | 0.205 | [shufflenetv2_mpii.md](./mpii/shufflenetv2_mpii.md) |
+| ShuffleNet-v1 | 256x256 | 0.824 | 0.195 | [shufflenetv1_mpii.md](./mpii/shufflenetv1_mpii.md) |
+
+### CrowdPose Dataset
+
+Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Model | Input Size | AP | AR | Details and Download |
+| :--------: | :--------: | :---: | :---: | :--------------------------------------------------------: |
+| HRNet-w32 | 256x192 | 0.675 | 0.816 | [hrnet_crowdpose.md](./crowdpose/hrnet_crowdpose.md) |
+| CSPNeXt-m  |  256x192   | 0.662 | 0.755 | [cspnext_udp_crowdpose.md](./crowdpose/cspnext_udp_crowdpose.md) |
+| ResNet-101 | 256x192 | 0.647 | 0.800 | [resnet_crowdpose.md](./crowdpose/resnet_crowdpose.md) |
+| ResNet-50  |  256x192   | 0.637 | 0.785 |   [resnet_crowdpose.md](./crowdpose/resnet_crowdpose.md)   |
+
+### AIC Dataset
+
+Results on AIC val set with ground-truth bounding boxes.
+
+| Model | Input Size | AP | AR | Details and Download |
+| :--------: | :--------: | :---: | :---: | :----------------------------------: |
+| HRNet-w32 | 256x192 | 0.323 | 0.366 | [hrnet_aic.md](./aic/hrnet_aic.md) |
+| ResNet-101 | 256x192 | 0.294 | 0.337 | [resnet_aic.md](./aic/resnet_aic.md) |
+
+### JHMDB Dataset
+
+| Model | Input Size | PCK(norm. by person size) | PCK (norm. by torso size) | Details and Download |
+| :-------: | :--------: | :-----------------------: | :-----------------------: | :----------------------------------------: |
+| ResNet-50 | 256x256 | 96.0 | 80.1 | [resnet_jhmdb.md](./jhmdb/resnet_jhmdb.md) |
+| CPM | 368x368 | 89.8 | 65.7 | [cpm_jhmdb.md](./jhmdb/cpm_jhmdb.md) |
+
+### PoseTrack2018 Dataset
+
+Results on PoseTrack2018 val with ground-truth bounding boxes.
+
+| Model | Input Size | AP | Details and Download |
+| :-------: | :--------: | :--: | :----------------------------------------------------------: |
+| HRNet-w48 | 256x192 | 84.6 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) |
+| HRNet-w32 | 256x192 | 83.4 | [hrnet_posetrack18.md](./posetrack18/hrnet_posetrack18.md) |
+| ResNet-50 | 256x192 | 81.2 | [resnet_posetrack18.md](./posetrack18/resnet_posetrack18.md) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b2cefcdcbe8909a7134a556f7da48881c6333c3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.md
@@ -0,0 +1,38 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+Results on AIC val set with ground-truth bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py) | 256x192 | 0.323 | 0.761 | 0.218 | 0.366 | 0.789 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192-30a4e465_20200826.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192_20200826.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0bbc52ccb8d3fc439887937333898f84ca40b167
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/hrnet_aic.yml
@@ -0,0 +1,18 @@
+Models:  # metadata for the HRNet-w32 AI Challenger config listed below
+- Config: configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture:
+ - HRNet
+ Training Data: AI Challenger
+ Name: td-hm_hrnet-w32_8xb64-210e_aic-256x192
+ Results:
+ - Dataset: AI Challenger
+ Metrics:  # same figures as the results table in hrnet_aic.md
+ AP: 0.323
+ AP@0.5: 0.761
+ AP@0.75: 0.218
+ AR: 0.366
+ AR@0.5: 0.789
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_aic_256x192-30a4e465_20200826.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..1cb0f57eb38ac56e2aaaeef20ad7bda5cb240e96
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.md
@@ -0,0 +1,55 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+Results on AIC val set with ground-truth bounding boxes
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py) | 256x192 | 0.294 | 0.736 | 0.172 | 0.337 | 0.762 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192-79b35445_20200826.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192_20200826.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e320056858565d88ac3b1a4e3e4960019be02ffb
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/resnet_aic.yml
@@ -0,0 +1,19 @@
+Models:  # metadata for the ResNet-101 AI Challenger config listed below
+- Config: configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: AI Challenger
+ Name: td-hm_res101_8xb64-210e_aic-256x192
+ Results:
+ - Dataset: AI Challenger
+ Metrics:  # same figures as the results table in resnet_aic.md
+ AP: 0.294
+ AP@0.5: 0.736
+ AP@0.75: 0.172
+ AR: 0.337
+ AR@0.5: 0.762
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_aic_256x192-79b35445_20200826.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d4c504d388fbe627ef7f62393e5135604403110
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_hrnet-w32_8xb64-210e_aic-256x192.py
@@ -0,0 +1,151 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared runtime defaults
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up plus epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)  # scale the LR by gamma=0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: heatmap_size is 1/4 of input_size in each dimension
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = HRNet backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # presumably ImageNet mean/std on a 0-255 scale
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',  # start from the pretrained HRNet-w32 weights below
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,  # equals the first branch width (32) of every HRNet stage above
+ out_channels=14,  # presumably one heatmap per AIC keypoint - confirm
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # predictions are decoded with the same codec used for targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'AicDataset'
+data_mode = 'topdown'
+data_root = 'data/aic/'  # passed to the datasets and prefixed to the evaluator ann_file
+
+# pipelines
+# training: load, bbox -> center/scale, random augmentation, affine warp to
+# the codec input size, target generation, packing
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # validation: no random augmentation and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train on aic_train.json, validate and test on aic_val.json
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/aic_train.json',
+ data_prefix=dict(img='ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/aic_val.json',
+ data_prefix=dict(img='ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric computed against the AIC validation annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/aic_val.json',
+ use_area=False)
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e61da3a5c4b6cbb89e78576c34ca8040f0fcca05
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/aic/td-hm_res101_8xb64-210e_aic-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared runtime defaults
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up plus epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)  # scale the LR by gamma=0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: heatmap_size is 1/4 of input_size in each dimension
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-101 backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # presumably ImageNet mean/std on a 0-255 scale
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the ResNet-101 final-stage width - confirm
+ out_channels=14,  # presumably one heatmap per AIC keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # predictions are decoded with the same codec used for targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'AicDataset'
+data_mode = 'topdown'
+data_root = 'data/aic/'  # passed to the datasets and prefixed to the evaluator ann_file
+
+# pipelines
+# training: load, bbox -> center/scale, random augmentation, affine warp to
+# the codec input size, target generation, packing
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # validation: no random augmentation and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train on aic_train.json, validate and test on aic_val.json
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/aic_train.json',
+ data_prefix=dict(img='ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/aic_val.json',
+ data_prefix=dict(img='ai_challenger_keypoint_validation_20170911/'
+ 'keypoint_validation_images_20170911/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric computed against the AIC validation annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/aic_val.json',
+ use_area=False)
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f82685ba81d28da32bfd9e578df397c9690ef5f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.md
@@ -0,0 +1,40 @@
+
+
+
+AlexNet (NeurIPS'2012)
+
+```bibtex
+@inproceedings{krizhevsky2012imagenet,
+ title={Imagenet classification with deep convolutional neural networks},
+ author={Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
+ booktitle={Advances in neural information processing systems},
+ pages={1097--1105},
+ year={2012}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_alexnet](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py) | 256x192 | 0.448 | 0.767 | 0.461 | 0.521 | 0.829 | [ckpt](https://download.openmmlab.com/mmpose/top_down/alexnet/alexnet_coco_256x192-a7b1fd15_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/alexnet/alexnet_coco_256x192_20200727.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0c851c3c793f7360617313a2b4f09e49ebc87484
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/alexnet_coco.yml
@@ -0,0 +1,19 @@
+Models:  # metadata for the AlexNet COCO config listed below
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - AlexNet
+ Training Data: COCO
+ Name: td-hm_alexnet_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:  # same figures as the results table in alexnet_coco.md
+ AP: 0.448
+ AP@0.5: 0.767
+ AP@0.75: 0.461
+ AR: 0.521
+ AR@0.5: 0.829
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/alexnet/alexnet_coco_256x192-a7b1fd15_20200727.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..3d4453a36986ccca8511e3f589dfac39cc3185ec
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.md
@@ -0,0 +1,41 @@
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [cpm](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py) | 256x192 | 0.627 | 0.862 | 0.709 | 0.689 | 0.906 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192-0e978875_20220920.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192_20220920.log) |
+| [cpm](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py) | 384x288 | 0.652 | 0.865 | 0.730 | 0.710 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288-165487b8_20221011.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288_20221011.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2c1cad9713c6a8be51e59ec67047267c8a425e1f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cpm_coco.yml
@@ -0,0 +1,40 @@
+Collections:  # declares the CPM collection that both models below belong to
+- Name: CPM
+ Paper:
+ Title: Convolutional pose machines
+ URL: http://openaccess.thecvf.com/content_cvpr_2016/html/Wei_Convolutional_Pose_Machines_CVPR_2016_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/cpm.md
+Models:  # one entry per input resolution (256x192 and 384x288)
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py
+ In Collection: CPM
+ Metadata:
+ Architecture: &id001  # anchor: this list is reused by the 384x288 entry
+ - CPM
+ Training Data: COCO
+ Name: td-hm_cpm_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:  # same figures as the 256x192 row of the table in cpm_coco.md
+ AP: 0.627
+ AP@0.5: 0.862
+ AP@0.75: 0.709
+ AR: 0.689
+ AR@0.5: 0.906
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192-0e978875_20220920.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001  # alias: same architecture list as the first entry
+ Training Data: COCO
+ Name: td-hm_cpm_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:  # same figures as the 384x288 row of the table in cpm_coco.md
+ AP: 0.652
+ AP@0.5: 0.865
+ AP@0.75: 0.730
+ AR: 0.710
+ AR@0.5: 0.907
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288-165487b8_20221011.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc1eb0d36c8b185369c8a722522f527fa37e0f8c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py
@@ -0,0 +1,284 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared runtime defaults
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30  # the final 30 epochs switch to train_pipeline_stage2 (see custom_hooks)
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW; decay multipliers for norm layers and biases are set to 0
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up over the first 1000 iterations, then cosine
+# annealing over the second half of training
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 105 to 210 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: heatmap_size is 1/4 of input_size in each dimension
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# keypoint mappings: (source index, target index) pairs into the 19-keypoint layout
+keypoint_mapping_coco = [  # identity mapping: COCO indices 0-16 are kept as-is
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+keypoint_mapping_aic = [  # AIC 0-11 land on targets 5-16; AIC 12 and 13 use the extra slots 17 and 18
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ (12, 17),
+ (13, 18),
+]
+
+# model settings: top-down estimator = CSPNeXt backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # presumably ImageNet mean/std on a 0-255 scale
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',  # presumably resolves CSPNeXt from the mmdet registry - confirm
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,
+ out_channels=19,  # same count as num_keypoints=19 in the KeypointConverter transforms
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ output_keypoint_indices=[  # the COCO target indices, i.e. 0-16 of the 19 channels
+ target for _, target in keypoint_mapping_coco
+ ]))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'  # shared root; the coco/ and aic/ sub-paths are given per dataset
+
+backend_args = dict(backend='local')  # active setting; a petrel alternative is kept commented out below
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'  # NOTE(review): repeats the key above
+# }))
+
+# pipelines: stage-1 training pipeline (used until the switch configured in custom_hooks)
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),  # always applied in stage 1 (stage 2 uses p=0.5)
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # validation: no random augmentation and no target generation
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # as train_pipeline, but with milder bbox jitter and dropout at p=0.5
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],  # stage 1 uses [0.6, 1.4]
+ rotate_factor=60),  # stage 1 uses 80
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),  # stage 1 uses p=1.
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets: COCO (wrapped in RepeatDataset, times=3) and AIC, each
+# converted to the shared 19-keypoint layout by a KeypointConverter
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,
+ mapping=keypoint_mapping_coco)
+ ],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,
+ mapping=keypoint_mapping_aic)
+ ],
+)
+
+# data loaders: train on the combined COCO + AIC set, validate/test on COCO val2017
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'),  # NOTE(review): relative path - confirm it resolves from the launch directory
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better), max_keep_ckpts=1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [  # EMA hook plus the switch to the stage-2 training pipeline
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = epoch 180
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric computed against the COCO val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cce193544c775b5f4c749e3ca9c81ff547a507e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py
@@ -0,0 +1,214 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared runtime defaults
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30  # the final 30 epochs switch to train_pipeline_stage2 (see custom_hooks)
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW; decay multipliers for norm layers and biases are set to 0
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up over the first 1000 iterations, then cosine
+# annealing over the second half of training
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 105 to 210 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: heatmap_size is 1/4 of input_size in each dimension
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = CSPNeXt backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # presumably ImageNet mean/std on a 0-255 scale
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',  # presumably resolves CSPNeXt from the mmdet registry - confirm
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,
+ out_channels=17,  # presumably one heatmap per COCO keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # predictions are decoded with the same codec used for targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'  # passed to the datasets and prefixed to the evaluator ann_file
+
+backend_args = dict(backend='local')  # active setting; a petrel alternative is kept commented out below
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'  # NOTE(review): repeats the key above
+# }))
+
+# pipelines: stage-1 training pipeline (used until the switch configured in custom_hooks)
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),  # always applied in stage 1 (stage 2 uses p=0.5)
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # validation: no random augmentation and no target generation
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # as train_pipeline, but with milder bbox jitter and dropout at p=0.5
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],  # stage 1 uses [0.6, 1.4]
+ rotate_factor=60),  # stage 1 uses 80
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),  # stage 1 uses p=1.
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train on COCO train2017, validate and test on COCO val2017
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# hooks: track the best checkpoint by 'coco/AP' (greater is better), max_keep_ckpts=1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [  # EMA hook plus the switch to the stage-2 training pipeline
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = epoch 180
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric computed against the COCO val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..096bf307859ee2946e8d42c66dc10ed23dbfe545
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py
@@ -0,0 +1,284 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: total epoch budget, length of the final stage, and base learning rate
+max_epochs = 210
+stage2_num_epochs = 30  # used below: switch_epoch = max_epochs - stage2_num_epochs
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW at base_lr; norm and bias decay multipliers are set to 0
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: iteration-based linear ramp (begin=0, end=1000), then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to epoch 210 (second half of max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: one UDPHeatmap config reused as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# keypoint mappings: index pairs whose second element is the target index in a 19-keypoint layout
+keypoint_mapping_coco = [
+ (0, 0),  # all 17 COCO pairs are identity mappings (targets 0-16)
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+keypoint_mapping_aic = [
+ (0, 6),  # first 12 AIC pairs land on targets also used by keypoint_mapping_coco
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ (12, 17),  # targets 17 and 18 are not produced by keypoint_mapping_coco
+ (13, 18),
+]
+
+# model settings: top-down pose estimator, mmdet CSPNeXt backbone, heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,  # 1024 * widen_factor; presumably must match the backbone output width
+ out_channels=19,  # same count as num_keypoints=19 in the KeypointConverter steps
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ output_keypoint_indices=[
+ target for _, target in keypoint_mapping_coco  # yields 0..16
+ ]))
+
+# base dataset settings: COCO-style top-down dataset type and the shared data root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')  # active setting; a petrel variant is kept commented out below
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'  (duplicate of the key on the previous line)
+# }))
+
+# pipelines: stage-1 training, validation, and stage-2 training (narrower augmentation ranges)
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),  # wider ranges than stage 2
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),  # probability 1 here; stage 2 uses 0.5
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # contains no Random* / Albumentation steps
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # passed to PipelineSwitchHook as switch_pipeline
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,  # explicit 0 here; stage 1 leaves shift_factor unset
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets: COCO (wrapped in RepeatDataset, times=3) and AIC, each converted to 19 keypoints
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,  # same count as the head's out_channels
+ mapping=keypoint_mapping_coco)
+ ],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,
+ mapping=keypoint_mapping_aic)
+ ],
+)
+
+# data loaders: training combines dataset_coco and dataset_aic; validation/test use COCO val2017
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# hooks: save the best checkpoint by coco/AP (rule 'greater'), max_keep_ckpts=1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180 with the values set above
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric on the COCO val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f86e9a8d609c2f200c888ad183f3cd890f35c388
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py
@@ -0,0 +1,214 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: total epoch budget, length of the final stage, and base learning rate
+max_epochs = 210
+stage2_num_epochs = 30  # used below: switch_epoch = max_epochs - stage2_num_epochs
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW at base_lr; norm and bias decay multipliers are set to 0
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: iteration-based linear ramp (begin=0, end=1000), then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to epoch 210 (second half of max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: one UDPHeatmap config reused as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down pose estimator, mmdet CSPNeXt backbone, 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,  # 1024 * widen_factor; presumably must match the backbone output width
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,  # flip testing enabled, configured by the two options below
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: COCO-style top-down dataset type and the COCO data root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')  # active setting; a petrel variant is kept commented out below
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'  (duplicate of the key on the previous line)
+# }))
+
+# pipelines: stage-1 training, validation, and stage-2 training (narrower augmentation ranges)
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),  # wider ranges than stage 2
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),  # probability 1 here; stage 2 uses 0.5
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # contains no Random* / Albumentation steps
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # passed to PipelineSwitchHook as switch_pipeline
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,  # explicit 0 here; stage 1 leaves shift_factor unset
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled COCO train2017 loader; unshuffled val2017 loader reused for testing
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# hooks: save the best checkpoint by coco/AP (rule 'greater'), max_keep_ckpts=1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180 with the values set above
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..94cc7d02d2789fd5a82ff9d352063f5afe99aaf0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py
@@ -0,0 +1,284 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: total epoch budget, length of the final stage, and base learning rate
+max_epochs = 210
+stage2_num_epochs = 30  # used below: switch_epoch = max_epochs - stage2_num_epochs
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW at base_lr with weight_decay 0.0; norm and bias decay multipliers are 0
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: iteration-based linear ramp (begin=0, end=1000), then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to epoch 210 (second half of max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: one UDPHeatmap config reused as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# keypoint mappings: index pairs whose second element is the target index in a 19-keypoint layout
+keypoint_mapping_coco = [
+ (0, 0),  # all 17 COCO pairs are identity mappings (targets 0-16)
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+keypoint_mapping_aic = [
+ (0, 6),  # first 12 AIC pairs land on targets also used by keypoint_mapping_coco
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ (12, 17),  # targets 17 and 18 are not produced by keypoint_mapping_coco
+ (13, 18),
+]
+
+# model settings: top-down pose estimator, mmdet CSPNeXt backbone, heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.33,
+ widen_factor=0.5,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-s_imagenet_600e-ea671761.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=512,  # 1024 * widen_factor; presumably must match the backbone output width
+ out_channels=19,  # same count as num_keypoints=19 in the KeypointConverter steps
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ output_keypoint_indices=[
+ target for _, target in keypoint_mapping_coco  # yields 0..16
+ ]))
+
+# base dataset settings: COCO-style top-down dataset type and the shared data root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')  # active setting; a petrel variant is kept commented out below
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'  (duplicate of the key on the previous line)
+# }))
+
+# pipelines: stage-1 training, validation, and stage-2 training (narrower augmentation ranges)
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),  # wider ranges than stage 2
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),  # probability 1 here; stage 2 uses 0.5
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # contains no Random* / Albumentation steps
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # passed to PipelineSwitchHook as switch_pipeline
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,  # explicit 0 here; stage 1 leaves shift_factor unset
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets: COCO (wrapped in RepeatDataset, times=3) and AIC, each converted to 19 keypoints
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,  # same count as the head's out_channels
+ mapping=keypoint_mapping_coco)
+ ],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,
+ mapping=keypoint_mapping_aic)
+ ],
+)
+
+# data loaders: training combines dataset_coco and dataset_aic; validation/test use COCO val2017
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# hooks: save the best checkpoint by coco/AP (rule 'greater'), max_keep_ckpts=1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180 with the values set above
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric on the COCO val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f50542e5bceb86c652bf4d8ab893386197217ef
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py
@@ -0,0 +1,214 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: total epoch budget, length of the final stage, and base learning rate
+max_epochs = 210
+stage2_num_epochs = 30  # used below: switch_epoch = max_epochs - stage2_num_epochs
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW at base_lr; norm and bias decay multipliers are set to 0
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: iteration-based linear ramp (begin=0, end=1000), then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to epoch 210 (second half of max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: one UDPHeatmap config reused as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down pose estimator, mmdet CSPNeXt backbone, 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.33,
+ widen_factor=0.5,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-s_imagenet_600e-ea671761.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=512,  # 1024 * widen_factor; presumably must match the backbone output width
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,  # flip testing enabled, configured by the two options below
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: COCO-style top-down dataset type and the COCO data root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')  # active setting; a petrel variant is kept commented out below
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'  (duplicate of the key on the previous line)
+# }))
+
+# pipelines: stage-1 training, validation, and stage-2 training (narrower augmentation ranges)
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),  # wider ranges than stage 2
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),  # probability 1 here; stage 2 uses 0.5
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # contains no Random* / Albumentation steps
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # passed to PipelineSwitchHook as switch_pipeline
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,  # explicit 0 here; stage 1 leaves shift_factor unset
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled COCO train2017 loader; unshuffled val2017 loader reused for testing
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# hooks: save the best checkpoint by coco/AP (rule 'greater'), max_keep_ckpts=1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180 with the values set above
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..cef1b204501573d4e0d3228c36595eb784fdc83b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py
@@ -0,0 +1,284 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: total epoch budget, length of the final stage, and base learning rate
+max_epochs = 210
+stage2_num_epochs = 30  # used below: switch_epoch = max_epochs - stage2_num_epochs
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer: AdamW at base_lr with weight_decay 0.0; norm and bias decay multipliers are 0
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.0),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: iteration-based linear ramp (begin=0, end=1000), then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to epoch 210 (second half of max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings: one UDPHeatmap config reused as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# keypoint mappings: index pairs whose second element is the target index in a 19-keypoint layout
+keypoint_mapping_coco = [
+ (0, 0),  # all 17 COCO pairs are identity mappings (targets 0-16)
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+keypoint_mapping_aic = [
+ (0, 6),  # first 12 AIC pairs land on targets also used by keypoint_mapping_coco
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ (12, 17),  # targets 17 and 18 are not produced by keypoint_mapping_coco
+ (13, 18),
+]
+
+# model settings: top-down pose estimator, mmdet CSPNeXt backbone, heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.167,
+ widen_factor=0.375,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-tiny_imagenet_600e-3a2dd350.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=384,  # 1024 * widen_factor; presumably must match the backbone output width
+ out_channels=19,  # same count as num_keypoints=19 in the KeypointConverter steps
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ output_keypoint_indices=[
+ target for _, target in keypoint_mapping_coco  # yields 0..16
+ ]))
+
+# base dataset settings: COCO-style top-down dataset type and the shared data root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')  # active setting; a petrel variant is kept commented out below
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'  (duplicate of the key on the previous line)
+# }))
+
+# pipelines: stage-1 training, validation, and stage-2 training (narrower augmentation ranges)
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),  # wider ranges than stage 2
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),  # probability 1 here; stage 2 uses 0.5
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # contains no Random* / Albumentation steps
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [  # passed to PipelineSwitchHook as switch_pipeline
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,  # explicit 0 here; stage 1 leaves shift_factor unset
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets: COCO (wrapped in RepeatDataset, times=3) and AIC, each converted to 19 keypoints
+dataset_coco = dict(
+ type='RepeatDataset',
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,  # same count as the head's out_channels
+ mapping=keypoint_mapping_coco)
+ ],
+ ),
+ times=3)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='aic/annotations/aic_train.json',
+ data_prefix=dict(img='pose/ai_challenge/ai_challenger_keypoint'
+ '_train_20170902/keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,
+ mapping=keypoint_mapping_aic)
+ ],
+)
+
+# data loaders: training combines dataset_coco and dataset_aic; validation/test use COCO val2017
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# hooks: save the best checkpoint by coco/AP (rule 'greater'), max_keep_ckpts=1
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ # EMAHook entry is commented out in this config, so it is not registered:
+ # type='EMAHook',
+ # ema_type='ExpMomentumEMA',
+ # momentum=0.0002,
+ # update_buffers=True,
+ # priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,  # 210 - 30 = 180 with the values set above
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators: CocoMetric on the COCO val2017 keypoint annotations; test reuses the same evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'coco/annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ec0bb2be7dbd59be7401cca1d4995d7741ee2b6
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py
@@ -0,0 +1,214 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from 105 to 210 epoch
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.167,
+ widen_factor=0.375,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-tiny_imagenet_600e-3a2dd350.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=384,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ # bbox_file='data/coco/person_detection_results/'
+ # 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ # dict(
+ # type='EMAHook',
+ # ema_type='ExpMomentumEMA',
+ # momentum=0.0002,
+ # update_buffers=True,
+ # priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..7aad2bf6b31428bb8ff52149d0e8cb9f85820709
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.md
@@ -0,0 +1,69 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_cspnext_t_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.665 | 0.874 | 0.723 | 0.723 | 0.917 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-coco_pt-in1k_210e-256x192-0908dd2d_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-coco_pt-in1k_210e-256x192-0908dd2d_20230123.json) |
+| [pose_cspnext_s_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.697 | 0.886 | 0.776 | 0.753 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-coco_pt-in1k_210e-256x192-92dbfc1d_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-coco_pt-in1k_210e-256x192-92dbfc1d_20230123.json) |
+| [pose_cspnext_m_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.732 | 0.896 | 0.806 | 0.785 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco_pt-in1k_210e-256x192-95f5967e_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco_pt-in1k_210e-256x192-95f5967e_20230123.json) |
+| [pose_cspnext_l_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py) | 256x192 | 0.750 | 0.904 | 0.822 | 0.800 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-coco_pt-in1k_210e-256x192-661cdd8c_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-coco_pt-in1k_210e-256x192-661cdd8c_20230123.json) |
+| [pose_cspnext_t_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.655 | 0.884 | 0.731 | 0.689 | 0.890 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.json) |
+| [pose_cspnext_s_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.700 | 0.905 | 0.783 | 0.733 | 0.918 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.json) |
+| [pose_cspnext_m_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.748 | 0.925 | 0.818 | 0.777 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.json) |
+| [pose_cspnext_l_udp_aic_coco](/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py) | 256x192 | 0.772 | 0.936 | 0.839 | 0.799 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.json) |
+
+Note that UDP also adopts the unbiased encoding/decoding algorithm of [DARK](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/techniques.html#darkpose-cvpr-2020).
+
+Flip test and detector are not used in the results of aic-coco training.
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..aab5c44e1b651eb86335f0afee53872e5a5c5c34
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/cspnext_udp_coco.yml
@@ -0,0 +1,139 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: &id001
+ - CSPNeXt
+ - UDP
+ Training Data: COCO
+ Name: cspnext-tiny_udp_8xb256-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.665
+ AP@0.5: 0.874
+ AP@0.75: 0.723
+ AR: 0.723
+ AR@0.5: 0.917
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-coco_pt-in1k_210e-256x192-0908dd2d_20230123.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: cspnext-s_udp_8xb256-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.697
+ AP@0.5: 0.886
+ AP@0.75: 0.776
+ AR: 0.753
+ AR@0.5: 0.929
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-coco_pt-in1k_210e-256x192-92dbfc1d_20230123.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: cspnext-m_udp_8xb256-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.732
+ AP@0.5: 0.896
+ AP@0.75: 0.806
+ AR: 0.785
+ AR@0.5: 0.937
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco_pt-in1k_210e-256x192-95f5967e_20230123.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: cspnext-l_udp_8xb256-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.750
+ AP@0.5: 0.904
+ AP@0.75: 0.822
+ AR: 0.8
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-coco_pt-in1k_210e-256x192-661cdd8c_20230123.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-tiny_udp_8xb256-210e_aic-coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data:
+ - COCO
+ - AIC
+ Name: cspnext-tiny_udp_8xb256-210e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.655
+ AP@0.5: 0.884
+ AP@0.75: 0.731
+ AR: 0.689
+ AR@0.5: 0.89
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-tiny_udp-aic-coco_210e-256x192-cbed682d_20230130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-s_udp_8xb256-210e_aic-coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data:
+ - COCO
+ - AIC
+ Name: cspnext-s_udp_8xb256-210e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.7
+ AP@0.5: 0.905
+ AP@0.75: 0.783
+ AR: 0.733
+ AR@0.5: 0.918
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-s_udp-aic-coco_210e-256x192-92f5a029_20230130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-m_udp_8xb256-210e_aic-coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data:
+ - COCO
+ - AIC
+ Name: cspnext-m_udp_8xb256-210e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.925
+ AP@0.75: 0.818
+ AR: 0.777
+ AR@0.5: 0.933
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/cspnext-l_udp_8xb256-210e_aic-coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data:
+ - COCO
+ - AIC
+ Name: cspnext-l_udp_8xb256-210e_aic-coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.772
+ AP@0.5: 0.936
+ AP@0.75: 0.839
+ AR: 0.799
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..dc7dee47c3ec6917f3fffb034e36df3a5a226504
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.md
@@ -0,0 +1,42 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py) | 256x256 | 0.726 | 0.896 | 0.799 | 0.780 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256-4ec713ba_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256_20200709.log.json) |
+| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py) | 384x384 | 0.746 | 0.900 | 0.812 | 0.797 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384-be91ba2b_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384_20200812.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6d9cfd91e99a8d53a437f4d71e318fb1226b18e6
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hourglass_coco.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: Hourglass
+ Paper:
+ Title: Stacked hourglass networks for human pose estimation
+ URL: https://link.springer.com/chapter/10.1007/978-3-319-46484-8_29
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hourglass.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: &id001
+ - Hourglass
+ Training Data: COCO
+ Name: td-hm_hourglass52_8xb32-210e_coco-256x256
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.726
+ AP@0.5: 0.896
+ AP@0.75: 0.799
+ AR: 0.780
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_256x256-4ec713ba_20200709.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hourglass52_8xb32-210e_coco-384x384
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.746
+ AP@0.5: 0.900
+ AP@0.75: 0.812
+ AR: 0.797
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_coco_384x384-be91ba2b_20200812.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..87309d2e7ceab17aa055a121379e7ef5c62d3c66
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.md
@@ -0,0 +1,43 @@
+
+
+
+HRFormer (NIPS'2021)
+
+```bibtex
+@article{yuan2021hrformer,
+ title={HRFormer: High-Resolution Vision Transformer for Dense Predict},
+ author={Yuan, Yuhui and Fu, Rao and Huang, Lang and Lin, Weihong and Zhang, Chao and Chen, Xilin and Wang, Jingdong},
+ journal={Advances in Neural Information Processing Systems},
+ volume={34},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hrformer_small](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py) | 256x192 | 0.738 | 0.904 | 0.812 | 0.793 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192-5310d898_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192_20220316.log.json) |
+| [pose_hrformer_small](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py) | 384x288 | 0.757 | 0.905 | 0.824 | 0.807 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288-98d237ed_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288_20220316.log.json) |
+| [pose_hrformer_base](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py) | 256x192 | 0.754 | 0.906 | 0.827 | 0.807 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192-6f5f1169_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192_20220316.log.json) |
+| [pose_hrformer_base](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py) | 384x288 | 0.774 | 0.909 | 0.842 | 0.823 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_384x288-ecf0758d_20220316.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_384x288_20220316.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5ac7dc3636a7a020a396db422d8de1521a172cd1
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrformer_coco.yml
@@ -0,0 +1,72 @@
+Collections:
+- Name: HRFormer
+ Paper:
+ Title: 'HRFormer: High-Resolution Vision Transformer for Dense Predict'
+ URL: https://proceedings.neurips.cc/paper/2021/hash/3bbfdde8842a5c44a0323518eec97cbe-Abstract.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hrformer.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py
+ In Collection: HRFormer
+ Metadata:
+ Architecture: &id001
+ - HRFormer
+ Training Data: COCO
+ Name: td-hm_hrformer-small_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.738
+ AP@0.5: 0.904
+ AP@0.75: 0.812
+ AR: 0.793
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_256x192-5310d898_20220316.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py
+ In Collection: HRFormer
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrformer-small_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.757
+ AP@0.5: 0.905
+ AP@0.75: 0.824
+ AR: 0.807
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_small_coco_384x288-98d237ed_20220316.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py
+ In Collection: HRFormer
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrformer-base_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.754
+ AP@0.5: 0.906
+ AP@0.75: 0.827
+ AR: 0.807
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_256x192-6f5f1169_20220316.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py
+ In Collection: HRFormer
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrformer-base_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.774
+ AP@0.5: 0.909
+ AP@0.75: 0.842
+ AR: 0.823
+ AR@0.5: 0.945
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrformer/hrformer_base_coco_384x288-ecf0758d_20220316.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..efe9cd27b91e2189760dec73682b997e2be58b95
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.md
@@ -0,0 +1,62 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+Albumentations (Information'2020)
+
+```bibtex
+@article{buslaev2020albumentations,
+ title={Albumentations: fast and flexible image augmentations},
+ author={Buslaev, Alexander and Iglovikov, Vladimir I and Khvedchenya, Eugene and Parinov, Alex and Druzhinin, Mikhail and Kalinin, Alexandr A},
+ journal={Information},
+ volume={11},
+ number={2},
+ pages={125},
+ year={2020},
+ publisher={Multidisciplinary Digital Publishing Institute}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [coarsedropout](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py) | 256x192 | 0.753 | 0.908 | 0.822 | 0.805 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout-0f16a0ce_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout_20210320.log.json) |
+| [gridmask](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py) | 256x192 | 0.752 | 0.906 | 0.825 | 0.804 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask-868180df_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask_20210320.log.json) |
+| [photometric](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py) | 256x192 | 0.754 | 0.908 | 0.825 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric-308cf591_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric_20210320.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7a29de4f64a702fddb2f42d9527c1762879f89b1
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_augmentation_coco.yml
@@ -0,0 +1,56 @@
+Collections:
+- Name: Albumentations
+ Paper:
+ Title: 'Albumentations: fast and flexible image augmentations'
+ URL: https://www.mdpi.com/649002
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/albumentations.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py
+ In Collection: Albumentations
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.908
+ AP@0.75: 0.822
+ AR: 0.805
+ AR@0.5: 0.944
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_coarsedropout-0f16a0ce_20210320.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py
+ In Collection: Albumentations
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.752
+ AP@0.5: 0.906
+ AP@0.75: 0.825
+ AR: 0.804
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_gridmask-868180df_20210320.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py
+ In Collection: Albumentations
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.754
+ AP@0.5: 0.908
+ AP@0.75: 0.825
+ AR: 0.805
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/augmentation/hrnet_w32_coco_256x192_photometric-308cf591_20210320.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..51fbf1322e474f8dd5cb8ca46de7c14a9bb26540
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.md
@@ -0,0 +1,43 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py) | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192_20220909.log) |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py) | 384x288 | 0.761 | 0.908 | 0.826 | 0.811 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288-ca5956af_20220909.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288_20220909.log) |
+| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py) | 256x192 | 0.756 | 0.908 | 0.826 | 0.809 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192_20220913.log) |
+| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py) | 384x288 | 0.767 | 0.911 | 0.832 | 0.817 | 0.947 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288-c161b7de_20220915.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288_20220915.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a0e5debe859ccc059e892b28d85aadb11f1a1857
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco.yml
@@ -0,0 +1,124 @@
+Collections:
+- Name: HRNet
+ Paper:
+ Title: Deep high-resolution representation learning for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hrnet.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.749
+ AP@0.5: 0.906
+ AP@0.75: 0.821
+ AR: 0.804
+ AR@0.5: 0.945
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.761
+ AP@0.5: 0.908
+ AP@0.75: 0.826
+ AR: 0.811
+ AR@0.5: 0.944
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288-ca5956af_20220909.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w48_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.756
+ AP@0.5: 0.908
+ AP@0.75: 0.826
+ AR: 0.809
+ AR@0.5: 0.945
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w48_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.767
+ AP@0.5: 0.911
+ AP@0.75: 0.832
+ AR: 0.817
+ AR@0.5: 0.947
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288-c161b7de_20220915.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data:
+ - COCO
+ - AI Challenger
+ Name: td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.757
+ AP@0.5: 0.907
+ AP@0.75: 0.829
+ AR: 0.809
+ AR@0.5: 0.944
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge-b05435b9_20221025.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data:
+ - COCO
+ - AI Challenger
+ Name: td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.756
+ AP@0.5: 0.906
+ AP@0.75: 0.826
+ AR: 0.807
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine-4ce66880_20221026.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.749
+ AP@0.5: 0.907
+ AP@0.75: 0.822
+ AR: 0.802
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192-f1e84e3b_20220914.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco_aic.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco_aic.md
new file mode 100644
index 0000000000000000000000000000000000000000..fd88e25e64c5aec1d0e354ecb5d426e13de02ffa
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_coco_aic.md
@@ -0,0 +1,61 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+
+AI Challenger (ArXiv'2017)
+
+```bibtex
+@article{wu2017ai,
+ title={Ai challenger: A large-scale dataset for going deeper in image understanding},
+ author={Wu, Jiahong and Zheng, He and Zhao, Bo and Li, Yixin and Yan, Baoming and Liang, Rui and Wang, Wenjia and Zhou, Shipei and Lin, Guosen and Fu, Yanwei and others},
+ journal={arXiv preprint arXiv:1711.06475},
+ year={2017}
+}
+```
+
+
+
+MMPose supports training models with combined datasets. [coco-aic-merge](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py) and [coco-aic-combine](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py) are two examples.
+
+- [coco-aic-merge](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py) leverages AIC data with partial keypoints as auxiliary data to train a COCO model
+- [coco-aic-combine](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py) constructs a combined dataset whose keypoints are the union of COCO and AIC keypoints to train a model that predicts keypoints of both datasets.
+
+Evaluation results on COCO val2017 of models trained solely on the COCO dataset and on the combined datasets are shown below. These models are evaluated with a detector having a human AP of 56.4 on the COCO val2017 dataset.
+
+| Train Set | Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :------------------------------------------- | :------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------: | :------------------------------------: |
+| [coco](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py) | pose_hrnet_w32 | 256x192 | 0.749 | 0.906 | 0.821 | 0.804 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192_20220909.log) |
+| [coco-aic-merge](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py) | pose_hrnet_w32 | 256x192 | 0.757 | 0.907 | 0.829 | 0.809 | 0.944 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge-b05435b9_20221025.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge_20221025.log) |
+| [coco-aic-combine](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py) | pose_hrnet_w32 | 256x192 | 0.756 | 0.906 | 0.826 | 0.807 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine-4ce66880_20221026.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine_20221026.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..c18382ec68035c0f7d560d782bbb44b5af2d5024
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.md
@@ -0,0 +1,60 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hrnet_w32_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py) | 256x192 | 0.757 | 0.907 | 0.825 | 0.807 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192-0e00bf12_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192_20220914.log) |
+| [pose_hrnet_w32_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py) | 384x288 | 0.766 | 0.907 | 0.829 | 0.815 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288-9bab4c9b_20220917.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288_20220917.log) |
+| [pose_hrnet_w48_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py) | 256x192 | 0.764 | 0.907 | 0.831 | 0.814 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192-e1ebdd6f_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192_20220913.log) |
+| [pose_hrnet_w48_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py) | 384x288 | 0.772 | 0.911 | 0.833 | 0.821 | 0.948 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288-39c3c381_20220916.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288_20220916.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9f14e9ffad78d3d36f5f89f2166c6b3cda7ab2ff
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_dark_coco.yml
@@ -0,0 +1,73 @@
+Collections:
+- Name: DarkPose
+ Paper:
+ Title: Distribution-aware coordinate representation for human pose estimation
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Zhang_Distribution-Aware_Coordinate_Representation_for_Human_Pose_Estimation_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/dark.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ - DarkPose
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.757
+ AP@0.5: 0.907
+ AP@0.75: 0.825
+ AR: 0.807
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192-0e00bf12_20220914.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.766
+ AP@0.5: 0.907
+ AP@0.75: 0.829
+ AR: 0.815
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288-9bab4c9b_20220917.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.764
+ AP@0.5: 0.907
+ AP@0.75: 0.831
+ AR: 0.814
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192-e1ebdd6f_20220913.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.772
+ AP@0.5: 0.911
+ AP@0.75: 0.833
+ AR: 0.821
+ AR@0.5: 0.948
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288-39c3c381_20220916.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_fp16_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_fp16_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..3e52624dc760eaf8bc4d6c7f75a29c4e1747a6e0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_fp16_coco.md
@@ -0,0 +1,56 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+FP16 (ArXiv'2017)
+
+```bibtex
+@article{micikevicius2017mixed,
+ title={Mixed precision training},
+ author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
+ journal={arXiv preprint arXiv:1710.03740},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hrnet_w32_fp16](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py) | 256x192 | 0.749 | 0.907 | 0.822 | 0.802 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192-f1e84e3b_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192_20220914.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..2b85d85a25125cb8eef083a87e597591850a1402
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.md
@@ -0,0 +1,63 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_hrnet_w32_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py) | 256x192 | 0.762 | 0.907 | 0.829 | 0.810 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192-73ede547_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192_20220914.log) |
+| [pose_hrnet_w32_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py) | 384x288 | 0.768 | 0.909 | 0.832 | 0.815 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288-9a3f7c85_20220914.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288_20220914.log) |
+| [pose_hrnet_w48_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py) | 256x192 | 0.768 | 0.908 | 0.833 | 0.817 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192-3feaef8f_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192_20220913.log) |
+| [pose_hrnet_w48_udp](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py) | 384x288 | 0.773 | 0.911 | 0.836 | 0.821 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288-70d7ab01_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288_20220913.log) |
+| [pose_hrnet_w32_udp_regress](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py) | 256x192 | 0.759 | 0.907 | 0.827 | 0.813 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192-9c0b77b4_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192_20220226.log) |
+
+Note that UDP also adopts the unbiased encoding/decoding algorithm of [DARK](https://mmpose.readthedocs.io/en/latest/model_zoo_papers/techniques.html#darkpose-cvpr-2020).
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..01cba761ec4a97c8d84273de7f2cf720de62ed5b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/hrnet_udp_coco.yml
@@ -0,0 +1,90 @@
+Collections:
+- Name: UDP
+ Paper:
+ Title: 'The Devil Is in the Details: Delving Into Unbiased Data Processing for
+ Human Pose Estimation'
+ URL: http://openaccess.thecvf.com/content_CVPR_2020/html/Huang_The_Devil_Is_in_the_Details_Delving_Into_Unbiased_Data_CVPR_2020_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/udp.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ - UDP
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.762
+ AP@0.5: 0.907
+ AP@0.75: 0.829
+ AR: 0.810
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192-73ede547_20220914.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.768
+ AP@0.5: 0.909
+ AP@0.75: 0.832
+ AR: 0.815
+ AR@0.5: 0.945
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288-9a3f7c85_20220914.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.768
+ AP@0.5: 0.908
+ AP@0.75: 0.833
+ AR: 0.817
+ AR@0.5: 0.945
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192-3feaef8f_20220913.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.773
+ AP@0.5: 0.911
+ AP@0.75: 0.836
+ AR: 0.821
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288-70d7ab01_20220913.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.759
+ AP@0.5: 0.907
+ AP@0.75: 0.827
+ AR: 0.813
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192-9c0b77b4_20220926.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..28f608d54af3d7098886a077dbdfd5b7f4e50b4f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.md
@@ -0,0 +1,42 @@
+
+
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [LiteHRNet-18](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py) | 256x192 | 0.642 | 0.867 | 0.719 | 0.705 | 0.911 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192-6bace359_20211230.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192_20211230.log.json) |
+| [LiteHRNet-18](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py) | 384x288 | 0.676 | 0.876 | 0.746 | 0.735 | 0.919 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288-8d4dac48_20211230.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288_20211230.log.json) |
+| [LiteHRNet-30](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py) | 256x192 | 0.676 | 0.880 | 0.756 | 0.736 | 0.922 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192-4176555b_20210626.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192_20210626.log.json) |
+| [LiteHRNet-30](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py) | 384x288 | 0.700 | 0.883 | 0.776 | 0.758 | 0.926 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288-a3aef5c4_20210626.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288_20210626.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f92360587237823bde9ce0f042c08c8f5915ca3f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/litehrnet_coco.yml
@@ -0,0 +1,72 @@
+Collections:
+- Name: LiteHRNet
+ Paper:
+ Title: 'Lite-HRNet: A Lightweight High-Resolution Network'
+ URL: https://arxiv.org/abs/2104.06403
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/litehrnet.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: &id001
+ - LiteHRNet
+ Training Data: COCO
+ Name: td-hm_litehrnet-18_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.642
+ AP@0.5: 0.867
+ AP@0.75: 0.719
+ AR: 0.705
+ AR@0.5: 0.911
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_256x192-6bace359_20211230.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_litehrnet-18_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.676
+ AP@0.5: 0.876
+ AP@0.75: 0.746
+ AR: 0.735
+ AR@0.5: 0.919
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_coco_384x288-8d4dac48_20211230.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_litehrnet-30_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.676
+ AP@0.5: 0.88
+ AP@0.75: 0.756
+ AR: 0.736
+ AR@0.5: 0.922
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_256x192-4176555b_20210626.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_litehrnet-30_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.7
+ AP@0.5: 0.883
+ AP@0.75: 0.776
+ AR: 0.758
+ AR@0.5: 0.926
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_coco_384x288-a3aef5c4_20210626.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..aed9fd0246bf4d6f0d3379d7317478ab9013eef2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.md
@@ -0,0 +1,41 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_mobilenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py) | 256x192 | 0.648 | 0.874 | 0.725 | 0.709 | 0.918 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192-55a04c35_20221016.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192_20221016.log) |
+| [pose_mobilenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py) | 384x288 | 0.677 | 0.882 | 0.746 | 0.734 | 0.920 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288-d3ab1457_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288_20221013.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c7993fe516dd6a79895ed08f50c624d23c4ee0aa
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mobilenetv2_coco.yml
@@ -0,0 +1,35 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - MobilenetV2
+ Training Data: COCO
+ Name: td-hm_mobilenetv2_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.648
+ AP@0.5: 0.874
+ AP@0.75: 0.725
+ AR: 0.709
+ AR@0.5: 0.918
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192-55a04c35_20221016.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_mobilenetv2_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.677
+ AP@0.5: 0.882
+ AP@0.75: 0.746
+ AR: 0.734
+ AR@0.5: 0.920
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288-d3ab1457_20221013.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..d86bc2c2ada7c560e1e2770a1e864c27a8417d3c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.md
@@ -0,0 +1,42 @@
+
+
+
+MSPN (ArXiv'2019)
+
+```bibtex
+@article{li2019rethinking,
+ title={Rethinking on Multi-Stage Networks for Human Pose Estimation},
+ author={Li, Wenbo and Wang, Zhicheng and Yin, Binyi and Peng, Qixiang and Du, Yuming and Xiao, Tianzi and Yu, Gang and Lu, Hongtao and Wei, Yichen and Sun, Jian},
+ journal={arXiv preprint arXiv:1901.00148},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [mspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.723 | 0.895 | 0.794 | 0.788 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192-8fbfb5d0_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192_20201123.log.json) |
+| [2xmspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.754 | 0.903 | 0.826 | 0.816 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192-c8765a5c_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192_20201123.log.json) |
+| [3xmspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.758 | 0.904 | 0.830 | 0.821 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192-e348f18e_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192_20201123.log.json) |
+| [4xmspn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.765 | 0.906 | 0.835 | 0.826 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192-7b837afb_20201123.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192_20201123.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..77eca18b6f50220650671a6f2b88eabd06a14baf
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/mspn_coco.yml
@@ -0,0 +1,72 @@
+Collections:
+- Name: MSPN
+ Paper:
+ Title: Rethinking on Multi-Stage Networks for Human Pose Estimation
+ URL: https://arxiv.org/abs/1901.00148
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/mspn.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py
+ In Collection: MSPN
+ Metadata:
+ Architecture: &id001
+ - MSPN
+ Training Data: COCO
+ Name: td-hm_mspn50_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.723
+ AP@0.5: 0.895
+ AP@0.75: 0.794
+ AR: 0.788
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mspn/mspn50_coco_256x192-8fbfb5d0_20201123.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py
+ In Collection: MSPN
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_2xmspn50_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.754
+ AP@0.5: 0.903
+ AP@0.75: 0.826
+ AR: 0.816
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mspn/2xmspn50_coco_256x192-c8765a5c_20201123.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py
+ In Collection: MSPN
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_3xmspn50_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.758
+ AP@0.5: 0.904
+ AP@0.75: 0.83
+ AR: 0.821
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mspn/3xmspn50_coco_256x192-e348f18e_20201123.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py
+ In Collection: MSPN
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_4xmspn50_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.765
+ AP@0.5: 0.906
+ AP@0.75: 0.835
+ AR: 0.826
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mspn/4xmspn50_coco_256x192-7b837afb_20201123.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..8a375a4c2022442f0ce1cb19820da0cea5a1e802
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.md
@@ -0,0 +1,57 @@
+
+
+
+PVT (ICCV'2021)
+
+```bibtex
+@inproceedings{wang2021pyramid,
+ title={Pyramid vision transformer: A versatile backbone for dense prediction without convolutions},
+ author={Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
+ booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
+ pages={568--578},
+ year={2021}
+}
+```
+
+
+
+
+PVTV2 (CVMJ'2022)
+
+```bibtex
+@article{wang2022pvt,
+ title={PVT v2: Improved baselines with Pyramid Vision Transformer},
+ author={Wang, Wenhai and Xie, Enze and Li, Xiang and Fan, Deng-Ping and Song, Kaitao and Liang, Ding and Lu, Tong and Luo, Ping and Shao, Ling},
+ journal={Computational Visual Media},
+ pages={1--10},
+ year={2022},
+ publisher={Springer}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_pvt-s](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py) | 256x192 | 0.714 | 0.896 | 0.794 | 0.773 | 0.936 | [ckpt](https://download.openmmlab.com/mmpose/top_down/pvt/pvt_small_coco_256x192-4324a49d_20220501.pth) | [log](https://download.openmmlab.com/mmpose/top_down/pvt/pvt_small_coco_256x192_20220501.log.json) |
+| [pose_pvtv2-b2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py) | 256x192 | 0.737 | 0.905 | 0.812 | 0.791 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/pvt/pvtv2_b2_coco_256x192-b4212737_20220501.pth) | [log](https://download.openmmlab.com/mmpose/top_down/pvt/pvtv2_b2_coco_256x192_20220501.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2b4303d7040486806f98e01edb6d296538c3089f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/pvt_coco.yml
@@ -0,0 +1,35 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - PVT
+ Training Data: COCO
+ Name: td-hm_pvt-s_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.714
+ AP@0.5: 0.896
+ AP@0.75: 0.794
+ AR: 0.773
+ AR@0.5: 0.936
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/pvt/pvt_small_coco_256x192-4324a49d_20220501.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_pvtv2-b2_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.905
+ AP@0.75: 0.812
+ AR: 0.791
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/pvt/pvtv2_b2_coco_256x192-b4212737_20220501.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..cb7ada4d6b72dfea2a029773148ed852ba00b1a8
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.md
@@ -0,0 +1,46 @@
+
+
+
+ResNeSt (ArXiv'2020)
+
+```bibtex
+@article{zhang2020resnest,
+ title={ResNeSt: Split-Attention Networks},
+ author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander},
+ journal={arXiv preprint arXiv:2004.08955},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnest_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py) | 256x192 | 0.720 | 0.899 | 0.800 | 0.775 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192-6e65eece_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192_20210320.log.json) |
+| [pose_resnest_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py) | 384x288 | 0.737 | 0.900 | 0.811 | 0.789 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288-dcd20436_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288_20210320.log.json) |
+| [pose_resnest_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py) | 256x192 | 0.725 | 0.900 | 0.807 | 0.781 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192-2ffcdc9d_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192_20210320.log.json) |
+| [pose_resnest_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py) | 384x288 | 0.745 | 0.905 | 0.818 | 0.798 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288-80660658_20210320.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288_20210320.log.json) |
+| [pose_resnest_200](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py) | 256x192 | 0.731 | 0.905 | 0.812 | 0.787 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192-db007a48_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192_20210517.log.json) |
+| [pose_resnest_200](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py) | 384x288 | 0.753 | 0.907 | 0.827 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288-b5bb76cb_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288_20210517.log.json) |
+| [pose_resnest_269](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py) | 256x192 | 0.737 | 0.907 | 0.819 | 0.792 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192-2a7882ac_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192_20210517.log.json) |
+| [pose_resnest_269](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py) | 384x288 | 0.754 | 0.908 | 0.828 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288-b142b9fb_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288_20210517.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..082c6a0aa278868876a472edbb747813b95281ac
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnest_coco.yml
@@ -0,0 +1,131 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNeSt
+ Training Data: COCO
+ Name: td-hm_resnest50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.720
+ AP@0.5: 0.899
+ AP@0.75: 0.8
+ AR: 0.775
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_256x192-6e65eece_20210320.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnest50_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.9
+ AP@0.75: 0.811
+ AR: 0.789
+ AR@0.5: 0.937
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest50_coco_384x288-dcd20436_20210320.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnest101_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.725
+ AP@0.5: 0.9
+ AP@0.75: 0.807
+ AR: 0.781
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_256x192-2ffcdc9d_20210320.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnest101_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.745
+ AP@0.5: 0.905
+ AP@0.75: 0.818
+ AR: 0.798
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest101_coco_384x288-80660658_20210320.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnest200_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.731
+ AP@0.5: 0.905
+ AP@0.75: 0.812
+ AR: 0.787
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_256x192-db007a48_20210517.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnest200_8xb16-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.907
+ AP@0.75: 0.827
+ AR: 0.805
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest200_coco_384x288-b5bb76cb_20210517.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnest269_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.907
+ AP@0.75: 0.819
+ AR: 0.792
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_256x192-2a7882ac_20210517.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnest269_8xb16-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.754
+ AP@0.5: 0.908
+ AP@0.75: 0.828
+ AR: 0.805
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnest/resnest269_coco_384x288-b142b9fb_20210517.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ce6da38c6728ae80f1733743977ef641511e1b2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.md
@@ -0,0 +1,62 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.718 | 0.898 | 0.796 | 0.774 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192-04af38ce_20220923.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192_20220923.log) |
+| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py) | 384x288 | 0.731 | 0.900 | 0.799 | 0.782 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288-7b8db90e_20220923.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288_20220923.log) |
+| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py) | 256x192 | 0.728 | 0.904 | 0.809 | 0.783 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192_20220926.log) |
+| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py) | 384x288 | 0.749 | 0.906 | 0.817 | 0.799 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth) (note: this URL is the 256x192 checkpoint; the 384x288 link needs confirming) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192_20220926.log) (note: this URL is the 256x192 log) |
+| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py) | 256x192 | 0.736 | 0.904 | 0.818 | 0.791 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192-0345f330_20220928.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192_20220928.log) |
+| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py) | 384x288 | 0.750 | 0.908 | 0.821 | 0.800 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288-7fbb906f_20220927.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288_20220927.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..296be8898bc73d00923eef74d7275802ae9f7c9e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_coco.yml
@@ -0,0 +1,121 @@
+Collections:
+- Name: SimpleBaseline2D
+ Paper:
+ Title: Simple baselines for human pose estimation and tracking
+ URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/simplebaseline2d.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: COCO
+ Name: td-hm_res50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.718
+ AP@0.5: 0.898
+ AP@0.75: 0.796
+ AR: 0.774
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192-04af38ce_20220923.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res50_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.731
+ AP@0.5: 0.9
+ AP@0.75: 0.799
+ AR: 0.782
+ AR@0.5: 0.937
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288-7b8db90e_20220923.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res101_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.728
+ AP@0.5: 0.904
+ AP@0.75: 0.809
+ AR: 0.783
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res101_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.749
+ AP@0.5: 0.906
+ AP@0.75: 0.817
+ AR: 0.799
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
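+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192-065d3625_20220926.pth  # NOTE(review): this is the 256x192 res101 checkpoint URL, identical to the previous entry; confirm the correct 384x288 weights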
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res152_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.736
+ AP@0.5: 0.904
+ AP@0.75: 0.818
+ AR: 0.791
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192-0345f330_20220928.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res152_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.75
+ AP@0.5: 0.908
+ AP@0.75: 0.821
+ AR: 0.8
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288-7fbb906f_20220927.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res50_fp16-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.716
+ AP@0.5: 0.898
+ AP@0.75: 0.798
+ AR: 0.772
+ AR@0.5: 0.937
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192-463da051_20220927.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f1b0107f30e336df3004788a174d4bfd2f7aef0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.md
@@ -0,0 +1,79 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnet_50_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py) | 256x192 | 0.724 | 0.897 | 0.797 | 0.777 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192-c129dcb6_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192_20220926.log) |
+| [pose_resnet_50_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py) | 384x288 | 0.735 | 0.902 | 0.801 | 0.786 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288-8b90b538_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288_20220926.log) |
+| [pose_resnet_101_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py) | 256x192 | 0.733 | 0.900 | 0.810 | 0.786 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192-528ec248_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192_20220926.log) |
+| [pose_resnet_101_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py) | 384x288 | 0.749 | 0.905 | 0.818 | 0.799 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288-487d40a4_20220926.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288_20220926.log) |
+| [pose_resnet_152_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py) | 256x192 | 0.743 | 0.906 | 0.819 | 0.796 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192-f754df5f_20221031.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192_20221031.log) |
+| [pose_resnet_152_dark](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py) | 384x288 | 0.755 | 0.907 | 0.825 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288-329f8454_20221031.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288_20221031.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..02e4a7f43f61b709a6ede2f3a42ab5ac91e56cd8
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_dark_coco.yml
@@ -0,0 +1,100 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ - DarkPose
+ Training Data: COCO
+ Name: td-hm_res50_dark-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.724
+ AP@0.5: 0.897
+ AP@0.75: 0.797
+ AR: 0.777
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192-c129dcb6_20220926.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res50_dark-8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.735
+ AP@0.5: 0.902
+ AP@0.75: 0.801
+ AR: 0.786
+ AR@0.5: 0.938
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288-8b90b538_20220926.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res101_dark-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.733
+ AP@0.5: 0.9
+ AP@0.75: 0.81
+ AR: 0.786
+ AR@0.5: 0.938
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192-528ec248_20220926.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res101_dark-8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.749
+ AP@0.5: 0.905
+ AP@0.75: 0.818
+ AR: 0.799
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288-487d40a4_20220926.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res152_dark-8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.743
+ AP@0.5: 0.906
+ AP@0.75: 0.819
+ AR: 0.796
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192-f754df5f_20221031.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_res152_dark-8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.755
+ AP@0.5: 0.907
+ AP@0.75: 0.825
+ AR: 0.805
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288-329f8454_20221031.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_fp16_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_fp16_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..2731ca8534b509b694f6d5f6958ad6f080c171c1
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnet_fp16_coco.md
@@ -0,0 +1,73 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+FP16 (ArXiv'2017)
+
+```bibtex
+@article{micikevicius2017mixed,
+ title={Mixed precision training},
+ author={Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
+ journal={arXiv preprint arXiv:1710.03740},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnet_50_fp16](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py) | 256x192 | 0.716 | 0.898 | 0.798 | 0.772 | 0.937 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192-463da051_20220927.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192_20220927.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..106720153251651f5ae1a53ace2a7333c5882898
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.md
@@ -0,0 +1,45 @@
+
+
+
+ResNetV1D (CVPR'2019)
+
+```bibtex
+@inproceedings{he2019bag,
+ title={Bag of tricks for image classification with convolutional neural networks},
+ author={He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={558--567},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnetv1d_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py) | 256x192 | 0.722 | 0.897 | 0.796 | 0.777 | 0.936 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192-27545d63_20221020.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192_20221020.log) |
+| [pose_resnetv1d_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py) | 384x288 | 0.730 | 0.899 | 0.800 | 0.782 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288-0646b46e_20221020.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288_20221020.log) |
+| [pose_resnetv1d_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py) | 256x192 | 0.732 | 0.901 | 0.808 | 0.785 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192-ee9e7212_20221021.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192_20221021.log) |
+| [pose_resnetv1d_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py) | 384x288 | 0.748 | 0.906 | 0.817 | 0.798 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288-d0b5875f_20221028.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288_20221028.log) |
+| [pose_resnetv1d_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py) | 256x192 | 0.737 | 0.904 | 0.814 | 0.790 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192-fd49f947_20221021.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192_20221021.log) |
+| [pose_resnetv1d_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py) | 384x288 | 0.751 | 0.907 | 0.821 | 0.801 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288-b9a99602_20221022.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288_20221022.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..765c8aaabc2fa08b5ca343a5f0bd8ac6a94c764b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnetv1d_coco.yml
@@ -0,0 +1,99 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNetV1D
+ Training Data: COCO
+ Name: td-hm_resnetv1d50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.722
+ AP@0.5: 0.897
+ AP@0.75: 0.796
+ AR: 0.777
+ AR@0.5: 0.936
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192-27545d63_20221020.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnetv1d50_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.73
+ AP@0.5: 0.899
+ AP@0.75: 0.8
+ AR: 0.782
+ AR@0.5: 0.935
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288-0646b46e_20221020.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnetv1d101_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.732
+ AP@0.5: 0.901
+ AP@0.75: 0.808
+ AR: 0.785
+ AR@0.5: 0.940
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192-ee9e7212_20221021.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnetv1d101_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.906
+ AP@0.75: 0.817
+ AR: 0.798
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288-d0b5875f_20221028.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnetv1d152_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.904
+ AP@0.75: 0.814
+ AR: 0.790
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192-fd49f947_20221021.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnetv1d152_8xb48-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.751
+ AP@0.5: 0.907
+ AP@0.75: 0.821
+ AR: 0.801
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288-b9a99602_20221022.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..8862fddf6ca5c3ffe0c9df407787a1d7a0312c36
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.md
@@ -0,0 +1,45 @@
+
+
+
+ResNext (CVPR'2017)
+
+```bibtex
+@inproceedings{xie2017aggregated,
+ title={Aggregated residual transformations for deep neural networks},
+ author={Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1492--1500},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_resnext_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py) | 256x192 | 0.715 | 0.897 | 0.791 | 0.771 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192-dcff15f6_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192_20200727.log.json) |
+| [pose_resnext_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py) | 384x288 | 0.724 | 0.899 | 0.794 | 0.777 | 0.936 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288-412c848f_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288_20200727.log.json) |
+| [pose_resnext_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py) | 256x192 | 0.726 | 0.900 | 0.801 | 0.781 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192-c7eba365_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192_20200727.log.json) |
+| [pose_resnext_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py) | 384x288 | 0.744 | 0.903 | 0.815 | 0.794 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288-f5eabcd6_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288_20200727.log.json) |
+| [pose_resnext_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py) | 256x192 | 0.730 | 0.903 | 0.808 | 0.785 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192-102449aa_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192_20200727.log.json) |
+| [pose_resnext_152](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py) | 384x288 | 0.742 | 0.904 | 0.810 | 0.794 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288-806176df_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288_20200727.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1ebb616ecdcbfc4f94c2a0ded053e9ef18e66c45
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/resnext_coco.yml
@@ -0,0 +1,99 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNext
+ Training Data: COCO
+ Name: td-hm_resnext50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.715
+ AP@0.5: 0.897
+ AP@0.75: 0.791
+ AR: 0.771
+ AR@0.5: 0.935
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_256x192-dcff15f6_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnext50_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.724
+ AP@0.5: 0.899
+ AP@0.75: 0.794
+ AR: 0.777
+ AR@0.5: 0.936
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext50_coco_384x288-412c848f_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnext101_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.726
+ AP@0.5: 0.9
+ AP@0.75: 0.801
+ AR: 0.781
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_256x192-c7eba365_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnext101_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.744
+ AP@0.5: 0.903
+ AP@0.75: 0.815
+ AR: 0.794
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext101_coco_384x288-f5eabcd6_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnext152_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.73
+ AP@0.5: 0.903
+ AP@0.75: 0.808
+ AR: 0.785
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_256x192-102449aa_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_resnext152_8xb48-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.742
+ AP@0.5: 0.904
+ AP@0.75: 0.81
+ AR: 0.794
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_coco_384x288-806176df_20200727.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..40f570c3c142266e13024c30ebd35ca2fcc1d00f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.md
@@ -0,0 +1,44 @@
+
+
+
+RSN (ECCV'2020)
+
+```bibtex
+@misc{cai2020learning,
+ title={Learning Delicate Local Representations for Multi-Person Pose Estimation},
+ author={Yuanhao Cai and Zhicheng Wang and Zhengxiong Luo and Binyi Yin and Angang Du and Haoqian Wang and Xinyu Zhou and Erjin Zhou and Xiangyu Zhang and Jian Sun},
+ year={2020},
+ eprint={2003.04030},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [rsn_18](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py) | 256x192 | 0.704 | 0.887 | 0.781 | 0.773 | 0.927 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192-9049ed09_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192_20221013.log) |
+| [rsn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.724 | 0.894 | 0.799 | 0.790 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192-c35901d5_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192_20221013.log) |
+| [2xrsn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.748 | 0.900 | 0.821 | 0.810 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192-9ede341e_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192_20221013.log) |
+| [3xrsn_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py) | 256x192 | 0.750 | 0.900 | 0.824 | 0.814 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192-c3e3c4fe_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192_20221013.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2974aaf2c0d84117178ad5c017ba0acbea6b024f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/rsn_coco.yml
@@ -0,0 +1,72 @@
+Collections:
+- Name: RSN
+ Paper:
+ Title: Learning Delicate Local Representations for Multi-Person Pose Estimation
+ URL: https://link.springer.com/chapter/10.1007/978-3-030-58580-8_27
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/rsn.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py
+ In Collection: RSN
+ Metadata:
+ Architecture: &id001
+ - RSN
+ Training Data: COCO
+ Name: td-hm_rsn18_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.704
+ AP@0.5: 0.887
+ AP@0.75: 0.781
+ AR: 0.773
+ AR@0.5: 0.927
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192-9049ed09_20221013.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py
+ In Collection: RSN
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_rsn50_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.724
+ AP@0.5: 0.894
+ AP@0.75: 0.799
+ AR: 0.79
+ AR@0.5: 0.935
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192-c35901d5_20221013.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py
+ In Collection: RSN
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_2xrsn50_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.9
+ AP@0.75: 0.821
+ AR: 0.81
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192-9ede341e_20221013.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py
+ In Collection: RSN
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_3xrsn50_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.75
+ AP@0.5: 0.9
+ AP@0.75: 0.824
+ AR: 0.814
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192-c3e3c4fe_20221013.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..5fb5833e236c1136456a0f8cf4ada0ad47b3caa9
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.md
@@ -0,0 +1,43 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_scnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py) | 256x192 | 0.728 | 0.899 | 0.807 | 0.784 | 0.938 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192-6920f829_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192_20200709.log.json) |
+| [pose_scnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py) | 384x288 | 0.751 | 0.906 | 0.818 | 0.802 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288-9cacd0ea_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288_20200709.log.json) |
+| [pose_scnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py) | 256x192 | 0.733 | 0.902 | 0.811 | 0.789 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192-6d348ef9_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192_20200709.log.json) |
+| [pose_scnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py) | 384x288 | 0.752 | 0.906 | 0.823 | 0.804 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288-0b6e631b_20200709.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288_20200709.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cf68c67f90621472b71916e876d79794df3d583c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/scnet_coco.yml
@@ -0,0 +1,66 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SCNet
+ Training Data: COCO
+ Name: td-hm_scnet50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.728
+ AP@0.5: 0.899
+ AP@0.75: 0.807
+ AR: 0.784
+ AR@0.5: 0.938
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_256x192-6920f829_20200709.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_scnet50_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.751
+ AP@0.5: 0.906
+ AP@0.75: 0.818
+ AR: 0.802
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_coco_384x288-9cacd0ea_20200709.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_scnet101_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.733
+ AP@0.5: 0.902
+ AP@0.75: 0.811
+ AR: 0.789
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_256x192-6d348ef9_20200709.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_scnet101_8xb48-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.752
+ AP@0.5: 0.906
+ AP@0.75: 0.823
+ AR: 0.804
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_coco_384x288-0b6e631b_20200709.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..b704d9d1902f6dbdb6dd80517d4c44f35ae86097
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.md
@@ -0,0 +1,47 @@
+
+
+
+SEResNet (CVPR'2018)
+
+```bibtex
+@inproceedings{hu2018squeeze,
+ title={Squeeze-and-excitation networks},
+ author={Hu, Jie and Shen, Li and Sun, Gang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={7132--7141},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_seresnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py) | 256x192 | 0.729 | 0.903 | 0.807 | 0.784 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_256x192-25058b66_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_256x192_20200727.log.json) |
+| [pose_seresnet_50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py) | 384x288 | 0.748 | 0.904 | 0.819 | 0.799 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_384x288-bc0b7680_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_384x288_20200727.log.json) |
+| [pose_seresnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py) | 256x192 | 0.734 | 0.905 | 0.814 | 0.790 | 0.941 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_256x192-83f29c4d_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_256x192_20200727.log.json) |
+| [pose_seresnet_101](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py) | 384x288 | 0.754 | 0.907 | 0.823 | 0.805 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_384x288-48de1709_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_384x288_20200727.log.json) |
+| [pose_seresnet_152\*](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py) | 256x192 | 0.730 | 0.899 | 0.810 | 0.787 | 0.939 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_256x192-1c628d79_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_256x192_20200727.log.json) |
+| [pose_seresnet_152\*](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py) | 384x288 | 0.753 | 0.906 | 0.824 | 0.806 | 0.945 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_384x288-58b23ee8_20200727.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_384x288_20200727.log.json) |
+
+Note that * means the model was trained without ImageNet pre-training.
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..945e84e223fc6ee0fa8820e331dea7df91bd8650
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/seresnet_coco.yml
@@ -0,0 +1,98 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SEResNet
+ Training Data: COCO
+ Name: td-hm_seresnet50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.729
+ AP@0.5: 0.903
+ AP@0.75: 0.807
+ AR: 0.784
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_256x192-25058b66_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_seresnet50_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.748
+ AP@0.5: 0.904
+ AP@0.75: 0.819
+ AR: 0.799
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_coco_384x288-bc0b7680_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_seresnet101_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.734
+ AP@0.5: 0.905
+ AP@0.75: 0.814
+ AR: 0.79
+ AR@0.5: 0.941
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_256x192-83f29c4d_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_seresnet101_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.754
+ AP@0.5: 0.907
+ AP@0.75: 0.823
+ AR: 0.805
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_coco_384x288-48de1709_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_seresnet152_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.73
+ AP@0.5: 0.899
+ AP@0.75: 0.81
+ AR: 0.787
+ AR@0.5: 0.939
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_256x192-1c628d79_20200727.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_seresnet152_8xb48-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.753
+ AP@0.5: 0.906
+ AP@0.75: 0.824
+ AR: 0.806
+ AR@0.5: 0.945
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_coco_384x288-58b23ee8_20200727.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c8be860ab7d2a58b3ba813347d754b9f5a98268
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.md
@@ -0,0 +1,41 @@
+
+
+
+ShufflenetV1 (CVPR'2018)
+
+```bibtex
+@inproceedings{zhang2018shufflenet,
+ title={Shufflenet: An extremely efficient convolutional neural network for mobile devices},
+ author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={6848--6856},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_shufflenetv1](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py) | 256x192 | 0.587 | 0.849 | 0.654 | 0.654 | 0.896 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192-7a7ea4f4_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192_20221013.log) |
+| [pose_shufflenetv1](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py) | 384x288 | 0.626 | 0.862 | 0.696 | 0.687 | 0.903 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288-8342f8ba_20221013.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288_20221013.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fbdc89936d59ecdbdfbc410f4b92f00070423145
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv1_coco.yml
@@ -0,0 +1,35 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ShufflenetV1
+ Training Data: COCO
+ Name: td-hm_shufflenetv1_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.587
+ AP@0.5: 0.849
+ AP@0.75: 0.654
+ AR: 0.654
+ AR@0.5: 0.896
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192-7a7ea4f4_20221013.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_shufflenetv1_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.626
+ AP@0.5: 0.862
+ AP@0.75: 0.696
+ AR: 0.687
+ AR@0.5: 0.903
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288-8342f8ba_20221013.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..f613f4fef145e8444b207a608b661b11aba31983
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.md
@@ -0,0 +1,41 @@
+
+
+
+ShufflenetV2 (ECCV'2018)
+
+```bibtex
+@inproceedings{ma2018shufflenet,
+ title={Shufflenet v2: Practical guidelines for efficient cnn architecture design},
+ author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={116--131},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_shufflenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py) | 256x192 | 0.602 | 0.857 | 0.672 | 0.668 | 0.902 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192-51fb931e_20221014.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192_20221014.log) |
+| [pose_shufflenetv2](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py) | 384x288 | 0.638 | 0.866 | 0.707 | 0.699 | 0.910 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288-d30ab55c_20221014.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288_20221014.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cdda3a8667ee0e22146a257dfd25c514a50dc6f2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/shufflenetv2_coco.yml
@@ -0,0 +1,35 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ShufflenetV2
+ Training Data: COCO
+ Name: td-hm_shufflenetv2_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.602
+ AP@0.5: 0.857
+ AP@0.75: 0.672
+ AR: 0.668
+ AR@0.5: 0.902
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192-51fb931e_20221014.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_shufflenetv2_8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.638
+ AP@0.5: 0.866
+ AP@0.75: 0.707
+ AR: 0.699
+ AR@0.5: 0.91
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288-d30ab55c_20221014.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..5bcc5bd187526b01e711fe3049e3007146409cd5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.md
@@ -0,0 +1,78 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+Swin (ICCV'2021)
+
+```bibtex
+@inproceedings{liu2021swin,
+ title={Swin transformer: Hierarchical vision transformer using shifted windows},
+ author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining},
+ booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
+ pages={10012--10022},
+ year={2021}
+}
+```
+
+
+
+
+
+
+FPN (CVPR'2017)
+
+```bibtex
+@inproceedings{lin2017feature,
+ title={Feature pyramid networks for object detection},
+ author={Lin, Tsung-Yi and Doll{\'a}r, Piotr and Girshick, Ross and He, Kaiming and Hariharan, Bharath and Belongie, Serge},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2117--2125},
+ year={2017}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [pose_swin_t](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py) | 256x192 | 0.724 | 0.901 | 0.806 | 0.782 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_t_p4_w7_coco_256x192-eaefe010_20220503.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_t_p4_w7_coco_256x192_20220503.log.json) |
+| [pose_swin_b](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py) | 256x192 | 0.737 | 0.904 | 0.820 | 0.794 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_256x192-7432be9e_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_256x192_20220705.log.json) |
+| [pose_swin_b](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py) | 384x288 | 0.759 | 0.910 | 0.832 | 0.811 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_384x288-3abf54f9_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_384x288_20220705.log.json) |
+| [pose_swin_l](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py) | 256x192 | 0.743 | 0.906 | 0.821 | 0.798 | 0.943 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_256x192-642a89db_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_256x192_20220705.log.json) |
+| [pose_swin_l](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py) | 384x288 | 0.763 | 0.912 | 0.830 | 0.814 | 0.949 | [ckpt](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_384x288-c36b7845_20220705.pth) | [log](https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_384x288_20220705.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..09ede5fa5c4ec01b77d997b4b318527fa0e27daf
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/swin_coco.yml
@@ -0,0 +1,99 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - Swin
+ Training Data: COCO
+ Name: td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.724
+ AP@0.5: 0.901
+ AP@0.75: 0.806
+ AR: 0.782
+ AR@0.5: 0.94
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_t_p4_w7_coco_256x192-eaefe010_20220503.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.737
+ AP@0.5: 0.904
+ AP@0.75: 0.82
+ AR: 0.794
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_256x192-7432be9e_20220705.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.759
+ AP@0.5: 0.91
+ AP@0.75: 0.832
+ AR: 0.811
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_coco_384x288-3abf54f9_20220705.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.743
+ AP@0.5: 0.906
+ AP@0.75: 0.821
+ AR: 0.798
+ AR@0.5: 0.943
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_256x192-642a89db_20220705.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.763
+ AP@0.5: 0.912
+ AP@0.75: 0.83
+ AR: 0.814
+ AR@0.5: 0.949
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_l_p4_w7_coco_384x288-c36b7845_20220705.pth
+- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/swin_b_p4_w7_fpn_coco_256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: topdown_heatmap_swin_b_p4_w7_fpn_coco_256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.741
+ AP@0.5: 0.907
+ AP@0.75: 0.821
+ AR: 0.798
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/swin/swin_b_p4_w7_fpn_coco_256x192-a3b91c45_20220705.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..7af125c24d81c4bfa81cdafa3cb95f9729511b66
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xmspn50_8xb32-210e_coco-256x192.py
@@ -0,0 +1,152 @@
+_base_ = ['../../../_base_/default_runtime.py'] # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-3,
+))
+
+# learning policy: a linear warm-up followed by a multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500, counted per iteration (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the best checkpoint, i.e. the one with the greatest 'coco/AP'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: one MegviiHeatmap codec is built per entry of kernel_sizes
+# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach.
+kernel_sizes = [15, 11, 9, 7, 5]
+codec = [
+ dict(
+ type='MegviiHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings: top-down estimator with a 2-stage MSPN backbone and MSPNHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MSPN',
+ unit_channels=256,
+ num_stages=2,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ norm_cfg=dict(type='BN'),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='torchvision://resnet50',
+ )),
+ head=dict(
+ type='MSPNHead',
+ out_shape=(64, 48), # same two values as codec heatmap_size=(48, 64), reversed
+ unit_channels=256,
+ out_channels=17, # presumably one channel per COCO keypoint - TODO confirm
+ num_stages=2, # kept equal to the backbone's num_stages
+ num_units=4, # kept equal to the backbone's num_units
+ norm_cfg=dict(type='BN'),
+ # each sub list is for a stage
+ # and each element in each list is for a unit
+ level_indices=[0, 1, 2, 3] + [1, 2, 3, 4],
+ loss=([
+ dict(
+ type='KeypointMSELoss',
+ use_target_weight=True,
+ loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='KeypointOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ]) * 2, # 3 MSE + 1 OHKM entries, repeated twice: 8 loss configs in total
+ decoder=codec[-1]), # decode with the last codec (kernel_size=5)
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: the training pipeline adds augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']), # every codec shares (192, 256)
+ dict(type='GenerateTarget', multilevel=True, encoder=codec), # encodes with the full codec list
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32 for both training and validation
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json', # adjacent literals join into one path
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader # testing reuses the validation dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_mode='none')
+test_evaluator = val_evaluator # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0680f6995eee3dc9a345eab0353f5dc65c023f0f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_2xrsn50_8xb32-210e_coco-256x192.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-3
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-3,
+))
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the greatest 'coco/AP'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+# one 'Megvii' heatmap codec per Gaussian kernel size, largest kernel first
+kernel_sizes = [15, 11, 9, 7, 5]
+codec = [
+    dict(
+        type='MegviiHeatmap',
+        input_size=(192, 256),
+        heatmap_size=(48, 64),
+        kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings: top-down estimator, 2-stage RSN backbone + MSPNHead
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='RSN',
+        unit_channels=256,
+        num_stages=2,
+        num_units=4,
+        num_blocks=[3, 4, 6, 3],
+        num_steps=4,
+        norm_cfg=dict(type='BN'),
+    ),
+    head=dict(
+        type='MSPNHead',
+        out_shape=(64, 48),
+        unit_channels=256,
+        out_channels=17,
+        num_stages=2,
+        num_units=4,
+        norm_cfg=dict(type='BN'),
+        # flattened per-stage lists: each sub list is for a stage
+        # and each element in each list is for a unit (2 x 4 = 8 entries)
+        level_indices=[0, 1, 2, 3] + [1, 2, 3, 4],
+        loss=([
+            dict(
+                type='KeypointMSELoss',
+                use_target_weight=True,
+                loss_weight=0.25)
+        ] * 3 + [
+            dict(
+                type='KeypointOHKMMSELoss',
+                use_target_weight=True,
+                loss_weight=1.)
+        ]) * 2,  # 4 losses x 2 repeats = 8, matching len(level_indices)
+        decoder=codec[-1]),  # decode with the kernel_size=5 codec
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='GenerateTarget', multilevel=True, encoder=codec),
+    dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32; bbox_file is built from data_root
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations, nms_mode 'none'
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+    nms_mode='none')
+test_evaluator = val_evaluator
+
+# fp16 settings -- NOTE(review): legacy key; confirm the runner still reads it
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..41162f01e5ac5c63977c11ea70b49372ef2b8476
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xmspn50_8xb32-210e_coco-256x192.py
@@ -0,0 +1,152 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-3
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-3,
+))
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the greatest 'coco/AP'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+# one 'Megvii' heatmap codec per Gaussian kernel size, largest kernel first
+kernel_sizes = [15, 11, 9, 7, 5]
+codec = [
+    dict(
+        type='MegviiHeatmap',
+        input_size=(192, 256),
+        heatmap_size=(48, 64),
+        kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings: top-down estimator, 3-stage MSPN backbone + MSPNHead
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='MSPN',
+        unit_channels=256,
+        num_stages=3,
+        num_units=4,
+        num_blocks=[3, 4, 6, 3],
+        norm_cfg=dict(type='BN'),
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='torchvision://resnet50',
+        )),
+    head=dict(
+        type='MSPNHead',
+        out_shape=(64, 48),
+        unit_channels=256,
+        out_channels=17,
+        num_stages=3,
+        num_units=4,
+        norm_cfg=dict(type='BN'),
+        # flattened per-stage lists: each sub list is for a stage
+        # and each element in each list is for a unit (3 x 4 = 12 entries)
+        level_indices=[0, 1, 2, 3] * 2 + [1, 2, 3, 4],
+        loss=([
+            dict(
+                type='KeypointMSELoss',
+                use_target_weight=True,
+                loss_weight=0.25)
+        ] * 3 + [
+            dict(
+                type='KeypointOHKMMSELoss',
+                use_target_weight=True,
+                loss_weight=1.)
+        ]) * 3,  # 4 losses x 3 repeats = 12, matching len(level_indices)
+        decoder=codec[-1]),  # decode with the kernel_size=5 codec
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='GenerateTarget', multilevel=True, encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32; bbox_file is built from data_root
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations, nms_mode 'none'
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+    nms_mode='none')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..99326451c6d05162bc3df0c8d71e8305baf574fd
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_3xrsn50_8xb32-210e_coco-256x192.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-3
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-3,
+))
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the greatest 'coco/AP'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+# one 'Megvii' heatmap codec per Gaussian kernel size, largest kernel first
+kernel_sizes = [15, 11, 9, 7, 5]
+codec = [
+    dict(
+        type='MegviiHeatmap',
+        input_size=(192, 256),
+        heatmap_size=(48, 64),
+        kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings: top-down estimator, 3-stage RSN backbone + MSPNHead
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='RSN',
+        unit_channels=256,
+        num_stages=3,
+        num_units=4,
+        num_blocks=[3, 4, 6, 3],
+        num_steps=4,
+        norm_cfg=dict(type='BN'),
+    ),
+    head=dict(
+        type='MSPNHead',
+        out_shape=(64, 48),
+        unit_channels=256,
+        out_channels=17,
+        num_stages=3,
+        num_units=4,
+        norm_cfg=dict(type='BN'),
+        # flattened per-stage lists: each sub list is for a stage
+        # and each element in each list is for a unit (3 x 4 = 12 entries)
+        level_indices=[0, 1, 2, 3] * 2 + [1, 2, 3, 4],
+        loss=([
+            dict(
+                type='KeypointMSELoss',
+                use_target_weight=True,
+                loss_weight=0.25)
+        ] * 3 + [
+            dict(
+                type='KeypointOHKMMSELoss',
+                use_target_weight=True,
+                loss_weight=1.)
+        ]) * 3,  # 4 losses x 3 repeats = 12, matching len(level_indices)
+        decoder=codec[-1]),  # decode with the kernel_size=5 codec
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='GenerateTarget', multilevel=True, encoder=codec),
+    dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32; bbox_file is built from data_root
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations, nms_mode 'none'
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+    nms_mode='none')
+test_evaluator = val_evaluator
+
+# fp16 settings -- NOTE(review): legacy key; confirm the runner still reads it
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..999245e74dfc87985e34e3122979fe02486c5b4f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_4xmspn50_8xb32-210e_coco-256x192.py
@@ -0,0 +1,152 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-3
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-3,
+))
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the greatest 'coco/AP'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+# one 'Megvii' heatmap codec per Gaussian kernel size, largest kernel first
+kernel_sizes = [15, 11, 9, 7, 5]
+codec = [
+    dict(
+        type='MegviiHeatmap',
+        input_size=(192, 256),
+        heatmap_size=(48, 64),
+        kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings: top-down estimator, 4-stage MSPN backbone + MSPNHead
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='MSPN',
+        unit_channels=256,
+        num_stages=4,
+        num_units=4,
+        num_blocks=[3, 4, 6, 3],
+        norm_cfg=dict(type='BN'),
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='torchvision://resnet50',
+        )),
+    head=dict(
+        type='MSPNHead',
+        out_shape=(64, 48),
+        unit_channels=256,
+        out_channels=17,
+        num_stages=4,
+        num_units=4,
+        norm_cfg=dict(type='BN'),
+        # flattened per-stage lists: each sub list is for a stage
+        # and each element in each list is for a unit (4 x 4 = 16 entries)
+        level_indices=[0, 1, 2, 3] * 3 + [1, 2, 3, 4],
+        loss=([
+            dict(
+                type='KeypointMSELoss',
+                use_target_weight=True,
+                loss_weight=0.25)
+        ] * 3 + [
+            dict(
+                type='KeypointOHKMMSELoss',
+                use_target_weight=True,
+                loss_weight=1.)
+        ]) * 4,  # 4 losses x 4 repeats = 16, matching len(level_indices)
+        decoder=codec[-1]),  # decode with the kernel_size=5 codec
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='GenerateTarget', multilevel=True, encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32; bbox_file is built from data_root
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations, nms_mode 'none'
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+    nms_mode='none')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..13eb5f373a22892f453e0237004f79eecd6d866a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py
@@ -0,0 +1,153 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW built by the layer-decay constructor imported here
+custom_imports = dict(
+    imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+    allow_failed_imports=False)
+
+optim_wrapper = dict(
+    optimizer=dict(
+        type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+    paramwise_cfg=dict(
+        num_layers=12,
+        layer_decay_rate=0.75,
+        custom_keys={
+            'bias': dict(decay_mult=0.0),  # same key as the entries below
+            'pos_embed': dict(decay_mult=0.0),
+            'relative_position_bias_table': dict(decay_mult=0.0),
+            'norm': dict(decay_mult=0.0),
+        },
+    ),
+    constructor='LayerDecayOptimWrapperConstructor',
+    clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: save the best 'coco/AP' checkpoint, with max_keep_ckpts=1
+default_hooks = dict(
+    checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: a single UDP heatmap codec shared by head and pipelines
+codec = dict(
+    type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: ViT-base backbone (mmcls scope), neck, deconv-free head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='mmcls.VisionTransformer',
+        arch='base',
+        img_size=(256, 192),
+        patch_size=16,
+        qkv_bias=True,
+        drop_path_rate=0.3,
+        with_cls_token=False,
+        output_cls_token=False,
+        patch_cfg=dict(padding=2),
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'v1/pretrained_models/mae_pretrain_vit_base.pth'),
+    ),
+    neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=768,
+        out_channels=17,
+        deconv_out_channels=[],
+        deconv_kernel_sizes=[],
+        final_layer=dict(kernel_size=3, padding=1),
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec,
+    ),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch 64 (train) / 32 (val); bbox_file built from data_root
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8725fa2ca0c7aa4af44d6fcf2dbe50ed5b5c3d7c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW built by the layer-decay constructor imported here
+custom_imports = dict(
+    imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+    allow_failed_imports=False)
+
+optim_wrapper = dict(
+    optimizer=dict(
+        type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+    paramwise_cfg=dict(
+        num_layers=12,
+        layer_decay_rate=0.75,
+        custom_keys={
+            'bias': dict(decay_mult=0.0),  # same key as the entries below
+            'pos_embed': dict(decay_mult=0.0),
+            'relative_position_bias_table': dict(decay_mult=0.0),
+            'norm': dict(decay_mult=0.0),
+        },
+    ),
+    constructor='LayerDecayOptimWrapperConstructor',
+    clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: save the best 'coco/AP' checkpoint, with max_keep_ckpts=1
+default_hooks = dict(
+    checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: a single UDP heatmap codec shared by head and pipelines
+codec = dict(
+    type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: ViT-base backbone (mmcls scope) + two-deconv heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='mmcls.VisionTransformer',
+        arch='base',
+        img_size=(256, 192),
+        patch_size=16,
+        qkv_bias=True,
+        drop_path_rate=0.3,
+        with_cls_token=False,
+        output_cls_token=False,
+        patch_cfg=dict(padding=2),
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'v1/pretrained_models/mae_pretrain_vit_base.pth'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=768,
+        out_channels=17,
+        deconv_out_channels=(256, 256),
+        deconv_kernel_sizes=(4, 4),
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch 64 (train) / 32 (val); bbox_file built from data_root
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d94f97c1bcd3337bb7397db6f92884df9fc0438
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py
@@ -0,0 +1,153 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW built by the layer-decay constructor imported here
+custom_imports = dict(
+    imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+    allow_failed_imports=False)
+
+optim_wrapper = dict(
+    optimizer=dict(
+        type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+    paramwise_cfg=dict(
+        num_layers=32,
+        layer_decay_rate=0.85,
+        custom_keys={
+            'bias': dict(decay_mult=0.0),  # same key as the entries below
+            'pos_embed': dict(decay_mult=0.0),
+            'relative_position_bias_table': dict(decay_mult=0.0),
+            'norm': dict(decay_mult=0.0),
+        },
+    ),
+    constructor='LayerDecayOptimWrapperConstructor',
+    clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: save the best 'coco/AP' checkpoint, with max_keep_ckpts=1
+default_hooks = dict(
+    checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: a single UDP heatmap codec shared by head and pipelines
+codec = dict(
+    type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: ViT-huge backbone (mmpretrain scope), neck, deconv-free head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='mmpretrain.VisionTransformer',
+        arch='huge',
+        img_size=(256, 192),
+        patch_size=16,
+        qkv_bias=True,
+        drop_path_rate=0.55,
+        with_cls_token=False,
+        out_type='featmap',
+        patch_cfg=dict(padding=2),
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'v1/pretrained_models/mae_pretrain_vit_huge.pth'),
+    ),
+    neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=1280,
+        out_channels=17,
+        deconv_out_channels=[],
+        deconv_kernel_sizes=[],
+        final_layer=dict(kernel_size=3, padding=1),
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec,
+    ),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch 64 (train) / 32 (val); bbox_file built from data_root
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1953188a19827607647edd68266e466ce545aa0e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW built by the layer-decay constructor imported here
+custom_imports = dict(
+    imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+    allow_failed_imports=False)
+
+optim_wrapper = dict(
+    optimizer=dict(
+        type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+    paramwise_cfg=dict(
+        num_layers=32,
+        layer_decay_rate=0.85,
+        custom_keys={
+            'bias': dict(decay_mult=0.0),  # same key as the entries below
+            'pos_embed': dict(decay_mult=0.0),
+            'relative_position_bias_table': dict(decay_mult=0.0),
+            'norm': dict(decay_mult=0.0),
+        },
+    ),
+    constructor='LayerDecayOptimWrapperConstructor',
+    clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: iteration-based warm-up, then epoch-based step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: save the best 'coco/AP' checkpoint, with max_keep_ckpts=1
+default_hooks = dict(
+    checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: a single UDP heatmap codec shared by head and pipelines
+codec = dict(
+    type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: ViT-huge backbone (mmcls scope) + two-deconv heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='mmcls.VisionTransformer',
+        arch='huge',
+        img_size=(256, 192),
+        patch_size=16,
+        qkv_bias=True,
+        drop_path_rate=0.55,
+        with_cls_token=False,
+        output_cls_token=False,
+        patch_cfg=dict(padding=2),
+        init_cfg=dict(
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'v1/pretrained_models/mae_pretrain_vit_huge.pth'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=1280,
+        out_channels=17,
+        deconv_out_channels=(256, 256),
+        deconv_kernel_sizes=(4, 4),
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),
+    test_cfg=dict(
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=False,
+    ))
+
+# base dataset settings
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip / half-body / bbox augmentation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: batch 64 (train) / 32 (val); bbox_file built from data_root
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file=data_root + 'person_detection_results/'
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric on the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8086b09410716a881a2cf9dd0773414937e05ea6
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py
@@ -0,0 +1,153 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW, built through the layer-decay constructor imported here
+custom_imports = dict(
+ imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+ allow_failed_imports=False)
+
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+ paramwise_cfg=dict(
+ num_layers=24,  # NOTE(review): presumably the ViT-large depth; confirm
+ layer_decay_rate=0.8,
+ custom_keys={  # decay_mult=0.0 is meant to exempt these params from decay
+ 'bias': dict(decay_mult=0.0),
+ 'pos_embed': dict(decay_mult=0.0),
+ 'relative_position_bias_table': dict(decay_mult=0.0),
+ 'norm': dict(decay_mult=0.0),
+ },
+ ),
+ constructor='LayerDecayOptimWrapperConstructor',  # NOTE(review): confirm it honours custom_keys
+ clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: UDP heatmaps; heatmap_size is input_size downscaled by 4
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ViT backbone + neck + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='mmcls.VisionTransformer',
+ arch='large',
+ img_size=(256, 192),  # same numbers as codec input_size, reversed order
+ patch_size=16,
+ qkv_bias=True,
+ drop_path_rate=0.5,
+ with_cls_token=False,
+ output_cls_token=False,
+ patch_cfg=dict(padding=2),
+ init_cfg=dict(  # backbone is initialised from this pretrained checkpoint
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'v1/pretrained_models/mae_pretrain_vit_large.pth'),
+ ),
+ neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,  # NOTE(review): presumably the ViT-large width; confirm
+ out_channels=17,  # NOTE(review): presumably one map per COCO keypoint
+ deconv_out_channels=[],  # empty lists: no deconv layers are configured
+ deconv_kernel_sizes=[],
+ final_layer=dict(kernel_size=3, padding=1),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec,  # same codec dict that GenerateTarget uses as encoder
+ ),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 64, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..43d5df71545129679aa6f06f6a3fe35caa0fb56d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW, built through the layer-decay constructor imported here
+custom_imports = dict(
+ imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+ allow_failed_imports=False)
+
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+ paramwise_cfg=dict(
+ num_layers=24,  # NOTE(review): presumably the ViT-large depth; confirm
+ layer_decay_rate=0.8,
+ custom_keys={  # decay_mult=0.0 is meant to exempt these params from decay
+ 'bias': dict(decay_mult=0.0),
+ 'pos_embed': dict(decay_mult=0.0),
+ 'relative_position_bias_table': dict(decay_mult=0.0),
+ 'norm': dict(decay_mult=0.0),
+ },
+ ),
+ constructor='LayerDecayOptimWrapperConstructor',  # NOTE(review): confirm it honours custom_keys
+ clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: UDP heatmaps; heatmap_size is input_size downscaled by 4
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ViT backbone + deconv heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='mmcls.VisionTransformer',
+ arch='large',
+ img_size=(256, 192),  # same numbers as codec input_size, reversed order
+ patch_size=16,
+ qkv_bias=True,
+ drop_path_rate=0.5,
+ with_cls_token=False,
+ output_cls_token=False,
+ patch_cfg=dict(padding=2),
+ init_cfg=dict(  # backbone is initialised from this pretrained checkpoint
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'v1/pretrained_models/mae_pretrain_vit_large.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,  # NOTE(review): presumably the ViT-large width; confirm
+ out_channels=17,  # NOTE(review): presumably one map per COCO keypoint
+ deconv_out_channels=(256, 256),  # two deconv layers, 256 channels each
+ deconv_kernel_sizes=(4, 4),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 64, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b57b0d3735ff02fdc3ccae9e750f146dbecda3f0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW, built through the layer-decay constructor imported here
+custom_imports = dict(
+ imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+ allow_failed_imports=False)
+
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+ paramwise_cfg=dict(
+ num_layers=12,  # same value as the backbone's arch['num_layers']
+ layer_decay_rate=0.8,
+ custom_keys={  # decay_mult=0.0 is meant to exempt these params from decay
+ 'bias': dict(decay_mult=0.0),
+ 'pos_embed': dict(decay_mult=0.0),
+ 'relative_position_bias_table': dict(decay_mult=0.0),
+ 'norm': dict(decay_mult=0.0),
+ },
+ ),
+ constructor='LayerDecayOptimWrapperConstructor',  # NOTE(review): confirm it honours custom_keys
+ clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: UDP heatmaps; heatmap_size is input_size downscaled by 4
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ViT backbone + neck + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='mmcls.VisionTransformer',
+ arch={  # architecture given explicitly as a dict rather than by name
+ 'embed_dims': 384,
+ 'num_layers': 12,
+ 'num_heads': 12,
+ 'feedforward_channels': 384 * 4
+ },
+ img_size=(256, 192),  # same numbers as codec input_size, reversed order
+ patch_size=16,
+ qkv_bias=True,
+ drop_path_rate=0.1,
+ with_cls_token=False,
+ output_cls_token=False,
+ patch_cfg=dict(padding=2),
+ init_cfg=dict(  # backbone is initialised from this pretrained checkpoint
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'v1/pretrained_models/mae_pretrain_vit_small.pth'),
+ ),
+ neck=dict(type='FeatureMapProcessor', scale_factor=4.0, apply_relu=True),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=384,  # equals the backbone's embed_dims
+ out_channels=17,  # NOTE(review): presumably one map per COCO keypoint
+ deconv_out_channels=[],  # empty lists: no deconv layers are configured
+ deconv_kernel_sizes=[],
+ final_layer=dict(kernel_size=3, padding=1),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec,  # same codec dict that GenerateTarget uses as encoder
+ ),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 64, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d08a31a022fed56a34e8e47dd2bd518c410b884
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py
@@ -0,0 +1,155 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW, built through the layer-decay constructor imported here
+custom_imports = dict(
+ imports=['mmpose.engine.optim_wrappers.layer_decay_optim_wrapper'],
+ allow_failed_imports=False)
+
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1),
+ paramwise_cfg=dict(
+ num_layers=12,  # same value as the backbone's arch['num_layers']
+ layer_decay_rate=0.8,
+ custom_keys={  # decay_mult=0.0 is meant to exempt these params from decay
+ 'bias': dict(decay_mult=0.0),
+ 'pos_embed': dict(decay_mult=0.0),
+ 'relative_position_bias_table': dict(decay_mult=0.0),
+ 'norm': dict(decay_mult=0.0),
+ },
+ ),
+ constructor='LayerDecayOptimWrapperConstructor',  # NOTE(review): confirm it honours custom_keys
+ clip_grad=dict(max_norm=1., norm_type=2),
+)
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=1))
+
+# codec settings: UDP heatmaps; heatmap_size is input_size downscaled by 4
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ViT backbone + deconv heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='mmcls.VisionTransformer',
+ arch={  # architecture given explicitly as a dict rather than by name
+ 'embed_dims': 384,
+ 'num_layers': 12,
+ 'num_heads': 12,
+ 'feedforward_channels': 384 * 4
+ },
+ img_size=(256, 192),  # same numbers as codec input_size, reversed order
+ patch_size=16,
+ qkv_bias=True,
+ drop_path_rate=0.1,
+ with_cls_token=False,
+ output_cls_token=False,
+ patch_cfg=dict(padding=2),
+ init_cfg=dict(  # backbone is initialised from this pretrained checkpoint
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'v1/pretrained_models/mae_pretrain_vit_small.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=384,  # equals the backbone's embed_dims
+ out_channels=17,  # NOTE(review): presumably one map per COCO keypoint
+ deconv_out_channels=(256, 256),  # two deconv layers, 256 channels each
+ deconv_kernel_sizes=(4, 4),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+data_root = 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 64, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..4051f4c5ec52fe170d5a6a050e867fe5ebb255a3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_alexnet_8xb64-210e_coco-256x192.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: plain Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, input_size (192, 256) -> heatmap (40, 56)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(40, 56), sigma=2)
+
+# model settings: top-down estimator = AlexNet backbone + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='AlexNet', num_classes=-1),  # NOTE(review): -1 presumably drops the classifier; confirm
+ head=dict(
+ type='HeatmapHead',
+ in_channels=256,  # NOTE(review): presumably the AlexNet feature width
+ out_channels=17,  # NOTE(review): presumably one map per COCO keypoint
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 64, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..38b23cf7182c45a507b87c4a372fd2e174e32eb1
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb32-210e_coco-384x288.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: plain Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # NOTE(review): file name says 8xb32 (256 total); confirm 512
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps; heatmap_size is input_size downscaled by 8
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(36, 48), sigma=3)
+
+# model settings: top-down estimator = 6-stage CPM backbone + CPM head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=17,
+ feat_channels=128,
+ num_stages=6),
+ head=dict(
+ type='CPMHead',
+ in_channels=17,  # equals the backbone's out_channels
+ out_channels=17,
+ num_stages=6,  # equals the backbone's num_stages
+ deconv_out_channels=None,  # no deconv or final layer is configured
+ final_layer=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 32, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..17f7eb9677fbf0d285628e059835a45f443caeef
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_cpm_8xb64-210e_coco-256x192.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: plain Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps; heatmap_size is input_size downscaled by 8
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(24, 32), sigma=2)
+
+# model settings: top-down estimator = 6-stage CPM backbone + CPM head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=17,
+ feat_channels=128,
+ num_stages=6),
+ head=dict(
+ type='CPMHead',
+ in_channels=17,  # equals the backbone's out_channels
+ out_channels=17,
+ num_stages=6,  # equals the backbone's num_stages
+ deconv_out_channels=None,  # no deconv or final layer is configured
+ final_layer=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 64, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..b9d49c8e6a7df8160db26ff6a0cbabe20b6f4a4a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: plain Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then x0.1 steps at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scale the LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # NOTE(review): file name says 8xb32 (256 total); confirm 512
+
+# hooks: track the best checkpoint by coco/AP (higher is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps; heatmap_size is input_size downscaled by 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator = single-stack hourglass + CPM head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ head=dict(
+ type='CPMHead',
+ in_channels=256,  # NOTE(review): presumably the hourglass feature width
+ out_channels=17,  # NOTE(review): presumably one map per COCO keypoint
+ num_stages=1,  # same value as the backbone's num_stacks
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode under data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms + targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no random transforms and no target generation
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train batches of 32, ordered val batches of 32
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py
new file mode 100644
index 0000000000000000000000000000000000000000..d9932ff9e3773a591650ee94a95da2784bf562eb
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hourglass52_8xb32-210e_coco-384x384.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500, not per epoch (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)  # LR x0.1 (gamma) at milestones 170 and 200, per epoch
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # NOTE(review): file name says 8xb32 (256?) - confirm
+
+# hooks: save the best checkpoint, i.e. the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, (384, 384) input -> (96, 96) heatmap, sigma=3
+codec = dict(
+ type='MSRAHeatmap', input_size=(384, 384), heatmap_size=(96, 96), sigma=3)
+
+# model settings: top-down estimator, HourglassNet backbone + CPMHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ head=dict(
+ type='CPMHead',
+ in_channels=256,
+ out_channels=17,
+ num_stages=1,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32 for both; validation is unshuffled, test_mode=True
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotation file
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b81dbdaac0c4df6eed9f287379b25e81ab6ce7d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-256x192.py
@@ -0,0 +1,174 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW (lr 5e-4, weight decay 0.01) with one per-key override
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ ),
+ paramwise_cfg=dict(
+ custom_keys={'relative_position_bias_table': dict(decay_mult=0.)}))  # decay multiplier 0 for this key
+
+# learning policy: linear warm-up, then multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500, not per epoch (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)  # LR x0.1 (gamma) at milestones 170 and 200, per epoch
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: save the best checkpoint, i.e. the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, (192, 256) input -> (48, 64) heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator, 4-stage HRFormer backbone + HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRFormer',
+ in_channels=3,
+ norm_cfg=norm_cfg,
+ extra=dict(
+ drop_path_rate=0.2,
+ with_rpe=True,
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(2, ),
+ num_channels=(64, ),
+ num_heads=[2],
+ mlp_ratios=[4]),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2),
+ num_channels=(78, 156),
+ num_heads=[2, 4],
+ mlp_ratios=[4, 4],
+ window_sizes=[7, 7]),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2),
+ num_channels=(78, 156, 312),
+ num_heads=[2, 4, 8],
+ mlp_ratios=[4, 4, 4],
+ window_sizes=[7, 7, 7]),
+ stage4=dict(
+ num_modules=2,
+ num_branches=4,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2, 2),
+ num_channels=(78, 156, 312, 624),
+ num_heads=[2, 4, 8, 16],
+ mlp_ratios=[4, 4, 4, 4],
+ window_sizes=[7, 7, 7, 7])),
+ init_cfg=dict(
+ type='Pretrained',  # backbone initialised from the checkpoint URL below
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrformer_base-32815020_20220226.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=78,  # equals the first-branch num_channels of the backbone stages
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32 for both; validation is unshuffled, test_mode=True
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotation file
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
+
+# fp16 settings: dynamic loss scaling
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..351685464c9560dd748da728372dbcf46a8dfc70
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-base_8xb32-210e_coco-384x288.py
@@ -0,0 +1,174 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW (lr 5e-4, weight decay 0.01) with one per-key override
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ ),
+ paramwise_cfg=dict(
+ custom_keys={'relative_position_bias_table': dict(decay_mult=0.)}))  # decay multiplier 0 for this key
+
+# learning policy: linear warm-up, then multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500, not per epoch (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)  # LR x0.1 (gamma) at milestones 170 and 200, per epoch
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: save the best checkpoint, i.e. the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, (288, 384) input -> (72, 96) heatmap, sigma=3
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator, 4-stage HRFormer backbone + HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRFormer',
+ in_channels=3,
+ norm_cfg=norm_cfg,
+ extra=dict(
+ drop_path_rate=0.2,
+ with_rpe=True,
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(2, ),
+ num_channels=(64, ),
+ num_heads=[2],
+ mlp_ratios=[4]),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2),
+ num_channels=(78, 156),
+ num_heads=[2, 4],
+ mlp_ratios=[4, 4],
+ window_sizes=[7, 7]),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2),
+ num_channels=(78, 156, 312),
+ num_heads=[2, 4, 8],
+ mlp_ratios=[4, 4, 4],
+ window_sizes=[7, 7, 7]),
+ stage4=dict(
+ num_modules=2,
+ num_branches=4,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2, 2),
+ num_channels=(78, 156, 312, 624),
+ num_heads=[2, 4, 8, 16],
+ mlp_ratios=[4, 4, 4, 4],
+ window_sizes=[7, 7, 7, 7])),
+ init_cfg=dict(
+ type='Pretrained',  # backbone initialised from the checkpoint URL below
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrformer_base-32815020_20220226.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=78,  # equals the first-branch num_channels of the backbone stages
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 32 for both; validation is unshuffled, test_mode=True
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotation file
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
+
+# fp16 settings: dynamic loss scaling
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c59395c8ad5365285c3a26d9fbeb3855b050433
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-256x192.py
@@ -0,0 +1,174 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ ),
+ paramwise_cfg=dict(
+ custom_keys={'relative_position_bias_table': dict(decay_mult=0.)}))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRFormer',
+ in_channels=3,
+ norm_cfg=norm_cfg,
+ extra=dict(
+ drop_path_rate=0.1,
+ with_rpe=True,
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(2, ),
+ num_channels=(64, ),
+ num_heads=[2],
+ num_mlp_ratios=[4]),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2),
+ num_channels=(32, 64),
+ num_heads=[1, 2],
+ mlp_ratios=[4, 4],
+ window_sizes=[7, 7]),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2),
+ num_channels=(32, 64, 128),
+ num_heads=[1, 2, 4],
+ mlp_ratios=[4, 4, 4],
+ window_sizes=[7, 7, 7]),
+ stage4=dict(
+ num_modules=2,
+ num_branches=4,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2, 2),
+ num_channels=(32, 64, 128, 256),
+ num_heads=[1, 2, 4, 8],
+ mlp_ratios=[4, 4, 4, 4],
+ window_sizes=[7, 7, 7, 7])),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrformer_small-09516375_20220226.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
+
+# fp16 settings
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..eee3521a7c617e30efa16224520bda00fe2e64e7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrformer-small_8xb32-210e_coco-384x288.py
@@ -0,0 +1,174 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ ),
+ paramwise_cfg=dict(
+ custom_keys={'relative_position_bias_table': dict(decay_mult=0.)}))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRFormer',
+ in_channels=3,
+ norm_cfg=norm_cfg,
+ extra=dict(
+ drop_path_rate=0.1,
+ with_rpe=True,
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(2, ),
+ num_channels=(64, ),
+ num_heads=[2],
+ num_mlp_ratios=[4]),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2),
+ num_channels=(32, 64),
+ num_heads=[1, 2],
+ mlp_ratios=[4, 4],
+ window_sizes=[7, 7]),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2),
+ num_channels=(32, 64, 128),
+ num_heads=[1, 2, 4],
+ mlp_ratios=[4, 4, 4],
+ window_sizes=[7, 7, 7]),
+ stage4=dict(
+ num_modules=2,
+ num_branches=4,
+ block='HRFORMERBLOCK',
+ num_blocks=(2, 2, 2, 2),
+ num_channels=(32, 64, 128, 256),
+ num_heads=[1, 2, 4, 8],
+ mlp_ratios=[4, 4, 4, 4],
+ window_sizes=[7, 7, 7, 7])),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrformer_small-09516375_20220226.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
+
+# fp16 settings
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..ea486d830a5d397f0e65958c832933a3de6fee6d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500, not per epoch (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)  # LR x0.1 (gamma) at milestones 170 and 200, per epoch
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: save the best checkpoint, i.e. the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, (192, 256) input -> (48, 64) heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator, 4-stage HRNet backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',  # backbone initialised from the checkpoint URL below
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,  # equals the first-branch num_channels of the backbone stages
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 64 for training, 32 for validation (unshuffled)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotation file
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae15d35ee11973169434b0b6d6b03ec46c9530a4
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500, not per epoch (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)  # LR x0.1 (gamma) at milestones 170 and 200, per epoch
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: save the best checkpoint, i.e. the greatest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps, (288, 384) input -> (72, 96) heatmap, sigma=3
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator, 4-stage HRNet backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',  # backbone initialised from the checkpoint URL below
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,  # equals the first-branch num_channels of the backbone stages
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and targets
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch size 64 for training, 32 for validation (unshuffled)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotation file
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5d2ed0bfd422568e71aca13c7be56217dd5d381
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-combine.py
@@ -0,0 +1,221 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco/AP', rule='greater', max_keep_ckpts=3))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# keypoint mappings: (source index, target index in the 19-keypoint layout)
+keypoint_mapping_coco = [
+ (0, 0),
+ (1, 1),
+ (2, 2),
+ (3, 3),
+ (4, 4),
+ (5, 5),
+ (6, 6),
+ (7, 7),
+ (8, 8),
+ (9, 9),
+ (10, 10),
+ (11, 11),
+ (12, 12),
+ (13, 13),
+ (14, 14),
+ (15, 15),
+ (16, 16),
+]
+
+keypoint_mapping_aic = [
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ (12, 17),
+ (13, 18),
+]
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=19,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ output_keypoint_indices=[
+ target for _, target in keypoint_mapping_coco
+ ]))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets, each converted to the shared 19-keypoint layout
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,
+ mapping=keypoint_mapping_coco)
+ ],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root='data/aic/',
+ data_mode=data_mode,
+ ann_file='annotations/aic_train.json',
+ data_prefix=dict(img='ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=19,
+ mapping=keypoint_mapping_aic)
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco_aic.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py
new file mode 100644
index 0000000000000000000000000000000000000000..847a40da2f08516a24e8bb765aac454a5cf0dc5f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-aic-256x192-merge.py
@@ -0,0 +1,187 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets: COCO is used as-is, AIC is converted to 17 keypoints
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=[],
+)
+
+dataset_aic = dict(
+ type='AicDataset',
+ data_root='data/aic/',
+ data_mode=data_mode,
+ ann_file='annotations/aic_train.json',
+ data_prefix=dict(img='ai_challenger_keypoint_train_20170902/'
+ 'keypoint_train_images_20170902/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=17,
+ mapping=[
+ (0, 6),
+ (1, 8),
+ (2, 10),
+ (3, 5),
+ (4, 7),
+ (5, 9),
+ (6, 12),
+ (7, 14),
+ (8, 16),
+ (9, 11),
+ (10, 13),
+ (11, 15),
+ ])
+ ],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(from_file='configs/_base_/datasets/coco.py'),
+ datasets=[dataset_coco, dataset_aic],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a3ac0bd58901ec998641eec822561abb97779fc0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_coarsedropout-8xb64-210e_coco-256x192.py
@@ -0,0 +1,165 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/'
+ 'body_2d_keypoint/topdown_heatmap/coco/'
+ 'td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(
+ type='CoarseDropout',
+ max_holes=8,
+ max_height=40,
+ max_width=40,
+ min_holes=1,
+ min_height=10,
+ min_width=10,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..7273a0503bd7e67505820de75a4be106922f43f0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-256x192.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..67b13b8babfe0ac672902f42212b66c5254433a2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_dark-8xb64-210e_coco-384x288.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(288, 384),
+ heatmap_size=(72, 96),
+ sigma=3,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..306d0aeb44b8014c3fa31743ff92b55b3b417927
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_fp16-8xb64-210e_coco-256x192.py
@@ -0,0 +1,7 @@
+_base_ = ['./td-hm_hrnet-w32_8xb64-210e_coco-256x192.py']
+
+# fp16 settings: AMP optimizer wrapper with dynamic loss scaling
+optim_wrapper = dict(
+ type='AmpOptimWrapper',
+ loss_scale='dynamic',
+)
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d380ad243db94d0ef80a55cee830fe28954c3b0e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_gridmask-8xb64-210e_coco-256x192.py
@@ -0,0 +1,162 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/'
+ 'body_2d_keypoint/topdown_heatmap/coco/'
+ 'td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(
+ type='GridDropout',
+ unit_size_min=10,
+ unit_size_max=40,
+ random_offset=True,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0bc7486ca27f2e58a41077527de9add9d9600b3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_photometric-8xb64-210e_coco-256x192.py
@@ -0,0 +1,153 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/'
+ 'body_2d_keypoint/topdown_heatmap/coco/'
+ 'td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PhotometricDistortion'),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..143a686ef7536a6cfccdbdf431de9188062caa3e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..113a91e18ce1fd3a934199f872ee6989c1e7cf95
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-8xb64-210e_coco-384x288.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d147de838a2fce6b0293ede36ecac81b51942036
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_udp-regress-8xb64-210e_coco-256x192.py
@@ -0,0 +1,155 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ heatmap_type='combined')
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=3 * 17,
+ deconv_out_channels=None,
+ loss=dict(type='CombinedTargetMSELoss', use_target_weight=True),
+ decoder=codec),
+ train_cfg=dict(compute_acc=False),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='udp_combined',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c5ff70ab47a0cf027c04983e6c1f3640ba56802
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..f83b7d31a43bd0d84d55fbc2825438efa607fff0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..daf3cbaddc15d9ded726a3ce7183f2364ddb74c6
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-256x192.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..eec52999c960c693c92b472cbff1d89d752dd2f1
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_dark-8xb32-210e_coco-384x288.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(288, 384),
+ heatmap_size=(72, 96),
+ sigma=3,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b705cb7fb3b59f158be04b4496e2a49922213f4f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the checkpoint with the highest coco/AP
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfa17ef098e5b471aba21b9d1a53dc154d8125cb
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_udp-8xb32-210e_coco-384x288.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size'], use_udp=True),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..caa7c267a09ea1080980dfeba1f26c22b9655169
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb32-210e_coco-384x288.py
@@ -0,0 +1,140 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=40,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f5a564d115bf7c94b6706ce337acbbccd94fb34
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-18_8xb64-210e_coco-256x192.py
@@ -0,0 +1,140 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=40,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..663593552563dbe296ac3c780fda650dd8298c41
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb32-210e_coco-384x288.py
@@ -0,0 +1,140 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(3, 8, 3),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=40,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b5d347cd9537af2a690ee3c6d02323a8c53bbd8
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_litehrnet-30_8xb64-210e_coco-256x192.py
@@ -0,0 +1,140 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(3, 8, 3),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=40,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff8eaccb7e093a16416ea52983d6cb7feb6d7814
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-256x192.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='mmcls://mobilenet_v2',
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1280,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..d01e4c6c3dc9924079d35bde2445fb93b3541cba
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mobilenetv2_8xb64-210e_coco-384x288.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='mmcls://mobilenet_v2',
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1280,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0e2e9893c6429c99b847747170690654411e68b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_mspn50_8xb32-210e_coco-256x192.py
@@ -0,0 +1,152 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings
+# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach.
+kernel_sizes = [11, 9, 7, 5]
+codec = [
+ dict(
+ type='MegviiHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MSPN',
+ unit_channels=256,
+ num_stages=1,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ norm_cfg=dict(type='BN'),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='torchvision://resnet50',
+ )),
+ head=dict(
+ type='MSPNHead',
+ out_shape=(64, 48),
+ unit_channels=256,
+ out_channels=17,
+ num_stages=1,
+ num_units=4,
+ norm_cfg=dict(type='BN'),
+ # each sub list is for a stage
+ # and each element in each list is for a unit
+ level_indices=[0, 1, 2, 3],
+ loss=[
+ dict(
+ type='KeypointMSELoss',
+ use_target_weight=True,
+ loss_weight=0.25)
+ ] * 3 + [
+ dict(
+ type='KeypointOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ],
+ decoder=codec[-1]),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+ dict(type='GenerateTarget', multilevel=True, encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_mode='none')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b474b3f2fe7a5db3571846f7ab54c5c05c33136
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvt-s_8xb64-210e_coco-256x192.py
@@ -0,0 +1,127 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: "8xb64" in the file name, batch_size=64 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap, input_size=(192, 256) -> heatmap_size=(48, 64), sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = PyramidVisionTransformer backbone + FeatureMapProcessor neck + HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # NOTE(review): not referenced anywhere else in this file
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='PyramidVisionTransformer',
+ num_layers=[3, 4, 6, 3],
+ init_cfg=dict(  # backbone starts from the released pvt_small.pth weights
+ type='Pretrained',
+ checkpoint='https://github.com/whai362/PVT/'
+ 'releases/download/v2/pvt_small.pth'),
+ ),
+ neck=dict(type='FeatureMapProcessor', select_index=3),  # presumably keeps only backbone output #3 -- confirm
+ head=dict(
+ type='HeatmapHead',
+ in_channels=512,  # presumably the channel count of the selected backbone output -- confirm
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8921e68030e89110afe8c44717b051b02616a13
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_pvtv2-b2_8xb64-210e_coco-256x192.py
@@ -0,0 +1,128 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: "8xb64" in the file name, batch_size=64 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap, input_size=(192, 256) -> heatmap_size=(48, 64), sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = PyramidVisionTransformerV2 backbone + FeatureMapProcessor neck + HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # NOTE(review): not referenced anywhere else in this file
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='PyramidVisionTransformerV2',
+ embed_dims=64,
+ num_layers=[3, 4, 6, 3],
+ init_cfg=dict(  # backbone starts from the released pvt_v2_b2.pth weights
+ type='Pretrained',
+ checkpoint='https://github.com/whai362/PVT/'
+ 'releases/download/v2/pvt_v2_b2.pth'),
+ ),
+ neck=dict(type='FeatureMapProcessor', select_index=3),  # presumably keeps only backbone output #3 -- confirm
+ head=dict(
+ type='HeatmapHead',
+ in_channels=512,  # presumably the channel count of the selected backbone output -- confirm
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd13e4a4222f21baa200c4c8ccb17986aacfc935
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb32-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32: "8xb32" in the file name, batch_size=32 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap, input_size=(288, 384) -> heatmap_size=(72, 96), sigma=3
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator = ResNet (depth 101) backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),  # start from torchvision's resnet101 weights
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..5486548481df742e6bc53bd32d65501971e356f5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: "8xb64" in the file name, batch_size=64 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap, input_size=(192, 256) -> heatmap_size=(48, 64), sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = ResNet (depth 101) backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),  # start from torchvision's resnet101 weights
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..39b7b3220d64d2ac905288c6bf2c0dd1ca2be7f1
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-256x192.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: "8xb64" in the file name, batch_size=64 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap with unbiased=True (cf. "dark" in the config name)
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)  # no blur_kernel_size given, so the codec's own default applies
+
+# model settings: TopdownPoseEstimator = ResNet (depth 101) backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),  # start from torchvision's resnet101 weights
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7c99503d46a7e0dc4402250e073b6ce9128d121
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res101_dark-8xb64-210e_coco-384x288.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: "8xb64" in the file name, batch_size=64 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap with unbiased=True (cf. "dark" in the config name)
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(288, 384), heatmap_size=(72, 96),
+ sigma=3,
+ unbiased=True,
+ blur_kernel_size=17)  # blur kernel for unbiased decoding, sized for sigma=3 as in the res152_dark 384x288 codec
+
+# model settings: TopdownPoseEstimator = ResNet (depth 101) backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),  # start from torchvision's resnet101 weights
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..beccab1bd105b618b601d5d331cc0fc680df1bf7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32: "8xb32" in the file name, batch_size=32 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap, input_size=(192, 256) -> heatmap_size=(48, 64), sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = ResNet (depth 152) backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),  # start from torchvision's resnet152 weights
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..25d5039f05e3d9b2387be6bc0690e5d3904faded
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_8xb32-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32: "8xb32" in the file name, batch_size=32 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap, input_size=(288, 384) -> heatmap_size=(72, 96), sigma=3
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator = ResNet (depth 152) backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),  # start from torchvision's resnet152 weights
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..acd91192447b4ef5f41745db0c4b93357b53b778
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-256x192.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up: steps 0-500 counted in iterations (by_epoch=False), starting at 0.001x the LR
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is multiplied by gamma at epochs 170 and 200
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32: "8xb32" in the file name, batch_size=32 in train_dataloader
+
+# hooks: keep the best checkpoint, judged by coco/AP (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap with unbiased=True (cf. "dark" in the config name)
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)  # no blur_kernel_size given, so the codec's own default applies
+
+# model settings: TopdownPoseEstimator = ResNet (depth 152) backbone + HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),  # start from torchvision's resnet152 weights
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,  # presumably one heatmap channel per COCO keypoint -- confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: both apply TopdownAffine with the codec's input_size
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [  # unlike train_pipeline: no Random* transforms and no GenerateTarget step
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 loader, unshuffled val2017 loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): spelled out in full, not built from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..49bd2b224bea33419d392931391ba90806ee24a7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res152_dark-8xb32-210e_coco-384x288.py
@@ -0,0 +1,126 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(288, 384),
+ heatmap_size=(72, 96),
+ sigma=3,
+ unbiased=True,
+ blur_kernel_size=17)
+
+# model settings: TopdownPoseEstimator (ResNet-152 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..7dbe1b43f77f35fb6564b9d6322a1b8c08d93a60
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator (ResNet-50 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..d74cc1392d27911a1e3d2b3239840717da5a4fb5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator (ResNet-50 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fdec305b10c5aaa202957650a81975158d0d1b9c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-256x192.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings: TopdownPoseEstimator (ResNet-50 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..b34ad210f37ce883b21377192fbe035a7c1fcd56
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_dark-8xb64-210e_coco-384x288.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(288, 384),
+ heatmap_size=(72, 96),
+ sigma=3,
+ unbiased=True)
+
+# model settings: TopdownPoseEstimator (ResNet-50 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..66a6a27822fb72e7aef421bf1bf2230598c26125
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_res50_fp16-8xb64-210e_coco-256x192.py
@@ -0,0 +1,7 @@
+_base_ = ['./td-hm_res50_8xb64-210e_coco-256x192.py']
+
+# fp16 settings: AmpOptimWrapper with loss_scale='dynamic'
+optim_wrapper = dict(
+ type='AmpOptimWrapper',
+ loss_scale='dynamic',
+)
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bfbace9f6313fe89201ba5c243e51b4aa90ca27
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb32-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator (ResNeSt-101 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..030ae95d634e40f172dae07eb2bef163084906a3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest101_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator (ResNeSt-101 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdcdb6c75fb74e65ca53797eb33039f6d36357ce
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb16-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=128)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator (ResNeSt-200 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=200,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest200'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=16,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=16,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a5e1e8e4a570e09b9fd3a5f096584275bfb8858
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest200_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: max_epochs=210, val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam, lr=5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: checkpoint hook with save_best='coco/AP', rule='greater'
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings (reused as head decoder and pipeline target encoder)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator (ResNeSt-200 backbone, HeatmapHead)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=200,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest200'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset, 'topdown' mode, root 'data/coco/'
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train adds random augmentation and GenerateTarget to val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (test_dataloader reuses val_dataloader)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on val2017 keypoints; test reuses val_evaluator
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..b519e9d2ef951298da6f3d4794d5c8660e83159d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb16-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNeSt (depth 269) backbone on COCO keypoints, input_size=(288, 384).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 128 presumably reflects the "8xb16" tag in the file name (8 x 16)
+auto_scale_lr = dict(base_batch_size=128)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator = ResNeSt (depth 269, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=269,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest269'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=16 with shuffling; validation batch_size=16 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=16,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=16,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3588d1fa31e29ec960a35050ff8659e712712ec
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest269_8xb32-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNeSt (depth 269) backbone on COCO keypoints, input_size=(192, 256).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; base follows the "8xb32" tag in the file name (8 x 32 = 256), like the sibling configs
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = ResNeSt (depth 269, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=269,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest269'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=32 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..43295bb41f1b4b2c87119baec30b7efc9ecb80d9
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNeSt (depth 50) backbone on COCO keypoints, input_size=(192, 256).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 512 presumably reflects the "8xb64" tag in the file name (8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = ResNeSt (depth 50, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=64 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..e45320b036372894e9ddd0bcee6c457e86a8ecee
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnest50_8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNeSt (depth 50) backbone on COCO keypoints, input_size=(288, 384).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 512 presumably reflects the "8xb64" tag in the file name (8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator = ResNeSt (depth 50, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeSt',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnest50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=64 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..4fc55228face0a1586627e3ffa823ffe645c812a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb32-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNetV1d (depth 101) backbone on COCO keypoints, input_size=(288, 384).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 256 presumably reflects the "8xb32" tag in the file name (8 x 32)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator = ResNetV1d (depth 101, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet101_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=32 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c8cc4e808c2fff488bc4b5c977a34d7978a6d03
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d101_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNetV1d (depth 101) backbone on COCO keypoints, input_size=(192, 256).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 512 presumably reflects the "8xb64" tag in the file name (8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = ResNetV1d (depth 101, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet101_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=64 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a85a7f80c43b090426182ab9c3acaa5659b0f4d5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb32-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNetV1d (depth 152) backbone on COCO keypoints, input_size=(192, 256).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; base follows the "8xb32" tag in the file name (8 x 32 = 256), like the sibling configs
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = ResNetV1d (depth 152, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet152_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=32 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a728ce806415f8da3afd036835171b64976a41a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d152_8xb48-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNetV1d (depth 152) backbone on COCO keypoints, input_size=(288, 384).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 384 presumably reflects the "8xb48" tag in the file name (8 x 48)
+auto_scale_lr = dict(base_batch_size=384)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator = ResNetV1d (depth 152, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet152_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=48 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=48,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..c241cdd3ddbee8398c3da8d96d7d3d46bce99f24
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose-estimation config: ResNetV1d (depth 50) backbone on COCO keypoints, input_size=(192, 256).
+# runtime: train for up to 210 epochs; val_interval=10 presumably means validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up over iterations 0-500, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 512 presumably reflects the "8xb64" tag in the file name (8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: track the best checkpoint by the 'coco/AP' metric, where greater is better
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec; heatmap_size is input_size / 4 per axis; reused below as head decoder and target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator = ResNetV1d (depth 50, pretrained init) backbone + HeatmapHead with 17 output channels
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet50_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip, RandomHalfBody, RandomBBoxTransform and GenerateTarget; validation omits them
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training batch_size=64 with shuffling; validation batch_size=32 without shuffling and with a person-detection bbox_file
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric against the val2017 keypoint annotations
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d1cea135b49cf1d70e10194685c394dc2c8bc1a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnetv1d50_8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: ResNetV1d-50 on COCO, codec input_size=(288, 384)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: presumably 8 GPUs x per-GPU batch_size, per "8xb64" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator built from a ResNetV1d-50 backbone and a 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet50_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate training targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..508233371b4a819fe0b14a01798cfe48e6b32303
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb32-210e_coco-384x288.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: ResNeXt-101 on COCO, codec input_size=(288, 384)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32: presumably 8 GPUs x per-GPU batch_size, per "8xb32" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator built from a ResNeXt-101 backbone and a 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='mmcls://resnext101_32x4d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate training targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..eafed7f07526dce3b46bcd800272764a1614a051
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext101_8xb64-210e_coco-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: ResNeXt-101 on COCO, codec input_size=(192, 256)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: presumably 8 GPUs x per-GPU batch_size, per "8xb64" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator built from a ResNeXt-101 backbone and a 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeXt',
+ depth=101,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='mmcls://resnext101_32x4d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate training targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..27c2c263b05193b10f0c0af2235b81d86cca1bc4
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb32-210e_coco-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: ResNeXt-152 on COCO, codec input_size=(192, 256)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # NOTE(review): 512 != 8 x 32 implied by "8xb32" in the file name and batch_size=32 below - confirm (256 expected?)
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator built from a ResNeXt-152 backbone and a 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeXt',
+ depth=152,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='mmcls://resnext152_32x4d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate training targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..c02caeb7461f1fb312d02cfe7496c57a8b9b11e2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext152_8xb48-210e_coco-384x288.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: ResNeXt-152 on COCO, codec input_size=(288, 384)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=384)  # 384 = 8 x 48: presumably 8 GPUs x per-GPU batch_size, per "8xb48" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator built from a ResNeXt-152 backbone and a 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeXt',
+ depth=152,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='mmcls://resnext152_32x4d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate training targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=48,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b088a44ca6a043abac5e52596486362124d244c5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: ResNeXt-50 on COCO, codec input_size=(192, 256)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: presumably 8 GPUs x per-GPU batch_size, per "8xb64" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator built from a ResNeXt-50 backbone and a 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeXt',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnext50_32x4d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate training targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..9f97235218992e772298fcb74c1494331eeb50a7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_resnext50_8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: ResNeXt-50 on COCO, codec input_size=(288, 384)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64: presumably 8 GPUs x per-GPU batch_size, per "8xb64" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator built from a ResNeXt-50 backbone and a 17-channel heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeXt',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnext50_32x4d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate training targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..18d16bd26784ad9f706e39bd83c25fc913ef4b08
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn18_8xb32-210e_coco-256x192.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: RSN backbone + MSPNHead on COCO, codec input_size=(192, 256)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 2e-2
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-2,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170, 190 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 190, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32: presumably 8 GPUs x per-GPU batch_size, per "8xb32" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: one MegviiHeatmap codec per kernel size; heatmap_size is input_size / 4
+# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach.
+kernel_sizes = [11, 9, 7, 5]
+codec = [
+ dict(
+ type='MegviiHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings: top-down estimator built from a single-stage, 4-unit RSN backbone and an MSPNHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='RSN',
+ unit_channels=256,
+ num_stages=1,
+ num_units=4,
+ num_blocks=[2, 2, 2, 2],
+ num_steps=4,
+ norm_cfg=dict(type='BN'),
+ ),
+ head=dict(
+ type='MSPNHead',
+ out_shape=(64, 48),  # NOTE(review): reversed relative to codec heatmap_size=(48, 64); presumably (height, width) - confirm
+ unit_channels=256,
+ out_channels=17,
+ num_stages=1,
+ num_units=4,
+ norm_cfg=dict(type='BN'),
+ # each sub list is for a stage
+ # and each element in each list is for a unit
+ level_indices=[0, 1, 2, 3],
+ loss=[
+ dict(
+ type='KeypointMSELoss',
+ use_target_weight=True,
+ loss_weight=0.25)
+ ] * 3 + [  # three MSE losses (weight 0.25) followed by one OHKM MSE loss (weight 1.0); presumably one per unit
+ dict(
+ type='KeypointOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ],
+ decoder=codec[-1]),  # decode with the last codec in the list (kernel_size=5)
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+ dict(type='GenerateTarget', multilevel=True, encoder=codec),  # targets are encoded with the full codec list
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations with nms_mode='none'; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_mode='none')
+test_evaluator = val_evaluator
+
+# fp16 settings (NOTE(review): confirm that a top-level 'fp16' key is still consumed by the training runner)
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..069cb413123be20dee06dd8014b583dfa267fa46
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_rsn50_8xb32-210e_coco-256x192.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']  # top-down heatmap config: RSN backbone + MSPNHead on COCO, codec input_size=(192, 256)
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-3,
+))
+
+# learning policy: 500-iteration linear warm-up, then x0.1 step decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32: presumably 8 GPUs x per-GPU batch_size, per "8xb32" in the file name
+
+# hooks: keep the best checkpoint, ranked by 'coco/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: one MegviiHeatmap codec per kernel size; heatmap_size is input_size / 4
+# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach.
+kernel_sizes = [11, 9, 7, 5]
+codec = [
+ dict(
+ type='MegviiHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ kernel_size=kernel_size) for kernel_size in kernel_sizes
+]
+
+# model settings: top-down estimator built from a single-stage, 4-unit RSN backbone and an MSPNHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='RSN',
+ unit_channels=256,
+ num_stages=1,
+ num_units=4,
+ num_blocks=[3, 4, 6, 3],
+ num_steps=4,
+ norm_cfg=dict(type='BN'),
+ ),
+ head=dict(
+ type='MSPNHead',
+ out_shape=(64, 48),  # NOTE(review): reversed relative to codec heatmap_size=(48, 64); presumably (height, width) - confirm
+ unit_channels=256,
+ out_channels=17,
+ num_stages=1,
+ num_units=4,
+ norm_cfg=dict(type='BN'),
+ # each sub list is for a stage
+ # and each element in each list is for a unit
+ level_indices=[0, 1, 2, 3],
+ loss=[
+ dict(
+ type='KeypointMSELoss',
+ use_target_weight=True,
+ loss_weight=0.25)
+ ] * 3 + [  # three MSE losses (weight 0.25) followed by one OHKM MSE loss (weight 1.0); presumably one per unit
+ dict(
+ type='KeypointOHKMMSELoss',
+ use_target_weight=True,
+ loss_weight=1.)
+ ],
+ decoder=codec[-1]),  # decode with the last codec in the list (kernel_size=5)
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings: COCO keypoints in top-down mode, rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds RandomFlip/RandomHalfBody/RandomBBoxTransform and GenerateTarget to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+ dict(type='GenerateTarget', multilevel=True, encoder=codec),  # targets are encoded with the full codec list
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec[0]['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train2017 for training; val2017 for validation, reused as the test loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=4,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # NOTE(review): already prefixed with 'data/coco/', unlike ann_file - confirm how it is resolved against data_root
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations with nms_mode='none'; the test evaluator reuses it
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json',
+ nms_mode='none')
+test_evaluator = val_evaluator
+
+# fp16 settings (NOTE(review): confirm that a top-level 'fp16' key is still consumed by the training runner)
+fp16 = dict(loss_scale='dynamic')
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..544c87242f5f3e7e4a0b129aa927e21a8c5a4430
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb32-210e_coco-256x192.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 8 x 32, per the 8xb32 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator, SCNet-101 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SCNet',
+        depth=101,
+        init_cfg=dict(  # initialise from the pretrained checkpoint below
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'pretrain_models/scnet101-94250a77.pth'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..1af2e44ef013ea525d0d7cfe19312c07a1b5ae93
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet101_8xb48-210e_coco-384x288.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=384)  # 8 x 48, per the 8xb48 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator, SCNet-101 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SCNet',
+        depth=101,
+        init_cfg=dict(  # initialise from the pretrained checkpoint below
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'pretrain_models/scnet101-94250a77.pth'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=48,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..efa1ad924cf5da56fc0ab69cee89eee48355376d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb32-210e_coco-384x288.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 8 x 32, per the 8xb32 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator, SCNet-50 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SCNet',
+        depth=50,
+        init_cfg=dict(  # initialise from the pretrained checkpoint below
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'pretrain_models/scnet50-7ef0a199.pth'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=1,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d784d80296e085f201da67e7f45732af6fe8938
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_scnet50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 8 x 64, per the 8xb64 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator, SCNet-50 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SCNet',
+        depth=50,
+        init_cfg=dict(  # initialise from the pretrained checkpoint below
+            type='Pretrained',
+            checkpoint='https://download.openmmlab.com/mmpose/'
+            'pretrain_models/scnet50-7ef0a199.pth'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..b515b744c4c9b43126b2e85b9c32b5663016be70
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb32-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 8 x 32, per the 8xb32 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator, SEResNet-101 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SEResNet',
+        depth=101,  # initial weights come from the init_cfg checkpoint below
+        init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet101'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6d9fab2eda60ecef464a645b572866d1954cbcf
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet101_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 8 x 64, per the 8xb64 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator, SEResNet-101 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SEResNet',
+        depth=101,  # initial weights come from the init_cfg checkpoint below
+        init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet101'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..a0ef9bf5711f01625f3faf0d46122dae2eca8c35
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb32-210e_coco-256x192.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 8 x 32, per the 8xb32 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator, SEResNet-152 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SEResNet',
+        depth=152,  # no init_cfg: no pretrained checkpoint is configured
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..13524c121772b7a73b79dd7d9c2fd4fd6e5ad882
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet152_8xb48-210e_coco-384x288.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=384)  # 8 x 48, per the 8xb48 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator, SEResNet-152 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SEResNet',
+        depth=152,  # no init_cfg: no pretrained checkpoint is configured
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=48,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..93fb78fac56a697164a383229436c79de5392be5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # shared runtime defaults
+
+# runtime: 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+    type='Adam',
+    lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=210,
+        milestones=[170, 200],
+        gamma=0.1,
+        by_epoch=True)  # scale the LR by 0.1 at epochs 170 and 200
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 8 x 64, per the 8xb64 file name
+
+# hooks: track the best checkpoint by coco/AP (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRA heatmaps at 1/4 of the input size per axis
+codec = dict(
+    type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator, SEResNet-50 backbone, heatmap head
+model = dict(
+    type='TopdownPoseEstimator',
+    data_preprocessor=dict(
+        type='PoseDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True),
+    backbone=dict(
+        type='SEResNet',
+        depth=50,  # initial weights come from the init_cfg checkpoint below
+        init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet50'),
+    ),
+    head=dict(
+        type='HeatmapHead',
+        in_channels=2048,
+        out_channels=17,  # presumably one channel per COCO keypoint
+        loss=dict(type='KeypointMSELoss', use_target_weight=True),
+        decoder=codec),  # reuses the codec defined above
+    test_cfg=dict(  # test-time flip settings
+        flip_test=True,
+        flip_mode='heatmap',
+        shift_heatmap=True,
+    ))
+
+# base dataset settings: COCO dataset, top-down mode, data/coco/ root
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds Random* augmentation and target generation
+train_pipeline = [
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='RandomFlip', direction='horizontal'),
+    dict(type='RandomHalfBody'),
+    dict(type='RandomBBoxTransform'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='GenerateTarget', encoder=codec),
+    dict(type='PackPoseInputs')
+]
+val_pipeline = [  # no Random* steps and no GenerateTarget
+    dict(type='LoadImage'),
+    dict(type='GetBBoxCenterScale'),
+    dict(type='TopdownAffine', input_size=codec['input_size']),
+    dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled train2017 split; unshuffled val2017 split
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_train2017.json',
+        data_prefix=dict(img='train2017/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/person_keypoints_val2017.json',
+        bbox_file='data/coco/person_detection_results/'  # hard-coded prefix
+        'COCO_val2017_detections_AP_H_56_person.json',
+        data_prefix=dict(img='val2017/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: COCO metric against the val2017 keypoint annotations
+val_evaluator = dict(
+    type='CocoMetric',
+    ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa2002a70a94d7104d07ec2c921a6c1123f859ab
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_seresnet50_8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 512)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator with a SEResNet (depth 50) backbone and a 17-channel HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SEResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 64); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..029f48d3d90bdc113066c67200cbe15772bd0b9b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 512)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator with a ShuffleNetV1 (groups=3) backbone and a 17-channel HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ShuffleNetV1',
+ groups=3,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v1'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=960,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 64); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..f06c325bd1213995bb51bd9c1e477de0604e4cb7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv1_8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 512)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator with a ShuffleNetV1 (groups=3) backbone and a 17-channel HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ShuffleNetV1',
+ groups=3,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v1'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=960,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 64); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..333998490e38105e4f73a55af6358e868943117a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 512)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator with a ShuffleNetV2 (widen_factor=1.0) backbone and a 17-channel HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ShuffleNetV2',
+ widen_factor=1.0,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v2'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 64); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7be5484e8d56f6001a3c1e5de91dd1b8c32821f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_shufflenetv2_8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 512)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: TopdownPoseEstimator with a ShuffleNetV2 (widen_factor=1.0) backbone and a 17-channel HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ShuffleNetV2',
+ widen_factor=1.0,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v2'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 64); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..81877b893f69b66a2263a4d5dfea8407d56668af
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-256x192.py
@@ -0,0 +1,139 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 256)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator with a SwinTransformer backbone (embed_dims=128, window_size=7) and a 17-channel HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # NOTE(review): not referenced elsewhere in this file
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SwinTransformer',
+ embed_dims=128,
+ depths=[2, 2, 18, 2],
+ num_heads=[4, 8, 16, 32],
+ window_size=7,
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.3,
+ patch_norm=True,
+ out_indices=(3, ),
+ with_cp=False,
+ convert_weights=True,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://github.com/SwinTransformer/storage/releases/'
+ 'download/v1.0.0/swin_base_patch4_window7_224_22k.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 32); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c1d5fa12f97259031d65030e5abee8cb61d372d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-b-p4-w7_8xb32-210e_coco-384x288.py
@@ -0,0 +1,139 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 256)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=2)
+
+# model settings: TopdownPoseEstimator with a SwinTransformer backbone (embed_dims=128, window_size=12) and a 17-channel HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # NOTE(review): not referenced elsewhere in this file
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SwinTransformer',
+ embed_dims=128,
+ depths=[2, 2, 18, 2],
+ num_heads=[4, 8, 16, 32],
+ window_size=12,
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.3,
+ patch_norm=True,
+ out_indices=(3, ),
+ with_cp=False,
+ convert_weights=True,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://github.com/SwinTransformer/storage/releases/'
+ 'download/v1.0.0/swin_base_patch4_window12_384_22k.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 32); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..14d08a49f865a901b0832f40dd2819b8ee43d58c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-256x192.py
@@ -0,0 +1,148 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW (lr 5e-4, weight decay 0.01); decay_mult=0 for the position-embedding, bias-table and norm keys
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ ),
+ paramwise_cfg=dict(
+ custom_keys={
+ 'absolute_pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 256)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator with a Swin-L sized SwinTransformer backbone (embed_dims=192) and a 17-channel HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # NOTE(review): not referenced elsewhere in this file
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SwinTransformer',
+ embed_dims=192,
+ depths=[2, 2, 18, 2],
+ num_heads=[6, 12, 24, 48],
+ window_size=7,
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.5,
+ patch_norm=True,
+ out_indices=(3, ),
+ with_cp=False,
+ convert_weights=True,
+ init_cfg=dict(
+ type='Pretrained',  # Swin-L weights: the Swin-B file used before does not match embed_dims=192 / 6-12-24-48 heads
+ checkpoint='https://github.com/SwinTransformer/storage/releases/'
+ 'download/v1.0.0/swin_large_patch4_window7_224_22k.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1536,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 32); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..692c8df1a616dabbcb93a9be67f4626862eae172
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-l-p4-w7_8xb32-210e_coco-384x288.py
@@ -0,0 +1,148 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for at most 210 epochs with a validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: AdamW (lr 5e-4, weight decay 0.01); decay_mult=0 for the position-embedding, bias-table and norm keys
+optim_wrapper = dict(
+ optimizer=dict(
+ type='AdamW',
+ lr=5e-4,
+ betas=(0.9, 0.999),
+ weight_decay=0.01,
+ ),
+ paramwise_cfg=dict(
+ custom_keys={
+ 'absolute_pos_embed': dict(decay_mult=0.),
+ 'relative_position_bias_table': dict(decay_mult=0.),
+ 'norm': dict(decay_mult=0.)
+ }))
+
+# learning policy: iteration-based linear warm-up, then x0.1 decay at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference batch size: 256)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: the checkpoint hook tracks the best model by 'coco/AP' (rule 'greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap codec, reused as the head's decoder and as the training pipeline's target encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=2)
+
+# model settings: TopdownPoseEstimator with a Swin-L sized SwinTransformer backbone (embed_dims=192) and a 17-channel HeatmapHead
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # NOTE(review): not referenced elsewhere in this file
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SwinTransformer',
+ embed_dims=192,
+ depths=[2, 2, 18, 2],
+ num_heads=[6, 12, 24, 48],
+ window_size=7,  # NOTE(review): the checkpoint below is a window-12 model - confirm the intended window size
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.5,
+ patch_norm=True,
+ out_indices=(3, ),
+ with_cp=False,
+ convert_weights=True,
+ init_cfg=dict(
+ type='Pretrained',  # Swin-L weights: the Swin-B file used before does not match embed_dims=192 / 6-12-24-48 heads
+ checkpoint='https://github.com/SwinTransformer/storage/releases/'
+ 'download/v1.0.0/swin_large_patch4_window12_384_22k.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1536,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(  # test-time settings: flip test in 'heatmap' mode with shift_heatmap enabled
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CocoDataset in 'topdown' mode with data rooted at data/coco/
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip / half-body / bbox augmentation and target generation to the validation steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training loader (batch size 32); val and test share one unshuffled loader (batch size 32)
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: CocoMetric on the val2017 keypoint annotations; the test evaluator reuses the same dict
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..068ee0649f4cf97f5887ff5b17f44d6e1e1609b3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_swin-t-p4-w7_8xb32-210e_coco-256x192.py
@@ -0,0 +1,139 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose config: SwinTransformer backbone + HeatmapHead on CocoDataset.
+# runtime: train for max_epochs=210 with val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay at fixed epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up (iteration-based: by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,  # LR is multiplied by 0.1 at each milestone
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x batch_size 32 (cf. "8xb32" in the file name)
+
+# hooks: the best checkpoint is selected by 'coco/AP', higher is better (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: shared by the target generator and the head decoder below;
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)  # heatmap is 1/4 of the input per dimension
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)  # NOTE(review): not referenced anywhere else in this file
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # per-channel normalization constants
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SwinTransformer',
+ embed_dims=96,
+ depths=[2, 2, 6, 2],
+ num_heads=[3, 6, 12, 24],
+ window_size=7,
+ mlp_ratio=4,
+ qkv_bias=True,
+ qk_scale=None,
+ drop_rate=0.,
+ attn_drop_rate=0.,
+ drop_path_rate=0.2,
+ patch_norm=True,
+ out_indices=(3, ),  # index 3 = last of the four entries in `depths`
+ with_cp=False,
+ convert_weights=True,
+ init_cfg=dict(
+ type='Pretrained',  # backbone starts from the released swin_tiny_patch4_window7_224 weights
+ checkpoint='https://github.com/SwinTransformer/storage/releases/'
+ 'download/v1.0.0/swin_tiny_patch4_window7_224.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,  # presumably embed_dims (96) * 8, the last-stage width -- TODO confirm
+ out_channels=17,  # one output channel per keypoint (presumably the 17 COCO keypoints)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # decode with the same codec that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train = the val steps + random flip / half-body / bbox augmentation + target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # spelled with the 'data/coco/' prefix, unlike ann_file
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: CocoMetric on the val2017 keypoint annotations (path = data_root + relative file)
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b85adb998bb5f2660ef00d1d395a6ca8bb4763c0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose config: VGG (depth 16, BN) backbone + HeatmapHead on CocoDataset.
+# runtime: train for max_epochs=210 with val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay at fixed epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up (iteration-based: by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,  # LR is multiplied by 0.1 at each milestone
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x batch_size 64 (cf. "8xb64" in the file name)
+
+# hooks: the best checkpoint is selected by 'coco/AP', higher is better (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: shared by the target generator and the head decoder below;
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)  # heatmap is 1/4 of the input per dimension
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # per-channel normalization constants
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='VGG',
+ depth=16,
+ norm_cfg=dict(type='BN'),
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://vgg16_bn'),  # pretrained weights resolved via the mmcls:// scheme
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=512,  # presumably the channel width of the backbone's final feature map -- TODO confirm
+ out_channels=17,  # one output channel per keypoint (presumably the 17 COCO keypoints)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # decode with the same codec that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train = the val steps + random flip / half-body / bbox augmentation + target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (training uses batch_size=64, validation batch_size=32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # spelled with the 'data/coco/' prefix, unlike ann_file
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: CocoMetric on the val2017 keypoint annotations (path = data_root + relative file)
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..04fcc1ad2ef3152e217fa20bc0a325d44b1e6f0d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose config: ViPNAS_MobileNetV3 backbone + ViPNASHead on CocoDataset.
+# runtime: train for max_epochs=210 with val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay at fixed epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up (iteration-based: by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,  # LR is multiplied by 0.1 at each milestone
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x batch_size 64 (cf. "8xb64" in the file name)
+
+# hooks: the best checkpoint is selected by 'coco/AP', higher is better (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: shared by the target generator and the head decoder below;
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)  # heatmap is 1/4 of the input per dimension
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # per-channel normalization constants
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ViPNAS_MobileNetV3'),  # no init_cfg is given for this backbone
+ head=dict(
+ type='ViPNASHead',
+ in_channels=160,  # presumably the backbone's output channel width -- TODO confirm
+ out_channels=17,  # one output channel per keypoint (presumably the 17 COCO keypoints)
+ deconv_out_channels=(160, 160, 160),
+ deconv_num_groups=(160, 160, 160),  # groups == channels for every deconv layer
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # decode with the same codec that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train = the val steps + random flip / half-body / bbox augmentation + target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',  # rotation/scale ranges are set explicitly here
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (training uses batch_size=64, validation batch_size=32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # spelled with the 'data/coco/' prefix, unlike ann_file
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: CocoMetric on the val2017 keypoint annotations (path = data_root + relative file)
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8190d7ffd2ca650f939935487551f0a62a8bf078
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+# Top-down heatmap pose config: ViPNAS_ResNet (depth 50) backbone + ViPNASHead on CocoDataset.
+# runtime: train for max_epochs=210 with val_interval=10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay at fixed epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up (iteration-based: by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,  # LR is multiplied by 0.1 at each milestone
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x batch_size 64 (cf. "8xb64" in the file name)
+
+# hooks: the best checkpoint is selected by 'coco/AP', higher is better (rule='greater')
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# codec settings: shared by the target generator and the head decoder below;
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)  # heatmap is 1/4 of the input per dimension
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],  # per-channel normalization constants
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ViPNAS_ResNet', depth=50),  # no init_cfg is given for this backbone
+ head=dict(
+ type='ViPNASHead',
+ in_channels=608,  # presumably the backbone's output channel width -- TODO confirm
+ out_channels=17,  # one output channel per keypoint (presumably the 17 COCO keypoints)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # decode with the same codec that GenerateTarget encodes with
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: train = the val steps + random flip / half-body / bbox augmentation + target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',  # rotation/scale ranges are set explicitly here
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders (training uses batch_size=64, validation batch_size=32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',  # spelled with the 'data/coco/' prefix, unlike ann_file
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: CocoMetric on the val2017 keypoint annotations (path = data_root + relative file)
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..a2c19453f3e3e1be0490c6e55becd2ba4ae14f04
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.md
@@ -0,0 +1,39 @@
+
+
+
+VGG (ICLR'2015)
+
+```bibtex
+@article{simonyan2014very,
+ title={Very deep convolutional networks for large-scale image recognition},
+ author={Simonyan, Karen and Zisserman, Andrew},
+ journal={arXiv preprint arXiv:1409.1556},
+ year={2014}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [vgg](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py) | 256x192 | 0.699 | 0.890 | 0.769 | 0.754 | 0.927 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192-7e7c58d6_20210517.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192_20210517.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..166fa05fcddc05ffe60a996ec63bb747d58ea7dd
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vgg_coco.yml
@@ -0,0 +1,19 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vgg16-bn_8xb64-210e_coco-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - VGG
+ Training Data: COCO
+ Name: td-hm_vgg16-bn_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.699
+ AP@0.5: 0.89
+ AP@0.75: 0.769
+ AR: 0.754
+ AR@0.5: 0.927
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vgg/vgg16_bn_coco_256x192-7e7c58d6_20210517.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..b6a178865bd6844c0f73fbe7db43aa4be795dc71
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.md
@@ -0,0 +1,40 @@
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@article{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [S-ViPNAS-MobileNetV3](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py) | 256x192 | 0.700 | 0.887 | 0.783 | 0.758 | 0.929 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192-e0987441_20221010.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192_20221010.log) |
+| [S-ViPNAS-Res50](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.711 | 0.894 | 0.787 | 0.769 | 0.934 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192-35d4bff9_20220917.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192_20220917.log) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cbdaa5bcabf800b60b14a044e5de0e71f753017a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vipnas_coco.yml
@@ -0,0 +1,40 @@
+Collections:
+- Name: ViPNAS
+ Paper:
+ Title: 'ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search'
+ URL: https://arxiv.org/abs/2105.10154
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/vipnas.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: &id001
+ - ViPNAS
+ Training Data: COCO
+ Name: td-hm_vipnas-mbv3_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.7
+ AP@0.5: 0.887
+ AP@0.75: 0.783
+ AR: 0.758
+ AR@0.5: 0.929
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-mbv3_8xb64-210e_coco-256x192-e0987441_20221010.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-hm_vipnas-res50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.711
+ AP@0.5: 0.894
+ AP@0.75: 0.787
+ AR: 0.769
+ AR@0.5: 0.934
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_vipnas-res50_8xb64-210e_coco-256x192-35d4bff9_20220917.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..f9266001d5d49b223c5b00b488db5a5545b9dae8
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.md
@@ -0,0 +1,62 @@
+To utilize ViTPose, you'll need to have [MMClassification](https://github.com/open-mmlab/mmclassification) installed. To install the required version, run the following command:
+
+```shell
+mim install 'mmcls>=1.0.0rc5'
+```
+
+
+
+
+
+ViTPose (NeurIPS'2022)
+
+```bibtex
+@inproceedings{
+ xu2022vitpose,
+ title={Vi{TP}ose: Simple Vision Transformer Baselines for Human Pose Estimation},
+ author={Yufei Xu and Jing Zhang and Qiming Zhang and Dacheng Tao},
+ booktitle={Advances in Neural Information Processing Systems},
+ year={2022},
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+> With classic decoder
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ViTPose-S](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py) | 256x192 | 0.739 | 0.903 | 0.816 | 0.792 | 0.942 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.json) |
+| [ViTPose-B](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py) | 256x192 | 0.757 | 0.905 | 0.829 | 0.810 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.json) |
+| [ViTPose-L](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py) | 256x192 | 0.782 | 0.914 | 0.850 | 0.834 | 0.952 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192-53609f55_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192-53609f55_20230314.json) |
+| [ViTPose-H](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py) | 256x192 | 0.788 | 0.917 | 0.855 | 0.839 | 0.954 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192-e32adcd4_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192-e32adcd4_20230314.json) |
+| [ViTPose-H\*](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py) | 256x192 | 0.790 | 0.916 | 0.857 | 0.840 | 0.953 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_3rdparty_coco-256x192-5b738c8e_20230314.pth) | - |
+
+*Models with \* are converted from the [official repo](https://github.com/ViTAE-Transformer/ViTPose). The config files of these models are only for validation.*
+
+> With simple decoder
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [ViTPose-S](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.736 | 0.900 | 0.811 | 0.790 | 0.940 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192-4c101a76_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192-4c101a76_20230314.json) |
+| [ViTPose-B](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.756 | 0.906 | 0.826 | 0.809 | 0.946 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192-0b8234ea_20230407.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192-0b8234ea_20230407.json) |
+| [ViTPose-L](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.781 | 0.914 | 0.853 | 0.833 | 0.952 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192-3a7ee9e1_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192-3a7ee9e1_20230314.json) |
+| [ViTPose-H](/configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py) | 256x192 | 0.789 | 0.916 | 0.856 | 0.839 | 0.953 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6d1cc7db15b055d418adf9bc429b5319e81b71df
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/coco/vitpose_coco.yml
@@ -0,0 +1,155 @@
+Collections:
+- Name: ViTPose
+ Paper:
+ Title: 'ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation'
+ URL: https://arxiv.org/abs/2204.12484
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/vitpose.md
+ Metadata:
+ Training Resources: 8x A100 GPUs
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Metadata:
+ Architecture: &id001
+ - ViTPose
+ - Classic Head
+ Model Size: Small
+ Training Data: COCO
+ Name: td-hm_ViTPose-small_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.739
+ AP@0.5: 0.903
+ AP@0.75: 0.816
+ AR: 0.792
+ AR@0.5: 0.942
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small_8xb64-210e_coco-256x192-62d7a712_20230314.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Metadata:
+ Architecture: *id001
+ Model Size: Base
+ Training Data: COCO
+ Name: td-hm_ViTPose-base_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.757
+ AP@0.5: 0.905
+ AP@0.75: 0.829
+ AR: 0.81
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base_8xb64-210e_coco-256x192-216eae50_20230314.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Metadata:
+ Architecture: *id001
+ Model Size: Large
+ Training Data: COCO
+ Name: td-hm_ViTPose-large_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.782
+ AP@0.5: 0.914
+ AP@0.75: 0.850
+ AR: 0.834
+ AR@0.5: 0.952
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large_8xb64-210e_coco-256x192-53609f55_20230314.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Metadata:
+ Architecture: *id001
+ Model Size: Huge
+ Training Data: COCO
+ Name: td-hm_ViTPose-huge_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.788
+ AP@0.5: 0.917
+ AP@0.75: 0.855
+ AR: 0.839
+ AR@0.5: 0.954
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge_8xb64-210e_coco-256x192-e32adcd4_20230314.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Alias: vitpose-s
+ Metadata:
+ Architecture: &id002
+ - ViTPose
+ - Simple Head
+ Model Size: Small
+ Training Data: COCO
+ Name: td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.736
+ AP@0.5: 0.900
+ AP@0.75: 0.811
+ AR: 0.790
+ AR@0.5: 0.940
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-small-simple_8xb64-210e_coco-256x192-4c101a76_20230314.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Alias:
+ - vitpose
+ - vitpose-b
+ Metadata:
+ Architecture: *id002
+ Model Size: Base
+ Training Data: COCO
+ Name: td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.756
+ AP@0.5: 0.906
+ AP@0.75: 0.826
+ AR: 0.809
+ AR@0.5: 0.946
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-base-simple_8xb64-210e_coco-256x192-0b8234ea_20230407.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Alias: vitpose-l
+ Metadata:
+ Architecture: *id002
+ Model Size: Large
+ Training Data: COCO
+ Name: td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.781
+ AP@0.5: 0.914
+ AP@0.75: 0.853
+ AR: 0.833
+ AR@0.5: 0.952
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-large-simple_8xb64-210e_coco-256x192-3a7ee9e1_20230314.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192.py
+ In Collection: ViTPose
+ Alias: vitpose-h
+ Metadata:
+ Architecture: *id002
+ Model Size: Huge
+ Training Data: COCO
+ Name: td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.789
+ AP@0.5: 0.916
+ AP@0.75: 0.856
+ AR: 0.839
+ AR@0.5: 0.953
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1ba19a130b13f8c2c670ae15cff5e21303bc89e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py
@@ -0,0 +1,217 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr over the second half of training (epoch 105 to 210)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,
+ out_channels=14,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/',
+# f'{data_root}': 's3://openmmlab/datasets/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='crowdpose/annotations/mmpose_crowdpose_test.json',
+ bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json',
+ data_prefix=dict(img='pose/CrowdPose/images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='crowdpose/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'crowdpose/annotations/mmpose_crowdpose_test.json',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..24c35348389b3f532fffa418a21b6edce6d21cb0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.md
@@ -0,0 +1,56 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: |
+| [pose_cspnext_m](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py) | 256x192 | 0.662 | 0.821 | 0.723 | 0.759 | 0.675 | 0.539 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-crowdpose_pt-in1k_210e-256x192-f591079f_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-crowdpose_pt-in1k_210e-256x192-f591079f_20230123.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6e5b4cd691ccb083db97b33b3531b0e69f39af12
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext_udp_crowdpose.yml
@@ -0,0 +1,20 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/cspnext-m_udp_8xb64-210e_crowpose-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture:
+ - UDP
+ - CSPNeXt
+ Training Data: CrowdPose
+ Name: cspnext-m_udp_8xb64-210e_crowpose-256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.662
+ AP (E): 0.759
+ AP (H): 0.539
+ AP (M): 0.675
+ AP@0.5: 0.821
+ AP@0.75: 0.723
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-crowdpose_pt-in1k_210e-256x192-f591079f_20230123.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..c0d24d47175d592b536d905881df852959406eed
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.md
@@ -0,0 +1,38 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.675 | 0.825 | 0.729 | 0.770 | 0.687 | 0.553 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192-960be101_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192_20201227.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c37fa9154feab028c1dd3d3511fccabcc2805042
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/hrnet_crowdpose.yml
@@ -0,0 +1,19 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture:
+ - HRNet
+ Training Data: CrowdPose
+ Name: td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.675
+ AP (E): 0.77
+ AP (H): 0.553
+ AP (M): 0.687
+ AP@0.5: 0.825
+ AP@0.75: 0.729
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_crowdpose_256x192-960be101_20201227.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.md
new file mode 100644
index 0000000000000000000000000000000000000000..56a771806d361061652b57e624b13169db1bb410
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.md
@@ -0,0 +1,58 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+CrowdPose (CVPR'2019)
+
+```bibtex
+@article{li2018crowdpose,
+ title={CrowdPose: Efficient Crowded Scenes Pose Estimation and A New Benchmark},
+ author={Li, Jiefeng and Wang, Can and Zhu, Hao and Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu},
+ journal={arXiv preprint arXiv:1812.00324},
+ year={2018}
+}
+```
+
+
+
+Results on CrowdPose test with [YOLOv3](https://github.com/eriklindernoren/PyTorch-YOLOv3) human detector
+
+| Arch | Input Size | AP | AP50 | AP75 | AP (E) | AP (M) | AP (H) | ckpt | log |
+| :--------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :----: | :----: | :----: | :--------------------------------------------: | :-------------------------------------------: |
+| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.637 | 0.808 | 0.692 | 0.738 | 0.650 | 0.506 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192-c6a526b6_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192_20201227.log.json) |
+| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.647 | 0.810 | 0.703 | 0.745 | 0.658 | 0.521 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192-8f5870f4_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192_20201227.log.json) |
+| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py) | 320x256 | 0.661 | 0.821 | 0.714 | 0.759 | 0.672 | 0.534 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256-c88c512a_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256_20201227.log.json) |
+| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py) | 256x192 | 0.656 | 0.818 | 0.712 | 0.754 | 0.666 | 0.533 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192-dbd49aba_20201227.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192_20201227.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1477c28deb33691632ccdb33035ed8075e43e241
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/resnet_crowdpose.yml
@@ -0,0 +1,71 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: CrowdPose
+ Name: td-hm_res50_8xb64-210e_crowdpose-256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.637
+ AP (E): 0.738
+ AP (H): 0.506
+ AP (M): 0.65
+ AP@0.5: 0.808
+ AP@0.75: 0.692
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_crowdpose_256x192-c6a526b6_20201227.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: td-hm_res101_8xb64-210e_crowdpose-256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.647
+ AP (E): 0.745
+ AP (H): 0.521
+ AP (M): 0.658
+ AP@0.5: 0.81
+ AP@0.75: 0.703
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_256x192-8f5870f4_20201227.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: td-hm_res101_8xb64-210e_crowdpose-320x256
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.661
+ AP (E): 0.759
+ AP (H): 0.534
+ AP (M): 0.672
+ AP@0.5: 0.821
+ AP@0.75: 0.714
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_crowdpose_320x256-c88c512a_20201227.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: CrowdPose
+ Name: td-hm_res152_8xb64-210e_crowdpose-256x192
+ Results:
+ - Dataset: CrowdPose
+ Metrics:
+ AP: 0.656
+ AP (E): 0.754
+ AP (H): 0.533
+ AP (M): 0.666
+ AP@0.5: 0.818
+ AP@0.75: 0.712
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_crowdpose_256x192-dbd49aba_20201227.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..3117314a43ab214da46a83c5621f1860bcb3f57f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_hrnet-w32_8xb64-210e_crowdpose-256x192.py
@@ -0,0 +1,152 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=14,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/crowdpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_test.json',
+ bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..79cae1d130a3713944069e37a3258811b068e655
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-256x192.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=14,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/crowdpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_test.json',
+ bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..eac5caf859095d3867fdd45fde58774b8c5ce54e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res101_8xb64-210e_crowdpose-320x256.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 320), heatmap_size=(64, 80), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=14,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/crowdpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_test.json',
+ bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b99439535a54b4bc69ca5ee270aa5a0d7fa26bf
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res152_8xb64-210e_crowdpose-256x192.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=14,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/crowdpose/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_test.json',
+ bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d669b2e2670657a25def5234037e371bede0882d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/crowdpose/td-hm_res50_8xb64-210e_crowdpose-256x192.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: train for at most 210 epochs (val_interval=10)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up (0-500), then epoch-based step decay (x0.1 at 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference total batch size: 512)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint according to 'crowdpose/AP' (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='crowdpose/AP', rule='greater'))
+
+# codec settings: MSRAHeatmap with input_size (192, 256), heatmap_size (48, 64) and sigma 2
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: TopdownPoseEstimator with a depth-50 ResNet backbone and a 14-channel HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=14,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to encode targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: CrowdPoseDataset in 'topdown' mode, rooted at data/crowdpose/
+dataset_type = 'CrowdPoseDataset'
+data_mode = 'topdown'
+data_root = 'data/crowdpose/'
+
+# pipelines: the train pipeline adds random flip / half-body / bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train on the trainval annotations, validate/test on the test annotations
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_trainval.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mmpose_crowdpose_test.json',
+ bbox_file='data/crowdpose/annotations/det_for_crowd_test_0.1_0.5.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: CocoMetric with iou_type 'keypoints_crowd'; results are reported under the 'crowdpose' prefix
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=data_root + 'annotations/mmpose_crowdpose_test.json',
+ use_area=False,
+ iou_type='keypoints_crowd',
+ prefix='crowdpose')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.md
new file mode 100644
index 0000000000000000000000000000000000000000..29df027e3f76c0801c1c89303e776f78d5c6047e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.md
@@ -0,0 +1,56 @@
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+
+
+
+
+
+JHMDB (ICCV'2013)
+
+```bibtex
+@inproceedings{Jhuang:ICCV:2013,
+ title = {Towards understanding action recognition},
+ author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
+ booktitle = {International Conf. on Computer Vision (ICCV)},
+ month = Dec,
+ pages = {3192-3199},
+ year = {2013}
+}
+```
+
+
+
+Results on Sub-JHMDB dataset
+
+The models are pre-trained on the MPII dataset only. NO test-time augmentation (multi-scale / rotation testing) is used.
+
+- Normalized by Person Size
+
+| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: |
+| Sub1 | [cpm](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py) | 368x368 | 96.1 | 91.9 | 81.0 | 78.9 | 96.6 | 90.8 | 87.3 | 89.5 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368_20201122.log.json) |
+| Sub2 | [cpm](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py) | 368x368 | 98.1 | 93.6 | 77.1 | 70.9 | 94.0 | 89.1 | 84.7 | 87.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368_20201122.log.json) |
+| Sub3 | [cpm](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py) | 368x368 | 97.9 | 94.9 | 87.3 | 84.0 | 98.6 | 94.4 | 86.2 | 92.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368_20201122.log.json) |
+| Average | cpm | 368x368 | 97.4 | 93.5 | 81.5 | 77.9 | 96.4 | 91.4 | 86.1 | 89.8 | - | - |
+
+- Normalized by Torso Size
+
+| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: |
+| Sub1 | [cpm](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py) | 368x368 | 89.0 | 63.0 | 54.0 | 54.9 | 68.2 | 63.1 | 61.2 | 66.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368_20201122.log.json) |
+| Sub2 | [cpm](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py) | 368x368 | 90.3 | 57.9 | 46.8 | 44.3 | 60.8 | 58.2 | 62.4 | 61.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368_20201122.log.json) |
+| Sub3 | [cpm](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py) | 368x368 | 91.0 | 72.6 | 59.9 | 54.0 | 73.2 | 68.5 | 65.8 | 70.3 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368_20201122.log.json) |
+| Average | cpm | 368x368 | 90.1 | 64.5 | 53.6 | 51.1 | 67.4 | 63.3 | 63.1 | 65.7 | - | - |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f9f6d7568b50b65bd8f5754539f861b15daca8c7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/cpm_jhmdb.yml
@@ -0,0 +1,116 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: &id001
+ - CPM
+ Training Data: JHMDB
+ Name: td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 87.3
+ Elb: 81
+ Head: 96.1
+ Hip: 96.6
+ Knee: 90.8
+ Mean: 89.5
+ Sho: 91.9
+ Wri: 78.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 84.7
+ Elb: 77.1
+ Head: 98.1
+ Hip: 94.0
+ Knee: 89.1
+ Mean: 87.4
+ Sho: 93.6
+ Wri: 70.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 86.2
+ Elb: 87.3
+ Head: 97.9
+ Hip: 98.6
+ Knee: 94.4
+ Mean: 92.4
+ Sho: 94.9
+ Wri: 84.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 61.2
+ Elb: 54.0
+ Head: 89.0
+ Hip: 68.2
+ Knee: 63.1
+ Mean: 66.0
+ Sho: 63.0
+ Wri: 54.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 62.4
+ Elb: 46.8
+ Head: 90.3
+ Hip: 60.8
+ Knee: 58.2
+ Mean: 61.1
+ Sho: 57.9
+ Wri: 44.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub2_368x368-fc742f1f_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 65.8
+ Elb: 59.9
+ Head: 91.0
+ Hip: 73.2
+ Knee: 68.5
+ Mean: 70.3
+ Sho: 72.6
+ Wri: 54.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub3_368x368-49337155_20201122.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.md
new file mode 100644
index 0000000000000000000000000000000000000000..22422e731653b1b01840feb0f79eb82459bb968f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.md
@@ -0,0 +1,81 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+JHMDB (ICCV'2013)
+
+```bibtex
+@inproceedings{Jhuang:ICCV:2013,
+ title = {Towards understanding action recognition},
+ author = {H. Jhuang and J. Gall and S. Zuffi and C. Schmid and M. J. Black},
+ booktitle = {International Conf. on Computer Vision (ICCV)},
+ month = Dec,
+ pages = {3192-3199},
+ year = {2013}
+}
+```
+
+
+
+Results on Sub-JHMDB dataset
+
+The models are pre-trained on the MPII dataset only. *NO* test-time augmentation (multi-scale / rotation testing) is used.
+
+- Normalized by Person Size
+
+| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: |
+| Sub1 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py) | 256x256 | 99.1 | 98.0 | 93.8 | 91.3 | 99.4 | 96.5 | 92.8 | 96.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py) | 256x256 | 99.3 | 97.1 | 90.6 | 87.0 | 98.9 | 96.3 | 94.1 | 95.0 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py) | 256x256 | 99.0 | 97.9 | 94.0 | 91.6 | 99.7 | 98.0 | 94.7 | 96.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 | 256x256 | 99.2 | 97.7 | 92.8 | 90.0 | 99.3 | 96.9 | 93.9 | 96.0 | - | - |
+| Sub1 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py) | 256x256 | 99.1 | 98.5 | 94.6 | 92.0 | 99.4 | 94.6 | 92.5 | 96.1 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py) | 256x256 | 99.3 | 97.8 | 91.0 | 87.0 | 99.1 | 96.5 | 93.8 | 95.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py) | 256x256 | 98.8 | 98.4 | 94.3 | 92.1 | 99.8 | 97.5 | 93.8 | 96.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 (2 Deconv.) | 256x256 | 99.1 | 98.2 | 93.3 | 90.4 | 99.4 | 96.2 | 93.4 | 96.0 | - | - |
+
+- Normalized by Torso Size
+
+| Split | Arch | Input Size | Head | Sho | Elb | Wri | Hip | Knee | Ank | Mean | ckpt | log |
+| :------ | :------------------------------------------------: | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :-------------------------------------------------: | :------------------------------------------------: |
+| Sub1 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py) | 256x256 | 93.3 | 83.2 | 74.4 | 72.7 | 85.0 | 81.2 | 78.9 | 81.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py) | 256x256 | 94.1 | 74.9 | 64.5 | 62.5 | 77.9 | 71.9 | 78.6 | 75.5 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py) | 256x256 | 97.0 | 82.2 | 74.9 | 70.7 | 84.7 | 83.7 | 84.2 | 82.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 | 256x256 | 94.8 | 80.1 | 71.3 | 68.6 | 82.5 | 78.9 | 80.6 | 80.1 | - | - |
+| Sub1 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py) | 256x256 | 92.4 | 80.6 | 73.2 | 70.5 | 82.3 | 75.4 | 75.0 | 79.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256_20201122.log.json) |
+| Sub2 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py) | 256x256 | 93.4 | 73.6 | 63.8 | 60.5 | 75.1 | 68.4 | 75.5 | 73.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256_20201122.log.json) |
+| Sub3 | [pose_resnet_50 (2 Deconv.)](/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py) | 256x256 | 96.1 | 81.2 | 72.6 | 67.9 | 83.6 | 80.9 | 81.5 | 81.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256_20201122.log.json) |
+| Average | pose_resnet_50 (2 Deconv.) | 256x256 | 94.0 | 78.5 | 69.9 | 66.3 | 80.3 | 74.9 | 77.3 | 78.0 | - | - |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d7480d12a0ce45db7ea28af0b02e2493b34c04b8
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/resnet_jhmdb.yml
@@ -0,0 +1,231 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: JHMDB
+ Name: td-hm_res50_8xb64-20e_jhmdb-sub1-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 92.8
+ Elb: 93.8
+ Head: 99.1
+ Hip: 99.4
+ Knee: 96.5
+ Mean: 96.1
+ Sho: 98.0
+ Wri: 91.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50_8xb64-20e_jhmdb-sub2-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 94.1
+ Elb: 90.6
+ Head: 99.3
+ Hip: 98.9
+ Knee: 96.3
+ Mean: 95.0
+ Sho: 97.1
+ Wri: 87.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50_8xb64-20e_jhmdb-sub3-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 94.7
+ Elb: 94.0
+ Head: 99.0
+ Hip: 99.7
+ Knee: 98.0
+ Mean: 96.7
+ Sho: 97.9
+ Wri: 91.6
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 92.5
+ Elb: 94.6
+ Head: 99.1
+ Hip: 99.4
+ Knee: 94.6
+ Mean: 96.1
+ Sho: 98.5
+ Wri: 92.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 93.8
+ Elb: 91.0
+ Head: 99.3
+ Hip: 99.1
+ Knee: 96.5
+ Mean: 95.2
+ Sho: 97.8
+ Wri: 87.0
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 93.8
+ Elb: 94.3
+ Head: 98.8
+ Hip: 99.8
+ Knee: 97.5
+ Mean: 96.7
+ Sho: 98.4
+ Wri: 92.1
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50_8xb64-20e_jhmdb-sub1-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 78.9
+ Elb: 74.4
+ Head: 93.3
+ Hip: 85.0
+ Knee: 81.2
+ Mean: 81.9
+ Sho: 83.2
+ Wri: 72.7
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub1_256x256-932cb3b4_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50_8xb64-20e_jhmdb-sub2-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 78.6
+ Elb: 64.5
+ Head: 94.1
+ Hip: 77.9
+ Knee: 71.9
+ Mean: 75.5
+ Sho: 74.9
+ Wri: 62.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub2_256x256-83d606f7_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50_8xb64-20e_jhmdb-sub3-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 84.2
+ Elb: 74.9
+ Head: 97.0
+ Hip: 84.7
+ Knee: 83.7
+ Mean: 82.9
+ Sho: 82.2
+ Wri: 70.7
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_jhmdb_sub3_256x256-c4ec1a0b_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 75.0
+ Elb: 73.2
+ Head: 92.4
+ Hip: 82.3
+ Knee: 75.4
+ Mean: 79.2
+ Sho: 80.6
+ Wri: 70.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub1_256x256-f0574a52_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 75.5
+ Elb: 63.8
+ Head: 93.4
+ Hip: 75.1
+ Knee: 68.4
+ Mean: 73.7
+ Sho: 73.6
+ Wri: 60.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub2_256x256-f63af0ff_20201122.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: JHMDB
+ Name: td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256
+ Results:
+ - Dataset: JHMDB
+ Metrics:
+ Ank: 81.5
+ Elb: 72.6
+ Head: 96.1
+ Hip: 83.6
+ Knee: 80.9
+ Mean: 81.2
+ Sho: 81.2
+ Wri: 67.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_2deconv_jhmdb_sub3_256x256-c4bc2ddb_20201122.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..479039f5428f7f5e736beb4cfe9c7b88c986e4ed
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub1-368x368.py
@@ -0,0 +1,127 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: train for at most 40 epochs (val_interval=1)
+train_cfg = dict(max_epochs=40, val_interval=1)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up (0-500), then epoch-based step decay (x0.1 at 20 and 30)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=40,
+ milestones=[20, 30],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference total batch size: 256)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint with interval=1 and keep the best one according to 'PCK' (greater is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings: MSRAHeatmap with input_size (368, 368), heatmap_size (46, 46) and sigma 2
+codec = dict(
+ type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2)
+
+# model settings: TopdownPoseEstimator with a 6-stage CPM backbone and a CPMHead (15 output channels)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=15,
+ feat_channels=128,
+ num_stages=6),
+ head=dict(
+ type='CPMHead',
+ in_channels=15,
+ out_channels=15,
+ num_stages=6,
+ deconv_out_channels=None,
+ final_layer=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to encode targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: JhmdbDataset in 'topdown' mode, rooted at data/jhmdb/
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines: the train pipeline adds random flip and bbox transform (rotate/scale) plus target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train on the Sub1 train annotations, validate/test on the Sub1 test annotations
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub1_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub1_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: JhmdbPCKAccuracy at thr=0.2 with 'bbox' and 'torso' as norm items
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..88b60e9f87dfc783610aa8222a4256d9625efc60
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub2-368x368.py
@@ -0,0 +1,127 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: train for at most 40 epochs (val_interval=1)
+train_cfg = dict(max_epochs=40, val_interval=1)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based linear warm-up (0-500), then epoch-based step decay (x0.1 at 20 and 30)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=40,
+ milestones=[20, 30],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (reference total batch size: 256)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint with interval=1 and keep the best one according to 'PCK' (greater is better)
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings: MSRAHeatmap with input_size (368, 368), heatmap_size (46, 46) and sigma 2
+codec = dict(
+ type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2)
+
+# model settings: TopdownPoseEstimator with a 6-stage CPM backbone and a CPMHead (15 output channels)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=15,
+ feat_channels=128,
+ num_stages=6),
+ head=dict(
+ type='CPMHead',
+ in_channels=15,
+ out_channels=15,
+ num_stages=6,
+ deconv_out_channels=None,
+ final_layer=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to encode targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: JhmdbDataset in 'topdown' mode, rooted at data/jhmdb/
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines: the train pipeline adds random flip and bbox transform (rotate/scale) plus target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train on the Sub2 train annotations, validate/test on the Sub2 test annotations
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub2_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub2_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: JhmdbPCKAccuracy at thr=0.2 with 'bbox' and 'torso' as norm items
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..602b2bcfd6aac7df667da5d71ea9d8ea233778ad
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_cpm_8xb32-40e_jhmdb-sub3-368x368.py
@@ -0,0 +1,127 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=40, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=40,
+ milestones=[20, 30],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=15,
+ feat_channels=128,
+ num_stages=6),
+ head=dict(
+ type='CPMHead',
+ in_channels=15,
+ out_channels=15,
+ num_stages=6,
+ deconv_out_channels=None,
+ final_layer=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub3_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub3_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d104e1e86e0818947f86612dbbe8b4c9b30e31f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub1-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=40, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=40,
+ milestones=[20, 30],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(32, 32), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ResNet', depth=50),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=15,
+ deconv_out_channels=(256, 256),
+ deconv_kernel_sizes=(4, 4),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+
+# base dataset settings
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub1_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub1_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..6135ce29ab3b070586d0324f95c37b272002459e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub2-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=40, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=40,
+ milestones=[20, 30],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(32, 32), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ResNet', depth=50),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=15,
+ deconv_out_channels=(256, 256),
+ deconv_kernel_sizes=(4, 4),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+
+# base dataset settings
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub2_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub2_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..44d95b15b2a0e73eb93deefc32e5e3f093212648
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50-2deconv_8xb64-40e_jhmdb-sub3-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=40, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=40,
+ milestones=[20, 30],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(32, 32), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ResNet', depth=50),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=15,
+ deconv_out_channels=(256, 256),
+ deconv_kernel_sizes=(4, 4),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+
+# base dataset settings
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub3_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub3_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..9578a66c18b3b58a9cd85ecb4941913eac6175ea
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub1-256x256.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[8, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ResNet', depth=50),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=15,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+
+# base dataset settings
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub1_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub1_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..856c89e660b7e2e866c4bd48eff32bf9faff731d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub2-256x256.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[8, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ResNet', depth=50),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=15,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+
+# base dataset settings
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub2_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub2_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..73065968848a063b462504c55e4a2ac85ffd49d9
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/jhmdb/td-hm_res50_8xb64-20e_jhmdb-sub3-256x256.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[8, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ResNet', depth=50),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=15,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth' # noqa: E501
+
+# base dataset settings
+dataset_type = 'JhmdbDataset'
+data_mode = 'topdown'
+data_root = 'data/jhmdb/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub3_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/Sub3_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='JhmdbPCKAccuracy', thr=0.2, norm_item=['bbox', 'torso']),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..0c2888bb88431497c30d7a046595488c7edaa87b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+CPM (CVPR'2016)
+
+```bibtex
+@inproceedings{wei2016convolutional,
+ title={Convolutional pose machines},
+ author={Wei, Shih-En and Ramakrishna, Varun and Kanade, Takeo and Sheikh, Yaser},
+ booktitle={Proceedings of the IEEE conference on Computer Vision and Pattern Recognition},
+ pages={4724--4732},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [cpm](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py) | 368x368 | 0.876 | 0.285 | [ckpt](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth) | [log](https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368_20200822.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3e2e439253e2eccb96d42ec887d6889abe520d65
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cpm_mpii.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py
+ In Collection: CPM
+ Metadata:
+ Architecture:
+ - CPM
+ Training Data: MPII
+ Name: td-hm_cpm_8xb64-210e_mpii-368x368
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.876
+ Mean@0.1: 0.285
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_mpii_368x368-116e62b8_20200822.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc8d6fdcea8d717c9ecbc70fb966364dea14257e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,210 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch 105 to 210 (second half of training)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=1024)
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,
+ out_channels=16,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/',
+# f'{data_root}': 's3://openmmlab/datasets/pose/MPI/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file=f'{data_root}/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='PCK', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..80aec4c28e443d64a5db78d81508b182695f487d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.md
@@ -0,0 +1,57 @@
+
+
+
+RTMDet (arXiv'2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_cspnext_m_udp](/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py) | 256x256 | 0.902 | 0.303 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-mpii_pt-in1k_210e-256x256-68d0402f_20230208.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-mpii_pt-in1k_210e-256x256-68d0402f_20230208.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7256f3b15443a4f51a68a32c6f271ef6d8c58089
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext_udp_mpii.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/cspnext-m_udp_8xb64-210e_mpii-256x256.py
+ In Collection: UDP
+ Metadata:
+ Architecture:
+ - UDP
+ - CSPNeXt
+ Training Data: MPII
+ Name: cspnext-m_udp_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.902
+ Mean@0.1: 0.303
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-mpii_pt-in1k_210e-256x256-68d0402f_20230208.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..98e795de4ff558bf49270e9c1ed6890e9de1f311
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.md
@@ -0,0 +1,41 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py) | 256x256 | 0.889 | 0.317 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256-ae358435_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256_20200812.log.json) |
+| [pose_hourglass_52](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py) | 384x384 | 0.894 | 0.367 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384-04090bc3_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384_20200812.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..eb22cd98ce6536a4280cb188694d47d51c47e050
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hourglass_mpii.yml
@@ -0,0 +1,28 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: &id001
+ - Hourglass
+ Training Data: MPII
+ Name: td-hm_hourglass52_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.889
+ Mean@0.1: 0.317
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_256x256-ae358435_20200812.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_hourglass52_8xb32-210e_mpii-384x384
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.894
+ Mean@0.1: 0.367
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hourglass/hourglass52_mpii_384x384-04090bc3_20200812.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..a03a96ba2e5aa9a39bd1849ec933b4ac58e4f438
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.md
@@ -0,0 +1,57 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_hrnet_w32_dark](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py) | 256x256 | 0.904 | 0.354 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark-f1601c5b_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark_20200927.log.json) |
+| [pose_hrnet_w48_dark](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py) | 256x256 | 0.905 | 0.360 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark-0decd39f_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark_20200927.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0283b5c827de5a2f170460ddb7e159328faf9e76
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_dark_mpii.yml
@@ -0,0 +1,29 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ - DarkPose
+ Training Data: MPII
+ Name: td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.904
+ Mean@0.1: 0.354
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_dark-f1601c5b_20200927.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.905
+ Mean@0.1: 0.36
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_dark-0decd39f_20200927.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..7e8a69f64f5165db52c2d672464c080a73427215
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.md
@@ -0,0 +1,40 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py) | 256x256 | 0.900 | 0.334 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256-6c4f923f_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256_20200812.log.json) |
+| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py) | 256x256 | 0.901 | 0.337 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256-92cab7bd_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256_20200812.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f32129742da5306e1d9f1ea7b8048480a7efa12c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/hrnet_mpii.yml
@@ -0,0 +1,28 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: MPII
+ Name: td-hm_hrnet-w32_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.9
+ Mean@0.1: 0.334
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_mpii_256x256-6c4f923f_20200812.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_hrnet-w48_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.901
+ Mean@0.1: 0.337
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_mpii_256x256-92cab7bd_20200812.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..e6647569508429fe88683ddcb16c0eaebd3309a6
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [LiteHRNet-18](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py) | 256x256 | 0.859 | 0.260 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256-cabd7984_20210623.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256_20210623.log.json) |
+| [LiteHRNet-30](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py) | 256x256 | 0.869 | 0.271 | [ckpt](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256-faae8bd8_20210622.pth) | [log](https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256_20210622.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c4314b7a74d57bbe45cd635aba06313043cc912e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/litehrnet_mpii.yml
@@ -0,0 +1,28 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: &id001
+ - LiteHRNet
+ Training Data: MPII
+ Name: td-hm_litehrnet-18_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.859
+ Mean@0.1: 0.26
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet18_mpii_256x256-cabd7984_20210623.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_litehrnet-30_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.869
+ Mean@0.1: 0.271
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/litehrnet/litehrnet30_mpii_256x256-faae8bd8_20210622.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..8bb280a8210042fb5d5f7a3f085e7a7b98ad651b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_mobilenetv2](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py) | 256x256 | 0.854 | 0.234 | [ckpt](https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256-e068afa7_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256_20200812.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..afc54f79340332a73b079284f6920d9476972e7f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/mobilenetv2_mpii.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - MobilenetV2
+ Training Data: MPII
+ Name: td-hm_mobilenetv2_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.854
+ Mean@0.1: 0.234
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/mobilenetv2/mobilenetv2_mpii_256x256-e068afa7_20200812.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..b8d98c4d6e9b6e716ce543b7ddb3101eef58eb20
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.md
@@ -0,0 +1,58 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.882 | 0.286 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256_20200812.log.json) |
+| [pose_resnet_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.888 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256-416f5d71_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256_20200812.log.json) |
+| [pose_resnet_152](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py) | 256x256 | 0.889 | 0.303 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256-3ecba29d_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256_20200812.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ff92c4f7ce78a07c141c049902d6f2cd320e0dcb
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnet_mpii.yml
@@ -0,0 +1,42 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: MPII
+ Name: td-hm_res50_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.882
+ Mean@0.1: 0.286
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_mpii_256x256-418ffc88_20200812.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_res101_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.888
+ Mean@0.1: 0.29
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_mpii_256x256-416f5d71_20200812.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_res152_8xb32-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.889
+ Mean@0.1: 0.303
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_mpii_256x256-3ecba29d_20200812.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..23362650980ca29784bb769d3bd55538272b79d5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.md
@@ -0,0 +1,41 @@
+
+
+
+ResNetV1D (CVPR'2019)
+
+```bibtex
+@inproceedings{he2019bag,
+ title={Bag of tricks for image classification with convolutional neural networks},
+ author={He, Tong and Zhang, Zhi and Zhang, Hang and Zhang, Zhongyue and Xie, Junyuan and Li, Mu},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={558--567},
+ year={2019}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_resnetv1d_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.881 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256-2337a92e_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256_20200812.log.json) |
+| [pose_resnetv1d_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.883 | 0.295 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256-2851d710_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256_20200812.log.json) |
+| [pose_resnetv1d_152](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py) | 256x256 | 0.888 | 0.300 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256-8b10a87c_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256_20200812.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e98e722db1428a622c82c53255fb962c890d3f58
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnetv1d_mpii.yml
@@ -0,0 +1,42 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNetV1D
+ Training Data: MPII
+ Name: td-hm_resnetv1d50_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.881
+ Mean@0.1: 0.29
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d50_mpii_256x256-2337a92e_20200812.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_resnetv1d101_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.883
+ Mean@0.1: 0.295
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d101_mpii_256x256-2851d710_20200812.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_resnetv1d152_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.888
+ Mean@0.1: 0.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnetv1d/resnetv1d152_mpii_256x256-8b10a87c_20200812.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..bf9d5acf8c1e48aca593f1162c35e32e73b6774d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+ResNext (CVPR'2017)
+
+```bibtex
+@inproceedings{xie2017aggregated,
+ title={Aggregated residual transformations for deep neural networks},
+ author={Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1492--1500},
+ year={2017}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_resnext_152](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py) | 256x256 | 0.887 | 0.294 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256-df302719_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256_20200927.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..580dda77b013b7eae4ed288925741cb8d3f6f246
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/resnext_mpii.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNext
+ Training Data: MPII
+ Name: td-hm_resnext152_8xb32-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.887
+ Mean@0.1: 0.294
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnext/resnext152_mpii_256x256-df302719_20200927.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..cf0e4befffc4b023dfe84a23d7880cb959154f1e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.md
@@ -0,0 +1,40 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_scnet_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.888 | 0.290 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256-a54b6af5_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256_20200812.log.json) |
+| [pose_scnet_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.887 | 0.293 | [ckpt](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256-b4c2d184_20200812.pth) | [log](https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256_20200812.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b1ec80fd8005dd50847171cd7c916cece02dd94d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/scnet_mpii.yml
@@ -0,0 +1,29 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - SCNet
+ Training Data: MPII
+ Name: td-hm_scnet50_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.888
+ Mean@0.1: 0.29
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet50_mpii_256x256-a54b6af5_20200812.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_scnet101_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.887
+ Mean@0.1: 0.293
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/scnet/scnet101_mpii_256x256-b4c2d184_20200812.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..1c92ecf9ea19a05665faf7ff2feb62a1ec5753a2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.md
@@ -0,0 +1,43 @@
+
+
+
+SEResNet (CVPR'2018)
+
+```bibtex
+@inproceedings{hu2018squeeze,
+ title={Squeeze-and-excitation networks},
+ author={Hu, Jie and Shen, Li and Sun, Gang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={7132--7141},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_seresnet_50](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.884 | 0.292 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256-1bb21f79_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256_20200927.log.json) |
+| [pose_seresnet_101](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.884 | 0.295 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256-0ba14ff5_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256_20200927.log.json) |
+| [pose_seresnet_152\*](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py) | 256x256 | 0.884 | 0.287 | [ckpt](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256-6ea1e774_20200927.pth) | [log](https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256_20200927.log.json) |
+
+Note that \* marks models trained without ImageNet pre-training.
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e71050811a4a028626e7cbbb4563533c9fdd6e57
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/seresnet_mpii.yml
@@ -0,0 +1,42 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - SEResNet
+ Training Data: MPII
+ Name: td-hm_seresnet50_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.884
+ Mean@0.1: 0.292
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet50_mpii_256x256-1bb21f79_20200927.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_seresnet101_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.884
+ Mean@0.1: 0.295
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet101_mpii_256x256-0ba14ff5_20200927.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: MPII
+ Name: td-hm_seresnet152_8xb32-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.884
+ Mean@0.1: 0.287
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/seresnet/seresnet152_mpii_256x256-6ea1e774_20200927.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..3cdaaaf5eaa1b2d9f311b9c63dc6ec37cdac2cdc
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+ShufflenetV1 (CVPR'2018)
+
+```bibtex
+@inproceedings{zhang2018shufflenet,
+ title={Shufflenet: An extremely efficient convolutional neural network for mobile devices},
+ author={Zhang, Xiangyu and Zhou, Xinyu and Lin, Mengxiao and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={6848--6856},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_shufflenetv1](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py) | 256x256 | 0.824 | 0.195 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256-dcc1c896_20200925.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256_20200925.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b9edecc42838b43aec08941d4d87c37c72d74de0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv1_mpii.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ShufflenetV1
+ Training Data: MPII
+ Name: td-hm_shufflenetv1_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.824
+ Mean@0.1: 0.195
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv1/shufflenetv1_mpii_256x256-dcc1c896_20200925.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..8ab7b026ba02ec179e07ec772f12b46966748fd9
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.md
@@ -0,0 +1,39 @@
+
+
+
+ShufflenetV2 (ECCV'2018)
+
+```bibtex
+@inproceedings{ma2018shufflenet,
+ title={Shufflenet v2: Practical guidelines for efficient cnn architecture design},
+ author={Ma, Ningning and Zhang, Xiangyu and Zheng, Hai-Tao and Sun, Jian},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={116--131},
+ year={2018}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [pose_shufflenetv2](/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py) | 256x256 | 0.828 | 0.205 | [ckpt](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256-4fb9df2d_20200925.pth) | [log](https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256_20200925.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..efa6e14f51b13c14c8a25b3929f0150e69a13589
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/shufflenetv2_mpii.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ShufflenetV2
+ Training Data: MPII
+ Name: td-hm_shufflenetv2_8xb64-210e_mpii-256x256
+ Results:
+ - Dataset: MPII
+ Metrics:
+ Mean: 0.828
+ Mean@0.1: 0.205
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/shufflenetv2/shufflenetv2_mpii_256x256-4fb9df2d_20200925.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py
new file mode 100644
index 0000000000000000000000000000000000000000..794c49420ab69ae202685bb70c6d8ec8e1b2a02b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_cpm_8xb64-210e_mpii-368x368.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(368, 368), heatmap_size=(46, 46), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='CPM',
+ in_channels=3,
+ out_channels=16,
+ feat_channels=128,
+ num_stages=6),
+ head=dict(
+ type='CPMHead',
+ in_channels=16,
+ out_channels=16,
+ num_stages=6,
+ deconv_out_channels=None,
+ final_layer=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0,
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9546504e0d3ead0b6977c33a4172a2581532a7f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb32-210e_mpii-384x384.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(384, 384), heatmap_size=(96, 96), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ head=dict(
+ type='CPMHead',
+ in_channels=256,
+ out_channels=16,
+ num_stages=1,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd854a40a3f5d6def990488b5967058997d2348f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hourglass52_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ head=dict(
+ type='CPMHead',
+ in_channels=256,
+ out_channels=16,
+ num_stages=1,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..459f24f3bdbbdc4a93e43e16382b023d6ff76e50
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,146 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=16,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=64,  # 64 per the "8xb64" config name; 8 x 64 = base_batch_size
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/mpii_train.json',
+        data_prefix=dict(img='images/'),
+        pipeline=train_pipeline,
+    ))
+val_dataloader = dict(
+ batch_size=16,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d47ed6fdc161e019c94b1ab64751a096a0d4537
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w32_dark-8xb64-210e_mpii-256x256.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=16,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e3fce96000a2ff5e1165a88a322e1cfd1226c0a
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,146 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=16,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..18b31539a33d542ae8a0f83b42835a7cf97ec5c2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_hrnet-w48_dark-8xb64-210e_mpii-256x256.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True)  # presumably the DARK variant named in the file name -- TODO confirm
+
+# model settings: top-down pose estimator with an HRNet backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,  # equals the first entry of stage4 num_channels above
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..bdab446f5038c6d86231d27284aee3b3723bea14
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-18_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,137 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a LiteHRNet backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),  # the only backbone setting that differs from the litehrnet-30 config
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=40,  # equals the first entry of each num_channels tuple above
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0,
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..84089add2a0d2c6c7d7d8b75275cf097a9b68e7f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_litehrnet-30_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,137 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a LiteHRNet backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(3, 8, 3),  # the only backbone setting that differs from the litehrnet-18 config
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=40,  # equals the first entry of each num_channels tuple above
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0,
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..41b9d3ba9ba964f34f1204d185e36dcbcb3821e0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_mobilenetv2_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a MobileNetV2 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://mobilenet_v2'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1280,  # presumably the channel count of backbone output index 7 -- TODO confirm
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..def5d2fd1681262689afd40b20a0299e64118136
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res101_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a ResNet-101 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the backbone's output channel count -- TODO confirm
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf515d0d21e6796af7fc79fb39ec27cd0fb0c7b0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res152_8xb32-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)  # 256 = 8 x 32; presumably the "8xb32" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a ResNet-152 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the backbone's output channel count -- TODO confirm
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train and val both use batch size 32; only train shuffles
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..dee56ae77b0c7b7fa40690e712e7c7ad4648f279
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_res50_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a ResNet-50 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the backbone's output channel count -- TODO confirm
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..0cbf684e38c1358cd939621294765249e1e5d68e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d101_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a ResNetV1d-101 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet101_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the backbone's output channel count -- TODO confirm
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..24653a9e56b982b150ced4157c486428a34f9d04
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d152_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: at most 210 training epochs, validation interval of 10
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: LinearLR warm-up plus MultiStepLR decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # 512 = 8 x 64; presumably the "8xb64" of the file name
+
+# hooks: keep the best checkpoint according to PCK (greater is better)
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: reused below as the head decoder and the GenerateTarget encoder
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down pose estimator with a ResNetV1d-152 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet152_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the backbone's output channel count -- TODO confirm
+ out_channels=16,  # presumably one heatmap per MPII keypoint -- TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: shared by the train and val data loaders below
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: the train pipeline adds random flip/bbox augmentation and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: train uses batch size 64 with shuffling, val uses batch size 32 without
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat',  # spelled with the data_root prefix, unlike ann_file
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation data loader
+
+# evaluators: testing reuses the validation evaluator
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..48bcfec5eb5017036168fae73396d809fcb3f567
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnetv1d50_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 512 = 8 x 64, per the file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNetV1d',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://resnet50_v1d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..30afb101037cc31d9dd51ac02487e5ef749921c7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_resnext152_8xb32-210e_mpii-256x256.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 256 = 8 x 32, per the file name)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNeXt',
+ depth=152,
+ init_cfg=dict(
+ type='Pretrained', checkpoint='mmcls://resnext152_32x4d'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb5c6b702c28300525db4137973889967af9d09c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet101_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 512 = 8 x 64, per the file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SCNet',
+ depth=101,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet101-94250a77.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2f7723724b80d730f70d00f7649adb5935a10fc
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_scnet50_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 512 = 8 x 64, per the file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SCNet',
+ depth=50,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet50-7ef0a199.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..56b7fccb2e121fdd9734f9a43963f7fe1cc7511c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet101_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 512 = 8 x 64, per the file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SEResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..79bb29e4b34fba243bca0635df2d8548e19ed76b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet152_8xb32-210e_mpii-256x256.py
@@ -0,0 +1,116 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 256 = 8 x 32, per the file name)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SEResNet',
+ depth=152, # NOTE(review): no init_cfg / pretrained checkpoint here, unlike the SEResNet-50/101 configs; confirm this is intentional
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..257dc360ad1ea41cec56d57bd4de19a59146a7a5
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_seresnet50_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 512 = 8 x 64, per the file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SEResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://se-resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..83eaca208f237d6eff8b7930e36bc91213af4fdf
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv1_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 512 = 8 x 64, per the file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ShuffleNetV1',
+ groups=3,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v1'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=960, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd05c23596c21c7aa2f491c7e95399f2ec1126c7
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/mpii/td-hm_shufflenetv2_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,117 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, validating every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: iteration-based warm-up followed by epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200], # epochs at which the LR is multiplied by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base 512 = 8 x 64, per the file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the highest PCK
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# codec settings: this dict is reused by the head decoder and both pipelines
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: data preprocessor, backbone and heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ShuffleNetV2',
+ widen_factor=1.0,
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://shufflenet_v2'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024, # assumed to equal the backbone's output channels (TODO confirm)
+ out_channels=16, # presumably one heatmap per MPII keypoint (TODO confirm)
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: MPII in top-down mode, rooted at data/mpii/
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines: training = validation steps plus random flip, bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: the test loader reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file='data/mpii/annotations/mpii_gt_val.mat', # NOTE(review): repeats the data/mpii/ prefix, unlike ann_file; confirm how it is resolved
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators: MpiiPCKAccuracy for validation, reused for test
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.md
new file mode 100644
index 0000000000000000000000000000000000000000..5d26a103db205eca0a9466a2f362ed29b1c64d0f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.md
@@ -0,0 +1,55 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
+
+Results on PoseTrack2018 val with ground-truth bounding boxes
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--------------------------------------------------- | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :---: | :---------------------------------------------------: | :--------------------------------------------------: |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py) | 256x192 | 86.2 | 89.0 | 84.5 | 79.2 | 82.3 | 82.5 | 78.7 | 83.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192_20201028.log.json) |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py) | 384x288 | 87.1 | 89.0 | 85.1 | 80.2 | 80.6 | 82.8 | 79.6 | 83.7 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py) | 256x192 | 88.3 | 90.2 | 86.0 | 81.0 | 80.7 | 83.3 | 80.6 | 84.6 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py) | 384x288 | 87.8 | 90.0 | 86.2 | 81.3 | 81.0 | 83.4 | 80.9 | 84.6 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288_20211130.log.json) |
+
+The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.
+
+Results on PoseTrack2018 val with [MMDetection](https://github.com/open-mmlab/mmdetection) pre-trained [Cascade R-CNN](https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth) (X-101-64x4d-FPN) human detector
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--------------------------------------------------- | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :---: | :---------------------------------------------------: | :--------------------------------------------------: |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py) | 256x192 | 78.0 | 82.9 | 79.5 | 73.8 | 76.9 | 76.6 | 70.2 | 76.9 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192_20201028.log.json) |
+| [pose_hrnet_w32](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py) | 384x288 | 79.9 | 83.6 | 80.4 | 74.5 | 74.8 | 76.1 | 70.5 | 77.3 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py) | 256x192 | 80.1 | 83.4 | 80.6 | 74.8 | 74.3 | 76.8 | 70.5 | 77.4 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192_20211130.log.json) |
+| [pose_hrnet_w48](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py) | 384x288 | 80.2 | 83.8 | 80.9 | 75.2 | 74.7 | 76.7 | 71.7 | 77.8 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288_20211130.log.json) |
+
+The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a0dcc78f7c65b0e712caa6e4f4204bfc8a3d8626
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/hrnet_posetrack18.yml
@@ -0,0 +1,154 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 78.7
+ Elb: 84.5
+ Head: 86.2
+ Hip: 82.3
+ Knee: 82.5
+ Shou: 89
+ Total: 83.4
+ Wri: 79.2
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 79.6
+ Elb: 84.5
+ Head: 87.1
+ Hip: 80.6
+ Knee: 82.8
+ Shou: 89
+ Total: 83.7
+ Wri: 80.2
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 79.6
+ Elb: 85.1
+ Head: 88.3
+ Hip: 80.6
+ Knee: 82.8
+ Shou: 90.2
+ Total: 84.6
+ Wri: 81
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 80.6
+ Elb: 86.2
+ Head: 87.8
+ Hip: 81
+ Knee: 83.4
+ Shou: 90
+ Total: 84.6
+ Wri: 81.3
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 70.2
+ Elb: 79.5
+ Head: 78.0
+ Hip: 76.9
+ Knee: 76.6
+ Shou: 82.9
+ Total: 76.9
+ Wri: 73.8
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_256x192-1ee951c4_20201028.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 70.5
+ Elb: 80.4
+ Head: 79.9
+ Hip: 74.8
+ Knee: 76.1
+ Shou: 83.6
+ Total: 77.3
+ Wri: 74.5
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_posetrack18_384x288-806f00a3_20211130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
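+ Ankl: 70.4  # NOTE(review): hrnet_posetrack18.md lists Ankl 70.5 for this model in the detector-bbox table - confirm which value is correct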
+ Elb: 80.6
+ Head: 80.1
+ Hip: 74.3
+ Knee: 76.8
+ Shou: 83.4
+ Total: 77.4
+ Wri: 74.8
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_256x192-b5d9b3f1_20211130.pth
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: PoseTrack18
+ Name: td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 71.7
+ Elb: 80.9
+ Head: 80.2
+ Hip: 74.7
+ Knee: 76.7
+ Shou: 83.8
+ Total: 77.8
+ Wri: 75.2
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_posetrack18_384x288-5fd6d3ff_20211130.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.md b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.md
new file mode 100644
index 0000000000000000000000000000000000000000..86f476e5b7d0cbfef712e822d660ca6a91f78849
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.md
@@ -0,0 +1,58 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+PoseTrack18 (CVPR'2018)
+
+```bibtex
+@inproceedings{andriluka2018posetrack,
+ title={Posetrack: A benchmark for human pose estimation and tracking},
+ author={Andriluka, Mykhaylo and Iqbal, Umar and Insafutdinov, Eldar and Pishchulin, Leonid and Milan, Anton and Gall, Juergen and Schiele, Bernt},
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
+ pages={5167--5176},
+ year={2018}
+}
+```
+
+
+
+Results on PoseTrack2018 val with ground-truth bounding boxes
+
+| Arch | Input Size | Head | Shou | Elb | Wri | Hip | Knee | Ankl | Total | ckpt | log |
+| :--------------------------------------------------- | :--------: | :--: | :--: | :--: | :--: | :--: | :--: | :--: | :---: | :---------------------------------------------------: | :--------------------------------------------------: |
+| [pose_resnet_50](/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py) | 256x192 | 86.5 | 87.7 | 82.5 | 75.8 | 80.1 | 78.8 | 74.2 | 81.2 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192_20201028.log.json) |
+
+The models are first pre-trained on the COCO dataset and then fine-tuned on PoseTrack18.
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.yml b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.yml
new file mode 100644
index 0000000000000000000000000000000000000000..478ffa247e660611a0f4eca6dbf594188ff9b7c2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/resnet_posetrack18.yml
@@ -0,0 +1,22 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: PoseTrack18
+ Name: td-hm_res50_8xb64-20e_posetrack18-256x192
+ Results:
+ - Dataset: PoseTrack18
+ Metrics:
+ Ankl: 74.2
+ Elb: 82.5
+ Head: 86.5
+ Hip: 80.1
+ Knee: 78.8
+ Shou: 87.7
+ Total: 81.2
+ Wri: 75.8
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_posetrack18_256x192-a62807c7_20201028.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe8e385f1daac0ac4df7a805203a88e87f487730
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-256x192.py
@@ -0,0 +1,155 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[10, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='posetrack18/Total AP', rule='greater', interval=1))
+
+# load from the pretrained model
+load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-256x192-81c58e40_20220909.pth' # noqa: E501
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'PoseTrack18Dataset'
+data_mode = 'topdown'
+data_root = 'data/posetrack18/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_val.json',
+ # comment out `bbox_file` and `filter_cfg` if using gt bbox for evaluation
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+ filter_cfg=dict(bbox_score_thr=0.4),
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='PoseTrack18Metric',
+ ann_file=data_root + 'annotations/posetrack18_val.json',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..513207441068ff0dcf37a98e995d3be47baf4817
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w32_8xb64-20e_posetrack18-384x288.py
@@ -0,0 +1,155 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[10, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='posetrack18/Total AP', rule='greater', interval=1))
+
+# load from the pretrained model
+load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w32_8xb64-210e_coco-384x288-ca5956af_20220909.pth' # noqa: E501
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'PoseTrack18Dataset'
+data_mode = 'topdown'
+data_root = 'data/posetrack18/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_val.json',
+ # comment out `bbox_file` and `filter_cfg` if using gt bbox for evaluation
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+ filter_cfg=dict(bbox_score_thr=0.4),
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='PoseTrack18Metric',
+ ann_file=data_root + 'annotations/posetrack18_val.json',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..cac23f14e47b4ba1f6ed5cb6c43ea6c11c5e89ad
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-256x192.py
@@ -0,0 +1,155 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[10, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='posetrack18/Total AP', rule='greater', interval=1))
+
+# load from the pretrained model
+load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth' # noqa: E501
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'PoseTrack18Dataset'
+data_mode = 'topdown'
+data_root = 'data/posetrack18/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_val.json',
+ # comment out `bbox_file` and `filter_cfg` if using gt bbox for evaluation
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+ filter_cfg=dict(bbox_score_thr=0.4),
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='PoseTrack18Metric',
+ ann_file=data_root + 'annotations/posetrack18_val.json',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ee99469fed8ae914e7aa91b3a32281f9f18ca1b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_hrnet-w48_8xb64-20e_posetrack18-384x288.py
@@ -0,0 +1,155 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[10, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='posetrack18/Total AP', rule='greater', interval=1))
+
+# load from the pretrained model
+load_from = 'https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_heatmap/coco/td-hm_hrnet-w48_8xb32-210e_coco-384x288-c161b7de_20220915.pth' # noqa: E501
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=17,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'PoseTrack18Dataset'
+data_mode = 'topdown'
+data_root = 'data/posetrack18/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_val.json',
+ # comment out `bbox_file` and `filter_cfg` if using gt bbox for evaluation
+ bbox_file='data/posetrack18/annotations/'
+ 'posetrack18_val_human_detections.json',
+ filter_cfg=dict(bbox_score_thr=0.4),
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='PoseTrack18Metric',
+ ann_file=data_root + 'annotations/posetrack18_val.json',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8e529d120733235c82e8088cb983127cf35f95d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_heatmap/posetrack18/td-hm_res50_8xb64-20e_posetrack18-256x192.py
@@ -0,0 +1,126 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=20, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=20,
+ milestones=[10, 15],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='posetrack18/Total AP', rule='greater', interval=1))
+
+# load from the pretrained model
+load_from = 'https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_256x192-ec54d7f3_20200709.pth' # noqa: E501
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=17,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'PoseTrack18Dataset'
+data_mode = 'topdown'
+data_root = 'data/posetrack18/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/posetrack18_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='PoseTrack18Metric',
+ ann_file=data_root + 'annotations/posetrack18_val.json',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/README.md b/mmpose/configs/body_2d_keypoint/topdown_regression/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..adc278ce0e5363f1c7afdaac6ee6d3b05ef3a9d3
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/README.md
@@ -0,0 +1,32 @@
+# Top-down regression-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. In the second stage, regression-based methods directly regress the keypoint coordinates from the features extracted from the bounding box area, following the paradigm introduced in [DeepPose: Human pose estimation via deep neural networks](http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html).
+
+
+

+
+
+## Results and Models
+
+### COCO Dataset
+
+Results on COCO val2017 with a detector having human AP of 56.4 on the COCO val2017 dataset
+
+| Model | Input Size | AP | AR | Details and Download |
+| :--------------: | :--------: | :---: | :---: | :-------------------------------------------------------: |
+| ResNet-152+RLE | 256x192 | 0.731 | 0.805 | [resnet_rle_coco.md](./coco/resnet_rle_coco.md) |
+| ResNet-101+RLE | 256x192 | 0.722 | 0.768 | [resnet_rle_coco.md](./coco/resnet_rle_coco.md) |
+| ResNet-50+RLE | 256x192 | 0.706 | 0.768 | [resnet_rle_coco.md](./coco/resnet_rle_coco.md) |
+| MobileNet-v2+RLE | 256x192 | 0.593 | 0.644 | [mobilenetv2_rle_coco.md](./coco/mobilenetv2_rle_coco.md) |
+| ResNet-152 | 256x192 | 0.584 | 0.688 | [resnet_coco.md](./coco/resnet_coco.md) |
+| ResNet-101 | 256x192 | 0.562 | 0.670 | [resnet_coco.md](./coco/resnet_coco.md) |
+| ResNet-50 | 256x192 | 0.528 | 0.639 | [resnet_coco.md](./coco/resnet_coco.md) |
+
+### MPII Dataset
+
+| Model | Input Size | PCKh@0.5 | PCKh@0.1 | Details and Download |
+| :-----------: | :--------: | :------: | :------: | :---------------------------------------------: |
+| ResNet-50+RLE | 256x256 | 0.861 | 0.277 | [resnet_rle_mpii.md](./mpii/resnet_rle_mpii.md) |
+| ResNet-152 | 256x256 | 0.850 | 0.208 | [resnet_mpii.md](./mpii/resnet_mpii.md) |
+| ResNet-101 | 256x256 | 0.841 | 0.200 | [resnet_mpii.md](./mpii/resnet_mpii.md) |
+| ResNet-50 | 256x256 | 0.826 | 0.180 | [resnet_mpii.md](./mpii/resnet_mpii.md) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..eddf5a79d31d974bf9a0e1d0fe128b32f5fa6065
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.md
@@ -0,0 +1,74 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+RLE (ICCV'2021)
+
+```bibtex
+@inproceedings{li2021human,
+ title={Human pose regression with residual log-likelihood estimation},
+ author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
+ booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
+ pages={11025--11034},
+ year={2021}
+}
+```
+
+
+
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [deeppose_mobilenetv2_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.593 | 0.836 | 0.660 | 0.644 | 0.877 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c0f470432b444bdc5dced66291bc91d7a8bd18a2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/mobilenetv2_rle_coco.yml
@@ -0,0 +1,20 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py
+ In Collection: RLE
+ Metadata:
+ Architecture: &id001
+ - DeepPose
+ - RLE
+ - MobileNet
+ Training Data: COCO
+ Name: td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.593
+ AP@0.5: 0.836
+ AP@0.75: 0.66
+ AR: 0.644
+ AR@0.5: 0.877
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192-39b73bd5_20220922.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..77ed459aeda6a43d01b0219812c90509b8414282
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.md
@@ -0,0 +1,59 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py) | 256x192 | 0.541 | 0.824 | 0.601 | 0.649 | 0.893 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.log.json) |
+| [deeppose_resnet_101](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py) | 256x192 | 0.562 | 0.831 | 0.629 | 0.670 | 0.900 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_20210205.log.json) |
+| [deeppose_resnet_152](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py) | 256x192 | 0.584 | 0.842 | 0.659 | 0.688 | 0.907 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_20210205.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e66b3043c6dfe8f9171e21b31cb6d3ae6d283932
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_coco.yml
@@ -0,0 +1,57 @@
+Collections:
+- Name: DeepPose
+ Paper:
+ Title: "Deeppose: Human pose estimation via deep neural networks"
+ URL: http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/algorithms/deeppose.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py
+ In Collection: DeepPose
+ Metadata:
+ Architecture: &id001
+ - DeepPose
+ - ResNet
+ Training Data: COCO
+ Name: td-reg_res50_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.541
+ AP@0.5: 0.824
+ AP@0.75: 0.601
+ AR: 0.649
+ AR@0.5: 0.893
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192-72ef04f3_20220913.pth
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py
+ In Collection: DeepPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-reg_res101_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.562
+ AP@0.5: 0.831
+ AP@0.75: 0.629
+ AR: 0.67
+ AR@0.5: 0.9
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192-2f247111_20210205.pth
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py
+ In Collection: DeepPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-reg_res152_8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.584
+ AP@0.5: 0.842
+ AP@0.75: 0.659
+ AR: 0.688
+ AR@0.5: 0.907
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192-7df89a88_20210205.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md
new file mode 100644
index 0000000000000000000000000000000000000000..d3f4f5a2883ef69ddbfd11921a630758151a0be2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.md
@@ -0,0 +1,78 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+RLE (ICCV'2021)
+
+```bibtex
+@inproceedings{li2021human,
+ title={Human pose regression with residual log-likelihood estimation},
+ author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
+ booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
+ pages={11025--11034},
+ year={2021}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log |
+| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
+| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.706 | 0.888 | 0.776 | 0.753 | 0.924 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.log.json) |
+| [deeppose_resnet_50_rle_pretrained](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py) | 256x192 | 0.719 | 0.891 | 0.788 | 0.764 | 0.925 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.log.json) |
+| [deeppose_resnet_101_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.722 | 0.894 | 0.794 | 0.768 | 0.930 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle-16c3d461_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle_20220615.log.json) |
+| [deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py) | 256x192 | 0.731 | 0.897 | 0.805 | 0.777 | 0.933 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle-c05bdccf_20220615.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle_20220615.log.json) |
+| [deeppose_resnet_152_rle](/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py) | 384x288 | 0.749 | 0.901 | 0.815 | 0.793 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle-b77c4c37_20220624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle_20220624.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.yml
new file mode 100644
index 0000000000000000000000000000000000000000..97ae41b8f2af552b5f0e77264baf86f308912ecc
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/resnet_rle_coco.yml
@@ -0,0 +1,90 @@
+Collections:
+- Name: RLE
+ Paper:
+ Title: Human pose regression with residual log-likelihood estimation
+ URL: https://arxiv.org/abs/2107.11291
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/techniques/rle.md
+Models:
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py
+ In Collection: RLE
+ Metadata:
+ Architecture: &id001
+ - DeepPose
+ - RLE
+ - ResNet
+ Training Data: COCO
+ Name: td-reg_res50_rle-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.706
+ AP@0.5: 0.888
+ AP@0.75: 0.776
+ AR: 0.753
+ AR@0.5: 0.924
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192-d37efd64_20220913.pth
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py
+ In Collection: RLE
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.719
+ AP@0.5: 0.891
+ AP@0.75: 0.788
+ AR: 0.764
+ AR@0.5: 0.925
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192-2cb494ee_20220913.pth
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py
+ In Collection: RLE
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-reg_res101_rle-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.722
+ AP@0.5: 0.894
+ AP@0.75: 0.794
+ AR: 0.768
+ AR@0.5: 0.93
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_coco_256x192_rle-16c3d461_20220615.pth
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py
+ In Collection: RLE
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-reg_res152_rle-8xb64-210e_coco-256x192
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.731
+ AP@0.5: 0.897
+ AP@0.75: 0.805
+ AR: 0.777
+ AR@0.5: 0.933
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_256x192_rle-c05bdccf_20220615.pth
+- Config: configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py
+ In Collection: RLE
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO
+ Name: td-reg_res152_rle-8xb64-210e_coco-384x288
+ Results:
+ - Dataset: COCO
+ Metrics:
+ AP: 0.749
+ AP@0.5: 0.901
+ AP@0.75: 0.815
+ AR: 0.793
+ AR@0.5: 0.935
+ Task: Body 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_coco_384x288_rle-b77c4c37_20220624.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..97f5d926c66be84ef1bc8fb8f1f187730cebd46d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_mobilenetv2_rle-pretrained-8xb64-210e_coco-256x192.py
@@ -0,0 +1,126 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/top_down/'
+ 'mobilenetv2/mobilenetv2_coco_256x192-d1e58e7b_20200727.pth')),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=1280,
+ num_joints=17,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ),
+)
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json',
+ score_mode='bbox_rle')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..94f35d0fc36c749638ff397f5af5eb50a006894f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_8xb64-210e_coco-256x192.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..21b4a3cdcbab80fa080ca90581a6ab3ee44fdbe4
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res101_rle-8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json',
+ score_mode='bbox_rle')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa56fba4987e9f4c6c4f0e284e5949c0c6f46d6c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_8xb64-210e_coco-256x192.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2a832b652b33aaa629fdb4a07863f223051461f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json',
+ score_mode='bbox_rle')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d319e927eb21ddcb71e40ecae1050c3421871d2
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res152_rle-8xb64-210e_coco-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(288, 384))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json',
+ score_mode='bbox_rle')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa7e487acf470dfbd988979ffb7570f72d409df0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_8xb64-210e_coco-256x192.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..db530f6ec4f065fa16228ee66fee33db5afddc4f
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-8xb64-210e_coco-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json',
+ score_mode='bbox_rle')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6b74aba7f3c138901d35652c9b7f19bebf23cceb
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/coco/td-reg_res50_rle-pretrained-8xb64-210e_coco-256x192.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=1e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=train_cfg['max_epochs'],
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(192, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/td-hm_res50_8xb64-210e_coco-256x192.pth'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=2048,
+ num_joints=17,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+test_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_train2017.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/person_keypoints_val2017.json',
+ bbox_file=f'{data_root}person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=test_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='coco/AP', rule='greater'))
+
+# evaluators
+val_evaluator = dict(
+ type='CocoMetric',
+ ann_file=f'{data_root}annotations/person_keypoints_val2017.json',
+ score_mode='bbox_rle')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..150fd48020f0e47e63e5e2356bc91ae29499c546
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.md
@@ -0,0 +1,58 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [deeppose_resnet_50](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py) | 256x256 | 0.826 | 0.180 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256-c63cd0b6_20210203.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_20210203.log.json) |
+| [deeppose_resnet_101](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py) | 256x256 | 0.841 | 0.200 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256-87516a90_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256_20210205.log.json) |
+| [deeppose_resnet_152](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py) | 256x256 | 0.850 | 0.208 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256-15f5e6f9_20210205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256_20210205.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a744083e97a054b07c89b0d283189ef51f236bf0
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_mpii.yml
@@ -0,0 +1,42 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py
+  In Collection: DeepPose
+  Metadata:
+    Architecture: &id001
+    - DeepPose
+    - ResNet
+    Training Data: MPII
+  Name: td-reg_res50_8xb64-210e_mpii-256x256
+  Results:
+  - Dataset: MPII
+    Metrics:
+      Mean: 0.826
+      Mean@0.1: 0.18
+    Task: Body 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256-c63cd0b6_20210203.pth
+- Config: configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py
+  In Collection: DeepPose
+  Metadata:
+    Architecture: *id001
+    Training Data: MPII
+  Name: td-reg_res101_8xb64-210e_mpii-256x256
+  Results:
+  - Dataset: MPII
+    Metrics:
+      Mean: 0.841
+      Mean@0.1: 0.2
+    Task: Body 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res101_mpii_256x256-87516a90_20210205.pth
+- Config: configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py
+  In Collection: DeepPose
+  Metadata:
+    Architecture: *id001
+    Training Data: MPII
+  Name: td-reg_res152_8xb64-210e_mpii-256x256
+  Results:
+  - Dataset: MPII
+    Metrics:
+      Mean: 0.85
+      Mean@0.1: 0.208
+    Task: Body 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res152_mpii_256x256-15f5e6f9_20210205.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.md b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.md
new file mode 100644
index 0000000000000000000000000000000000000000..bf3a67a49a41be72076fa2831902aa194d17d346
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.md
@@ -0,0 +1,73 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+RLE (ICCV'2021)
+
+```bibtex
+@inproceedings{li2021human,
+ title={Human pose regression with residual log-likelihood estimation},
+ author={Li, Jiefeng and Bian, Siyuan and Zeng, Ailing and Wang, Can and Pang, Bo and Liu, Wentao and Lu, Cewu},
+ booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
+ pages={11025--11034},
+ year={2021}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+MPII (CVPR'2014)
+
+```bibtex
+@inproceedings{andriluka14cvpr,
+ author = {Mykhaylo Andriluka and Leonid Pishchulin and Peter Gehler and Schiele, Bernt},
+ title = {2D Human Pose Estimation: New Benchmark and State of the Art Analysis},
+ booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year = {2014},
+ month = {June}
+}
+```
+
+
+
+Results on MPII val set
+
+| Arch | Input Size | Mean | Mean@0.1 | ckpt | log |
+| :---------------------------------------------------------- | :--------: | :---: | :------: | :---------------------------------------------------------: | :---------------------------------------------------------: |
+| [deeppose_resnet_50_rle](/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py) | 256x256 | 0.861 | 0.277 | [ckpt](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_rle-5f92a619_20220504.pth) | [log](https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_rle_20220504.log.json) |
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.yml b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a03586d42cd7690154d336444e948ea317dbfc9c
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/resnet_rle_mpii.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py
+  In Collection: RLE
+  Metadata:
+    Architecture:
+    - DeepPose
+    - RLE
+    - ResNet
+    Training Data: MPII
+  Name: td-reg_res50_rle-8xb64-210e_mpii-256x256
+  Results:
+  - Dataset: MPII
+    Metrics:
+      Mean: 0.861
+      Mean@0.1: 0.277
+    Task: Body 2D Keypoint
+  Weights: https://download.openmmlab.com/mmpose/top_down/deeppose/deeppose_res50_mpii_256x256_rle-5f92a619_20220504.pth
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c7821f91b1161491ad2166b36bd582e194f384b
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res101_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,116 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=train_cfg['max_epochs'],  # follow the epoch budget set above
+        milestones=[170, 200],  # LR is multiplied by gamma at these epochs
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=16,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/mpii_val.json',
+        headbox_file=f'{data_root}annotations/mpii_gt_val.mat',  # data_root ends with '/'
+        data_prefix=dict(img='images/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1a19b0d6e720c9f60e19d62a8712e532390cc84
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res152_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,118 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up over the first 500 iterations
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=train_cfg['max_epochs'],  # follow the epoch budget set above
+        milestones=[170, 200],  # LR is multiplied by gamma at these epochs
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=16,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+file_client_args = dict(backend='disk')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', file_client_args=file_client_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_mode=data_mode,
+        ann_file='annotations/mpii_val.json',
+        headbox_file=f'{data_root}annotations/mpii_gt_val.mat',  # data_root ends with '/'
+        data_prefix=dict(img='images/'),
+        test_mode=True,
+        pipeline=val_pipeline,
+    ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..901fd4b8d61c2aa7a5cc920d1590acf8a4ece88d
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_8xb64-210e_mpii-256x256.py
@@ -0,0 +1,116 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=16,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file=f'{data_root}/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d46484755dec533fe5519a782de86404bf9986e
--- /dev/null
+++ b/mmpose/configs/body_2d_keypoint/topdown_regression/mpii/td-reg_res50_rle-8xb64-210e_mpii-256x256.py
@@ -0,0 +1,116 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RLEHead',
+ in_channels=2048,
+ num_joints=16,
+ loss=dict(type='RLELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings
+dataset_type = 'MpiiDataset'
+data_mode = 'topdown'
+data_root = 'data/mpii/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform', shift_prob=0),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/mpii_val.json',
+ headbox_file=f'{data_root}/annotations/mpii_gt_val.mat',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
+
+# evaluators
+val_evaluator = dict(type='MpiiPCKAccuracy')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/body_3d_keypoint/README.md b/mmpose/configs/body_3d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b67f7ce7ac7570c8903cb437080358ab34984f88
--- /dev/null
+++ b/mmpose/configs/body_3d_keypoint/README.md
@@ -0,0 +1,13 @@
+# Human Body 3D Pose Estimation
+
+3D human body pose estimation aims at predicting the X, Y, Z coordinates of human body joints. Based on the number of cameras used to capture the images or videos, existing works can be further divided into multi-view methods and single-view (monocular) methods.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/3d_body_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/en/3d_human_pose_demo.md) to run demos.
+
+
diff --git a/mmpose/configs/face_2d_keypoint/README.md b/mmpose/configs/face_2d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9f9370a754902883013e479779a5db7acb2c9699
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/README.md
@@ -0,0 +1,16 @@
+# 2D Face Landmark Detection
+
+2D face landmark detection (also referred to as face alignment) is defined as the task of detecting the face keypoints from an input image.
+
+Normally, the input images are cropped face images, where the face is located at the center;
+or the rough location (or the bounding box) of the face is provided.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_face_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/en/2d_face_demo.md) to run demos.
+
+
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/README.md b/mmpose/configs/face_2d_keypoint/rtmpose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..d0c7f55fb42d9501dbfac7511e1097d4a5aa8c1d
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/README.md
@@ -0,0 +1,32 @@
+# RTMPose
+
+Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet its application in the industrial community still suffers from heavy model parameters and high latency.
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose.
+Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries.
+To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deploying on the mobile device.
+
+## Results and Models
+
+### COCO-WholeBody-Face Dataset
+
+Results on COCO-WholeBody-Face val set
+
+| Model | Input Size | NME | Details and Download |
+| :-------: | :--------: | :----: | :------------------------------------------------------------------------------------: |
+| RTMPose-m | 256x256 | 0.0466 | [rtmpose_coco_wholebody_face.md](./coco_wholebody_face/rtmpose_coco_wholebody_face.md) |
+
+### WFLW Dataset
+
+Results on WFLW dataset
+
+| Model | Input Size | NME | Details and Download |
+| :-------: | :--------: | :--: | :---------------------------------------: |
+| RTMPose-m | 256x256 | 4.01 | [rtmpose_wflw.md](./wflw/rtmpose_wflw.md) |
+
+### LaPa Dataset
+
+Results on LaPa dataset
+
+| Model | Input Size | NME | Details and Download |
+| :-------: | :--------: | :--: | :---------------------------------------: |
+| RTMPose-m | 256x256 | 1.29 | [rtmpose_lapa.md](./lapa/rtmpose_lapa.md) |
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..41c9309707229734a53d3c93a439624068fb08b3
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 60
+stage2_num_epochs = 10
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=1)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch max_epochs // 2 to max_epochs
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=68,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyFaceDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='NME', rule='less', max_keep_ckpts=1, interval=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..77d99bc63f7452b80e2983341794326a20c80fa1
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.md
@@ -0,0 +1,39 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [pose_rtmpose_m](/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0466 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-face_pt-aic-coco_60e-256x256-62026ef2_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-face_pt-aic-coco_60e-256x256-62026ef2_20230228.json) |
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fdc2599e713aa710c102a71c67906090600ef6d6
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose_coco_wholebody_face.yml
@@ -0,0 +1,14 @@
+Models:
+- Config: configs/face_2d_keypoint/rtmpose/coco_wholebody_face/rtmpose-m_8xb32-60e_coco-wholebody-face-256x256.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture:
+ - RTMPose
+ Training Data: COCO-WholeBody-Face
+ Name: rtmpose-m_8xb32-60e_coco-wholebody-face-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0466
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-face_pt-aic-coco_60e-256x256-62026ef2_20230228.pth
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4124ff6d8db6956df8ec0aabacc5a5d0a93db6e
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py
@@ -0,0 +1,247 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 120
+stage2_num_epochs = 10
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=1)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch max_epochs // 2 to max_epochs
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=106,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'LapaDataset'
+data_mode = 'topdown'
+data_root = 'data/LaPa/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/',
+# f'{data_root}': 's3://openmmlab/datasets/pose/LaPa/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(type='PhotometricDistortion'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.2),
+ dict(type='MedianBlur', p=0.2),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/lapa_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/lapa_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/lapa_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='NME', rule='less', max_keep_ckpts=1, interval=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
new file mode 100644
index 0000000000000000000000000000000000000000..9638de7551c0e0cabaa2ca1ba606bf8abc42b311
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.md
@@ -0,0 +1,40 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+LaPa (AAAI'2020)
+
+```bibtex
+@inproceedings{liu2020new,
+ title={A New Dataset and Boundary-Attention Semantic Segmentation for Face Parsing.},
+ author={Liu, Yinglu and Shi, Hailin and Shen, Hao and Si, Yue and Wang, Xiaobo and Mei, Tao},
+ booktitle={AAAI},
+ pages={11637--11644},
+ year={2020}
+}
+```
+
+
+
+Results on LaPa val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: |
+| [pose_rtmpose_m](/configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py) | 256x256 | 1.29 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.json) |
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml
new file mode 100644
index 0000000000000000000000000000000000000000..96acff8de6c25f064622a9711565ed0ffc594912
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/lapa/rtmpose_lapa.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/rtmpose/lapa/rtmpose-m_8xb64-120e_lapa-256x256.py
+ In Collection: RTMPose
+ Alias: face
+ Metadata:
+ Architecture:
+ - RTMPose
+ Training Data: LaPa
+ Name: rtmpose-m_8xb64-120e_lapa-256x256
+ Results:
+ - Dataset: LaPa
+ Metrics:
+ NME: 1.29
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-lapa_pt-aic-coco_120e-256x256-762b1ae2_20230422.pth
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3f8c06e43b87c330612835c3db7af89583493d2
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
@@ -0,0 +1,232 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 60
+stage2_num_epochs = 10
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=1)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch max_epochs // 2 to max_epochs
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=98,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/pose/WFLW/',
+# f'{data_root}': 's3://openmmlab/datasets/pose/WFLW/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='NME', rule='less', max_keep_ckpts=1, interval=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.md b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..b0070258da1b81c6ee5bd7ebe198eae968067f80
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.md
@@ -0,0 +1,42 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: |
+| [pose_rtmpose_m](/configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py) | 256x256 | 4.01 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-wflw_pt-aic-coco_60e-256x256-dc1dcdcf_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-wflw_pt-aic-coco_60e-256x256-dc1dcdcf_20230228.json) |
diff --git a/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.yml b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..deee03a7ddc8fb9205bbf505856a90633885cc38
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/rtmpose/wflw/rtmpose_wflw.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/rtmpose/wflw/rtmpose-m_8xb64-60e_wflw-256x256.py
+ In Collection: RTMPose
+ Alias: face
+ Metadata:
+ Architecture:
+ - RTMPose
+ Training Data: WFLW
+ Name: rtmpose-m_8xb64-60e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME: 4.01
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-wflw_pt-aic-coco_60e-256x256-dc1dcdcf_20230228.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.md
new file mode 100644
index 0000000000000000000000000000000000000000..ace8776c4e28e66559e4dcecec0785e6ef5a0771
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.md
@@ -0,0 +1,44 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+300W (IMAVIS'2016)
+
+```bibtex
+@article{sagonas2016300,
+ title={300 faces in-the-wild challenge: Database and results},
+ author={Sagonas, Christos and Antonakos, Epameinondas and Tzimiropoulos, Georgios and Zafeiriou, Stefanos and Pantic, Maja},
+ journal={Image and vision computing},
+ volume={47},
+ pages={3--18},
+ year={2016},
+ publisher={Elsevier}
+}
+```
+
+
+
+Results on 300W dataset
+
+The model is trained on 300W train.
+
+| Arch | Input Size | NME*common* | NME*challenge* | NME*full* | NME*test* | ckpt | log |
+| :--------------------------------- | :--------: | :--------------------: | :-----------------------: | :------------------: | :------------------: | :---------------------------------: | :--------------------------------: |
+| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py) | 256x256 | 2.92 | 5.64 | 3.45 | 4.10 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256-eea53406_20211019.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256_20211019.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.yml
new file mode 100644
index 0000000000000000000000000000000000000000..58dcb4832ac5824e375f5d8dc66f6648626528f8
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/hrnetv2_300w.yml
@@ -0,0 +1,23 @@
+Collections:
+- Name: HRNetv2
+ Paper:
+ Title: Deep High-Resolution Representation Learning for Visual Recognition
+ URL: https://ieeexplore.ieee.org/abstract/document/9052469/
+ README: https://github.com/open-mmlab/mmpose/blob/main/docs/src/papers/backbones/hrnetv2.md
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: 300W
+ Name: td-hm_hrnetv2-w18_8xb64-60e_300w-256x256
+ Results:
+ - Dataset: 300W
+ Metrics:
+ NME challenge: 5.64
+ NME common: 2.92
+ NME full: 3.45
+ NME test: 4.1
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_300w_256x256-eea53406_20211019.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..52473a4664cca8266f603729d1a631aa6dc5b4ca
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/300w/td-hm_hrnetv2-w18_8xb64-60e_300w-256x256.py
@@ -0,0 +1,161 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 60 epochs; val_interval=1 presumably means validating after every epoch — confirm
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with a base learning rate of 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=60,
+ milestones=[40, 55], # LR is scaled by gamma at epochs 40 and 55
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; base_batch_size=512 matches the "8xb64" in the config file name (8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the best checkpoint is selected by NME, lower being better (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: 256x256 input paired with 64x64 heatmaps; shared by the head decoder and the training target generator
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=1.5)
+
+# model settings: top-down estimator built from an HRNet backbone, a FeatureMapProcessor neck and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True, # NOTE(review): presumably concatenates the multi-scale backbone outputs — confirm
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270, # equals 18 + 36 + 72 + 144, the sum of the stage4 branch widths
+ out_channels=68, # NOTE(review): presumably one heatmap per 300W landmark — confirm
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: Face300WDataset in top-down mode, rooted at data/300w/
+dataset_type = 'Face300WDataset'
+data_mode = 'topdown'
+data_root = 'data/300w/'
+
+# pipelines: training adds random flip and random bbox rotation/scaling and generates targets; validation has no random steps and no target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0, # shift probability 0, so presumably no random bbox shift is applied — confirm
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training shuffles the 300W train annotations; validation and testing share the unshuffled 300W valid annotations
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_300w_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_300w_valid.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader # testing reuses the validation loader settings unchanged
+
+# evaluators: NME with norm_mode='keypoint_distance'; testing reuses the validation evaluator
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..a8b7cf98fa119c4a1065484b24bd768196a3622d
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/README.md
@@ -0,0 +1,57 @@
+# Top-down heatmap-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator produces heatmaps that represent the likelihood of each location being a keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
+
+
+

+
+
+## Results and Models
+
+### 300W Dataset
+
+Results on 300W dataset
+
+| Model | Input Size | NME*common* | NME*challenge* | NME*full* | NME*test* | Details and Download |
+| :---------: | :--------: | :--------------------: | :-----------------------: | :------------------: | :------------------: | :---------------------------------------: |
+| HRNetv2-w18 | 256x256 | 2.92 | 5.64 | 3.45 | 4.10 | [hrnetv2_300w.md](./300w/hrnetv2_300w.md) |
+
+### AFLW Dataset
+
+Results on AFLW dataset
+
+| Model | Input Size | NME*full* | NME*frontal* | Details and Download |
+| :--------------: | :--------: | :------------------: | :---------------------: | :-------------------------------------------------: |
+| HRNetv2-w18+Dark | 256x256 | 1.35 | 1.19 | [hrnetv2_dark_aflw.md](./aflw/hrnetv2_dark_aflw.md) |
+| HRNetv2-w18 | 256x256 | 1.41 | 1.27 | [hrnetv2_aflw.md](./aflw/hrnetv2_aflw.md) |
+
+### COCO-WholeBody-Face Dataset
+
+Results on COCO-WholeBody-Face val set
+
+| Model | Input Size | NME | Details and Download |
+| :--------------: | :--------: | :----: | :----------------------------------------------------------------------------------------------: |
+| HRNetv2-w18+Dark | 256x256 | 0.0513 | [hrnetv2_dark_coco_wholebody_face.md](./coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md) |
+| SCNet-50 | 256x256 | 0.0567 | [scnet_coco_wholebody_face.md](./coco_wholebody_face/scnet_coco_wholebody_face.md) |
+| HRNetv2-w18 | 256x256 | 0.0569 | [hrnetv2_coco_wholebody_face.md](./coco_wholebody_face/hrnetv2_coco_wholebody_face.md) |
+| ResNet-50 | 256x256 | 0.0582 | [resnet_coco_wholebody_face.md](./coco_wholebody_face/resnet_coco_wholebody_face.md) |
+| HourglassNet | 256x256 | 0.0587 | [hourglass_coco_wholebody_face.md](./coco_wholebody_face/hourglass_coco_wholebody_face.md) |
+| MobileNet-v2 | 256x256 | 0.0611 | [mobilenetv2_coco_wholebody_face.md](./coco_wholebody_face/mobilenetv2_coco_wholebody_face.md) |
+
+### COFW Dataset
+
+Results on COFW dataset
+
+| Model | Input Size | NME | Details and Download |
+| :---------: | :--------: | :--: | :---------------------------------------: |
+| HRNetv2-w18 | 256x256 | 3.48 | [hrnetv2_cofw.md](./cofw/hrnetv2_cofw.md) |
+
+### WFLW Dataset
+
+Results on WFLW dataset
+
+| Model | Input Size | NME*test* | NME*pose* | NME*illumination* | NME*occlusion* | NME*blur* | NME*makeup* | NME*expression* | Details and Download |
+| :-----: | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------------------: |
+| HRNetv2-w18+Dark | 256x256 | 3.98 | 6.98 | 3.96 | 4.78 | 4.56 | 3.89 | 4.29 | [hrnetv2_dark_wflw.md](./wflw/hrnetv2_dark_wflw.md) |
+| HRNetv2-w18+AWing | 256x256 | 4.02 | 6.94 | 3.97 | 4.78 | 4.59 | 3.87 | 4.28 | [hrnetv2_awing_wflw.md](./wflw/hrnetv2_awing_wflw.md) |
+| HRNetv2-w18 | 256x256 | 4.06 | 6.97 | 3.99 | 4.83 | 4.58 | 3.94 | 4.33 | [hrnetv2_wflw.md](./wflw/hrnetv2_wflw.md) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..70c59ac2e4ca7a58db9057f01d2af3c17ce5785d
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.md
@@ -0,0 +1,43 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AFLW (ICCVW'2011)
+
+```bibtex
+@inproceedings{koestinger2011annotated,
+ title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
+ author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
+ booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)},
+ pages={2144--2151},
+ year={2011},
+ organization={IEEE}
+}
+```
+
+
+
+Results on AFLW dataset
+
+The model is trained on AFLW train and evaluated on AFLW full and frontal.
+
+| Arch | Input Size | NME*full* | NME*frontal* | ckpt | log |
+| :------------------------------------------------ | :--------: | :------------------: | :---------------------: | :-----------------------------------------------: | :-----------------------------------------------: |
+| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py) | 256x256 | 1.41 | 1.27 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256_20210125.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..06d2d43b9c1983c2c4d43d715b08721a822ffed3
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_aflw.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: AFLW
+ Name: td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256
+ Results:
+ - Dataset: AFLW
+ Metrics:
+ NME frontal: 1.27
+ NME full: 1.41
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_aflw_256x256-f2bbc62b_20210125.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..a51c473d3b243f7f773a851bb69c425b14443767
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.md
@@ -0,0 +1,60 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+AFLW (ICCVW'2011)
+
+```bibtex
+@inproceedings{koestinger2011annotated,
+ title={Annotated facial landmarks in the wild: A large-scale, real-world database for facial landmark localization},
+ author={Koestinger, Martin and Wohlhart, Paul and Roth, Peter M and Bischof, Horst},
+ booktitle={2011 IEEE international conference on computer vision workshops (ICCV workshops)},
+ pages={2144--2151},
+ year={2011},
+ organization={IEEE}
+}
+```
+
+
+
+Results on AFLW dataset
+
+The model is trained on AFLW train and evaluated on AFLW full and frontal.
+
+| Arch | Input Size | NME*full* | NME*frontal* | ckpt | log |
+| :------------------------------------------------ | :--------: | :------------------: | :---------------------: | :-----------------------------------------------: | :-----------------------------------------------: |
+| [pose_hrnetv2_w18_dark](/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py) | 256x256 | 1.35 | 1.19 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark-219606c0_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark_20210125.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..54c09538974835c5a701de61f41c812d2813940a
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/hrnetv2_dark_aflw.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: AFLW
+ Name: td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256
+ Results:
+ - Dataset: AFLW
+ Metrics:
+ NME frontal: 1.19
+ NME full: 1.35
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_aflw_256x256_dark-219606c0_20210125.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a157a01442f155d34f7fd330014028bc77c4f888
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_8xb64-60e_aflw-256x256.py
@@ -0,0 +1,156 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 60 epochs; val_interval=1 presumably means validating after every epoch — confirm
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with a base learning rate of 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=60,
+ milestones=[40, 55], # LR is scaled by gamma at epochs 40 and 55
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; base_batch_size=512 matches the "8xb64" in the config file name (8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the best checkpoint is selected by NME, lower being better (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: 256x256 input paired with 64x64 heatmaps; shared by the head decoder and the training target generator
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator built from an HRNet backbone, a FeatureMapProcessor neck and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True, # NOTE(review): presumably concatenates the multi-scale backbone outputs — confirm
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270, # equals 18 + 36 + 72 + 144, the sum of the stage4 branch widths
+ out_channels=19, # NOTE(review): presumably one heatmap per AFLW landmark — confirm
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: AFLWDataset in top-down mode, rooted at data/aflw/
+dataset_type = 'AFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/aflw/'
+
+# pipelines: training adds random flip and random bbox rotation/scaling and generates targets; validation has no random steps and no target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0, # shift probability 0, so presumably no random bbox shift is applied — confirm
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training shuffles the AFLW train annotations; validation and testing share the unshuffled AFLW test annotations
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_aflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_aflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader # testing reuses the validation loader settings unchanged
+
+# evaluators: NME normalised by the 'bbox_size' item (norm_mode='use_norm_item'); testing reuses the validation evaluator
+val_evaluator = dict(
+ type='NME', norm_mode='use_norm_item', norm_item='bbox_size')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..44100cebe60bbe023837dba7586f2c913b731918
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/aflw/td-hm_hrnetv2-w18_dark-8xb64-60e_aflw-256x256.py
@@ -0,0 +1,160 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 60 epochs; val_interval=1 presumably means validating after every epoch — confirm
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with a base learning rate of 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=60,
+ milestones=[40, 55], # LR is scaled by gamma at epochs 40 and 55
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; base_batch_size=512 matches the "8xb64" in the config file name (8 x 64)
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: the best checkpoint is selected by NME, lower being better (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: 256x256 input paired with 64x64 heatmaps; shared by the head decoder and the training target generator
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True) # NOTE(review): presumably selects the DarkPose-style unbiased coding implied by "dark" in the config name — confirm
+
+# model settings: top-down estimator built from an HRNet backbone, a FeatureMapProcessor neck and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True, # NOTE(review): presumably concatenates the multi-scale backbone outputs — confirm
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270, # equals 18 + 36 + 72 + 144, the sum of the stage4 branch widths
+ out_channels=19, # NOTE(review): presumably one heatmap per AFLW landmark — confirm
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that GenerateTarget uses as encoder
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: AFLWDataset in top-down mode, rooted at data/aflw/
+dataset_type = 'AFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/aflw/'
+
+# pipelines: training adds random flip and random bbox rotation/scaling and generates targets; validation has no random steps and no target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0, # shift probability 0, so presumably no random bbox shift is applied — confirm
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: training shuffles the AFLW train annotations; validation and testing share the unshuffled AFLW test annotations
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_aflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_aflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader # testing reuses the validation loader settings unchanged
+
+# evaluators: NME normalised by the 'bbox_size' item (norm_mode='use_norm_item'); testing reuses the validation evaluator
+val_evaluator = dict(
+ type='NME', norm_mode='use_norm_item', norm_item='bbox_size')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..6099dcf06dcf8a9e988b77623bd6f3a7ee7883a7
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.md
@@ -0,0 +1,39 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [pose_hourglass_52](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0587 | [ckpt](https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256-6994cf2e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..704c01983e4ab53f87a0a1ec798b49bf4b8b5e6f
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hourglass_coco_wholebody_face.yml
@@ -0,0 +1,14 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture:
+ - Hourglass
+ Training Data: COCO-WholeBody-Face
+ Name: td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0587
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hourglass/hourglass52_coco_wholebody_face_256x256-6994cf2e_20210909.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..d16ea2bc7fa50b3b8df57219bc5e3fada52c3558
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.md
@@ -0,0 +1,39 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0569 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256-c1ca469b_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0a4a38d5b78e5390d60c71ba43d663fafb51d279
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_coco_wholebody_face.yml
@@ -0,0 +1,14 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: COCO-WholeBody-Face
+ Name: td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0569
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_coco_wholebody_face_256x256-c1ca469b_20210909.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..fd059ee23cc17a82ed71cfc0ca089785ea6e150e
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.md
@@ -0,0 +1,56 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [pose_hrnetv2_w18_dark](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0513 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark-3d9a334e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark_20210909.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..cedc4950f9d2fba11e9a18ce2ca5942dcc2492eb
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/hrnetv2_dark_coco_wholebody_face.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: COCO-WholeBody-Face
+ Name: td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0513
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_coco_wholebody_face_256x256_dark-3d9a334e_20210909.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..d551a6c9abc3a4da60aaa90dac1ddbb9802ddd83
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.md
@@ -0,0 +1,38 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [pose_mobilenetv2](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0611 | [ckpt](https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256-4a3f096e_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2bd4352119546e2670f4b6bd16c12d37213b099b
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/mobilenetv2_coco_wholebody_face.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - MobilenetV2
+ Training Data: COCO-WholeBody-Face
+ Name: td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0611
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/mobilenetv2/mobilenetv2_coco_wholebody_face_256x256-4a3f096e_20210909.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..e4609385bdc230558581a05f8ee0fe73b6b248b2
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.md
@@ -0,0 +1,55 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [pose_res50](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0582 | [ckpt](https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256-5128edf5_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ef91a3da21c316335caf8d88c0ebde9a6e1bd4d7
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/resnet_coco_wholebody_face.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: COCO-WholeBody-Face
+ Name: td-hm_res50_8xb32-60e_coco-wholebody-face-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0582
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/resnet/res50_coco_wholebody_face_256x256-5128edf5_20210909.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md
new file mode 100644
index 0000000000000000000000000000000000000000..2710c2ff39bb02fa46949f89592c8c116234a63b
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.md
@@ -0,0 +1,38 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Face (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Face val set
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------ | :--------: | :----: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [pose_scnet_50](/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py) | 256x256 | 0.0567 | [ckpt](https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256-a0183f5f_20210909.pth) | [log](https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256_20210909.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d3b052ffc51e133706d36246caae80563ac7edcb
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/scnet_coco_wholebody_face.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - SCNet
+ Training Data: COCO-WholeBody-Face
+ Name: td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Face
+ Metrics:
+ NME: 0.0567
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/scnet/scnet50_coco_wholebody_face_256x256-a0183f5f_20210909.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..0e6f5c5c9084bf03ec95e203c57bad4a91ce7179
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hourglass52_8xb32-60e_coco-wholebody-face-256x256.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: 60 training epochs, val_interval=1
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with base learning rate 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay (x0.1 at epochs 40 and 55)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,  # NOTE(review): larger than max_epochs=60 above; presumably harmless since both milestones are < 60 - confirm
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (256 is presumably 8 GPUs x 32, per the file name - confirm)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint with interval=1 and keep the best checkpoint by lowest NME (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: MSRA heatmap codec, 256x256 input, 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator with a single-stack HourglassNet backbone and a CPMHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ head=dict(
+ type='CPMHead',
+ in_channels=256,
+ out_channels=68,  # presumably one heatmap per face keypoint - confirm against the dataset
+ num_stages=1,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody face dataset, top-down mode, rooted at data/coco/
+dataset_type = 'CocoWholeBodyFaceDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip, bbox jitter and target generation on top of the val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch_size=32 and num_workers=2 for both splits; only the training sampler shuffles
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: NME with norm_mode='keypoint_distance'
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfeac90ced1307eeaa8fe9c83c59a3ae67b1cb23
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_8xb32-60e_coco-wholebody-face-256x256.py
@@ -0,0 +1,156 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: 60 training epochs, val_interval=1
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with base learning rate 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay (x0.1 at epochs 40 and 55)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,  # NOTE(review): larger than max_epochs=60 above; presumably harmless since both milestones are < 60 - confirm
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (256 is presumably 8 GPUs x 32, per the file name - confirm)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint with interval=1 and keep the best checkpoint by lowest NME (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: MSRA heatmap codec, 256x256 input, 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator with an HRNet (w18 widths) backbone, concat neck and HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),  # all four branch outputs are kept for the neck below
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,  # 18 + 36 + 72 + 144, the summed stage4 branch widths
+ out_channels=68,  # presumably one heatmap per face keypoint - confirm against the dataset
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody face dataset, top-down mode, rooted at data/coco/
+dataset_type = 'CocoWholeBodyFaceDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip, bbox jitter and target generation on top of the val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch_size=32 and num_workers=2 for both splits; only the training sampler shuffles
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: NME with norm_mode='keypoint_distance'
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c34f9aa5dc733f6dd1363212791b7f2c5b7f447
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_hrnetv2-w18_dark-8xb32-60e_coco-wholebody-face-256x256.py
@@ -0,0 +1,160 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: 60 training epochs, val_interval=1
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with base learning rate 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay (x0.1 at epochs 40 and 55)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,  # NOTE(review): larger than max_epochs=60 above; presumably harmless since both milestones are < 60 - confirm
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (256 is presumably 8 GPUs x 32, per the file name - confirm)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint with interval=1 and keep the best checkpoint by lowest NME (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: MSRA heatmap codec, 256x256 input, 64x64 heatmap, sigma=2, with unbiased=True
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True)  # presumably what selects the DarkPose variant named in the file name - confirm
+
+# model settings: top-down estimator with an HRNet (w18 widths) backbone, concat neck and HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),  # all four branch outputs are kept for the neck below
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,  # 18 + 36 + 72 + 144, the summed stage4 branch widths
+ out_channels=68,  # presumably one heatmap per face keypoint - confirm against the dataset
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody face dataset, top-down mode, rooted at data/coco/
+dataset_type = 'CocoWholeBodyFaceDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip, bbox jitter and target generation on top of the val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch_size=32 and num_workers=2 for both splits; only the training sampler shuffles
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: NME with norm_mode='keypoint_distance'
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f1a8629fc7448a4edc5e3a98b554b615efb7102
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_mobilenetv2_8xb32-60e_coco-wholebody-face-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: 60 training epochs, val_interval=1
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with base learning rate 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay (x0.1 at epochs 40 and 55)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,  # NOTE(review): larger than max_epochs=60 above; presumably harmless since both milestones are < 60 - confirm
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (256 is presumably 8 GPUs x 32, per the file name - confirm)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint with interval=1 and keep the best checkpoint by lowest NME (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: MSRA heatmap codec, 256x256 input, 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator with a pretrained MobileNetV2 backbone and a HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://mobilenet_v2')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1280,  # presumably the channel width of the backbone output at index 7 - confirm
+ out_channels=68,  # presumably one heatmap per face keypoint - confirm against the dataset
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody face dataset, top-down mode, rooted at data/coco/
+dataset_type = 'CocoWholeBodyFaceDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip, bbox jitter and target generation on top of the val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch_size=32 and num_workers=2 for both splits; only the training sampler shuffles
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: NME with norm_mode='keypoint_distance'
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..0070e55d69d26b5e50edfef7868dc4faa5b0b5f4
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_res50_8xb32-60e_coco-wholebody-face-256x256.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime config
+
+# runtime: 60 training epochs, val_interval=1
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer: Adam with base learning rate 2e-3
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy: iteration-based linear warm-up, then epoch-based step decay (x0.1 at epochs 40 and 55)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,  # NOTE(review): larger than max_epochs=60 above; presumably harmless since both milestones are < 60 - confirm
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (256 is presumably 8 GPUs x 32, per the file name - confirm)
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: checkpoint with interval=1 and keep the best checkpoint by lowest NME (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings: MSRA heatmap codec, 256x256 input, 64x64 heatmap, sigma=2
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator with a pretrained ResNet-50 backbone and a HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the channel width of the backbone's final output - confirm
+ out_channels=68,  # presumably one heatmap per face keypoint - confirm against the dataset
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody face dataset, top-down mode, rooted at data/coco/
+dataset_type = 'CocoWholeBodyFaceDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip, bbox jitter and target generation on top of the val steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: batch_size=32 and num_workers=2 for both splits; only the training sampler shuffles
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: NME with norm_mode='keypoint_distance'
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f79f4b1d362b527cd684ae927e61cf17ec821cd
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/coco_wholebody_face/td-hm_scnet50_8xb32-60e_coco-wholebody-face-256x256.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=60,  # keep in sync with train_cfg.max_epochs (60), not 210
+        milestones=[40, 55],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SCNet',
+ depth=50,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet50-7ef0a199.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=68,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyFaceDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.md
new file mode 100644
index 0000000000000000000000000000000000000000..b99f91f3d180287081543a27c6e61818092b3b1c
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.md
@@ -0,0 +1,42 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COFW (ICCV'2013)
+
+```bibtex
+@inproceedings{burgos2013robust,
+ title={Robust face landmark estimation under occlusion},
+ author={Burgos-Artizzu, Xavier P and Perona, Pietro and Doll{\'a}r, Piotr},
+ booktitle={Proceedings of the IEEE international conference on computer vision},
+ pages={1513--1520},
+ year={2013}
+}
+```
+
+
+
+Results on COFW dataset
+
+The model is trained on COFW train.
+
+| Arch | Input Size | NME | ckpt | log |
+| :------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :------------------------------------------------------------: |
+| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py) | 256x256 | 3.48 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256-49243ab8_20211019.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256_20211019.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..733e275685de62a483d48e2ec7eedf347d6d0e51
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/hrnetv2_cofw.yml
@@ -0,0 +1,14 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: COFW
+ Name: td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256
+ Results:
+ - Dataset: COFW
+ Metrics:
+ NME: 3.48
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_cofw_256x256-49243ab8_20211019.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c52342e950246755a9e5c0ed60302da936bb6fe
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/cofw/td-hm_hrnetv2-w18_8xb64-60e_cofw-256x256.py
@@ -0,0 +1,161 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', begin=0, end=500, start_factor=0.001,
+        by_epoch=False),  # warm-up
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=60,  # must span max_epochs so the epoch-55 milestone is applied
+        milestones=[40, 55],
+        gamma=0.1,
+        by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=1.5)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=29,
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'COFWDataset'
+data_mode = 'topdown'
+data_root = 'data/cofw/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0,
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/cofw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/cofw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..53d5c3b36d7c61e9f6db3542b047adda70e70a86
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.md
@@ -0,0 +1,59 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+AdaptiveWingloss (ICCV'2019)
+
+```bibtex
+@inproceedings{wang2019adaptive,
+ title={Adaptive wing loss for robust face alignment via heatmap regression},
+ author={Wang, Xinyao and Bo, Liefeng and Fuxin, Li},
+ booktitle={Proceedings of the IEEE/CVF international conference on computer vision},
+ pages={6971--6981},
+ year={2019}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME<sub>*test*</sub> | NME<sub>*pose*</sub> | NME<sub>*illumination*</sub> | NME<sub>*occlusion*</sub> | NME<sub>*blur*</sub> | NME<sub>*makeup*</sub> | NME<sub>*expression*</sub> | ckpt | log |
+| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: |
+| [pose_hrnetv2_w18_awing](/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py) | 256x256 | 4.02 | 6.94 | 3.97 | 4.78 | 4.59 | 3.87 | 4.28 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing-5af5055c_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing_20211212.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6ba45c82b7499f09e0664ef42589e46ec298aca9
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_awing_wflw.yml
@@ -0,0 +1,21 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - AdaptiveWingloss
+ Training Data: WFLW
+ Name: td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 4.59
+ NME expression: 4.28
+ NME illumination: 3.97
+ NME makeup: 3.87
+ NME occlusion: 4.78
+ NME pose: 6.94
+ NME test: 4.02
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_awing-5af5055c_20211212.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..476afb6c01c1a2f57c2030f673a913654b5a4698
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.md
@@ -0,0 +1,59 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME<sub>*test*</sub> | NME<sub>*pose*</sub> | NME<sub>*illumination*</sub> | NME<sub>*occlusion*</sub> | NME<sub>*blur*</sub> | NME<sub>*makeup*</sub> | NME<sub>*expression*</sub> | ckpt | log |
+| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: |
+| [pose_hrnetv2_w18_dark](/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py) | 256x256 | 3.98 | 6.98 | 3.96 | 4.78 | 4.56 | 3.89 | 4.29 | [ckpt](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark-3f8e0c2c_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark_20210125.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..bbb82185cf0f4be034cb31f4a8166128d522938e
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_dark_wflw.yml
@@ -0,0 +1,21 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: WFLW
+ Name: td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 4.56
+ NME expression: 4.29
+ NME illumination: 3.96
+ NME makeup: 3.89
+ NME occlusion: 4.78
+ NME pose: 6.98
+ NME test: 3.98
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/darkpose/hrnetv2_w18_wflw_256x256_dark-3f8e0c2c_20210125.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..c9b8eec0669a73d60b2023eefa84204e71c7d1d0
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.md
@@ -0,0 +1,42 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train.
+
+| Arch | Input Size | NME<sub>*test*</sub> | NME<sub>*pose*</sub> | NME<sub>*illumination*</sub> | NME<sub>*occlusion*</sub> | NME<sub>*blur*</sub> | NME<sub>*makeup*</sub> | NME<sub>*expression*</sub> | ckpt | log |
+| :--------- | :--------: | :------------------: | :------------------: | :--------------------------: | :-----------------------: | :------------------: | :--------------------: | :------------------------: | :--------: | :-------: |
+| [pose_hrnetv2_w18](/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py) | 256x256 | 4.06 | 6.97 | 3.99 | 4.83 | 4.58 | 3.94 | 4.33 | [ckpt](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256-2bf032a6_20210125.pth) | [log](https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256_20210125.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9124324f8b8fe8ca7d1835471b130013cee13efa
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/hrnetv2_wflw.yml
@@ -0,0 +1,20 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: WFLW
+ Name: td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME blur: 4.58
+ NME expression: 4.33
+ NME illumination: 3.99
+ NME makeup: 3.94
+ NME occlusion: 4.83
+ NME pose: 6.97
+ NME test: 4.06
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/hrnetv2/hrnetv2_w18_wflw_256x256-2bf032a6_20210125.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae373c816aec3e09f7780f304ce11687e48b8e32
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_8xb64-60e_wflw-256x256.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=60,
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=98,
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0,
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..ada24a97bb7954d42b2d300ea9e8a14b494da938
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_awing-8xb64-60e_wflw-256x256.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=60,
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=98,
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='AdaptiveWingLoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0,
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..973a850f3fdf2ab6300e8e56c4e1b92b15d3f63a
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_heatmap/wflw/td-hm_hrnetv2-w18_dark-8xb64-60e_wflw-256x256.py
@@ -0,0 +1,162 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=60, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=2e-3,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=60,
+ milestones=[40, 55],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18'),
+ ),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=98,
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_prob=0,
+ rotate_factor=60,
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/README.md b/mmpose/configs/face_2d_keypoint/topdown_regression/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5d20cb9a311c033eef1b8668b2d9d0e5c56e6514
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/README.md
@@ -0,0 +1,19 @@
+# Top-down regression-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given the object bounding boxes. In the second stage, regression-based methods directly regress the keypoint coordinates from the features extracted from the bounding box area, following the paradigm introduced in [Deeppose: Human pose estimation via deep neural networks](http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html).
+
+
+

+
+
+## Results and Models
+
+### WFLW Dataset
+
+Results on the WFLW test set
+
+| Model | Input Size | NME | ckpt | log |
+| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [ResNet-50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py) | 256x256 | 4.88 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) |
+| [ResNet-50+WingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.67 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss_20210303.log.json) |
+| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.44 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..f1d9629d0ad0caced74cb3b0f4781080c302588f
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.md
@@ -0,0 +1,75 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+SoftWingloss (TIP'2021)
+
+```bibtex
+@article{lin2021structure,
+ title={Structure-Coherent Deep Feature Learning for Robust Face Alignment},
+ author={Lin, Chunze and Zhu, Beier and Wang, Quan and Liao, Renjie and Qian, Chen and Lu, Jiwen and Zhou, Jie},
+ journal={IEEE Transactions on Image Processing},
+ year={2021},
+ publisher={IEEE}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train set.
+
+| Model | Input Size | NME | ckpt | log |
+| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [ResNet-50+SoftWingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.44 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss_20211212.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7c65215ccc2c748b5f1a65efe4fc555faea73ed4
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_softwingloss_wflw.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ - SoftWingloss
+ Training Data: WFLW
+ Name: td-reg_res50_softwingloss_8xb64-210e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME: 4.44
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_softwingloss-4d34f22a_20211212.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..1ec3e76dbad52d30e8ce1c458592ec13e6c8ee31
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.md
@@ -0,0 +1,58 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train set.
+
+| Model | Input Size | NME | ckpt | log |
+| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [ResNet-50](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py) | 256x256 | 4.88 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_20210303.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..81c7b79a7e47fa1647b1bf1f5d78d34b1dc73faf
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wflw.yml
@@ -0,0 +1,15 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ Training Data: WFLW
+ Name: td-reg_res50_8xb64-210e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME: 4.88
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256-92d0ba7f_20210303.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.md b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.md
new file mode 100644
index 0000000000000000000000000000000000000000..51477143d11c1755f9280026641ba68b954ec99e
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.md
@@ -0,0 +1,76 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+Wingloss (CVPR'2018)
+
+```bibtex
+@inproceedings{feng2018wing,
+ title={Wing Loss for Robust Facial Landmark Localisation with Convolutional Neural Networks},
+ author={Feng, Zhen-Hua and Kittler, Josef and Awais, Muhammad and Huber, Patrik and Wu, Xiao-Jun},
+ booktitle={Computer Vision and Pattern Recognition (CVPR), 2018 IEEE Conference on},
+ year={2018},
+ pages={2235--2245},
+ organization={IEEE}
+}
+```
+
+
+
+
+
+
+WFLW (CVPR'2018)
+
+```bibtex
+@inproceedings{wu2018look,
+ title={Look at boundary: A boundary-aware face alignment algorithm},
+ author={Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, Quan and Cai, Yici and Zhou, Qiang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={2129--2138},
+ year={2018}
+}
+```
+
+
+
+Results on WFLW dataset
+
+The model is trained on WFLW train set.
+
+| Model | Input Size | NME | ckpt | log |
+| :-------------------------------------------------------------- | :--------: | :--: | :------------------------------------------------------------: | :-----------------------------------------------------------: |
+| [ResNet-50+WingLoss](/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py) | 256x256 | 4.67 | [ckpt](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth) | [log](https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss_20210303.log.json) |
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.yml b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.yml
new file mode 100644
index 0000000000000000000000000000000000000000..49b409121a60336ae19f4d29d535017013f22c8f
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/resnet_wingloss_wflw.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py
+ In Collection: ResNet
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ - WingLoss
+ Training Data: WFLW
+ Name: td-reg_res50_wingloss_8xb64-210e_wflw-256x256
+ Results:
+ - Dataset: WFLW
+ Metrics:
+ NME: 4.67
+ Task: Face 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/face/deeppose/deeppose_res50_wflw_256x256_wingloss-f82a5e53_20210303.pth
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..2742f497b8fbdd7889281c660b9ccd804ccf754d
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_8xb64-210e_wflw-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then the LR is multiplied by 0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # linear warm-up over steps 0-500, counted in iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base_batch_size=512 = 8 x 64, matching the `8xb64` tag in this config's file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings: regression labels for a 256x256 input; reused below as the head decoder and the pipeline target encoder
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings: top-down estimator with a pretrained ResNet-50 backbone, global average pooling and a 98-keypoint regression head trained with SmoothL1Loss; test_cfg enables flip_test and shift_coords
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=98,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings: WFLW dataset in top-down mode, rooted at data/wflw/
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+# pipelines: training adds horizontal flip plus bbox scale/rotation jitter and generates targets with the codec; validation only loads, crops and packs
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# dataloaders: training uses shuffled batches of 64; validation uses unshuffled batches of 32 on the WFLW test annotations, and testing reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks: save the best checkpoint by NME, where a lower value is better (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less'))
+
+# evaluators: NME with norm_mode='keypoint_distance'; testing reuses the validation evaluator
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb4199073d712024f0495746ad902f4ea4dd9052
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_softwingloss_8xb64-210e_wflw-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then the LR is multiplied by 0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # linear warm-up over steps 0-500, counted in iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base_batch_size=512 = 8 x 64, matching the `8xb64` tag in this config's file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings: regression labels for a 256x256 input; reused below as the head decoder and the pipeline target encoder
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings: top-down estimator with a pretrained ResNet-50 backbone, global average pooling and a 98-keypoint regression head trained with SoftWingLoss; test_cfg enables flip_test and shift_coords
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=98,
+ loss=dict(type='SoftWingLoss', use_target_weight=True),
+ decoder=codec),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings: WFLW dataset in top-down mode, rooted at data/wflw/
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+# pipelines: training adds horizontal flip plus bbox scale/rotation jitter and generates targets with the codec; validation only loads, crops and packs
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# dataloaders: training uses shuffled batches of 64; validation uses unshuffled batches of 32 on the WFLW test annotations, and testing reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks: save the best checkpoint by NME, where a lower value is better (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less'))
+
+# evaluators: NME with norm_mode='keypoint_distance'; testing reuses the validation evaluator
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab519cd401bbd07212e4834c9a5d655418b49fb1
--- /dev/null
+++ b/mmpose/configs/face_2d_keypoint/topdown_regression/wflw/td-reg_res50_wingloss_8xb64-210e_wflw-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and run validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then the LR is multiplied by 0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # linear warm-up over steps 0-500, counted in iterations (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size (base_batch_size=512 = 8 x 64, matching the `8xb64` tag in this config's file name)
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings: regression labels for a 256x256 input; reused below as the head decoder and the pipeline target encoder
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings: top-down estimator with a pretrained ResNet-50 backbone, global average pooling and a 98-keypoint regression head trained with WingLoss; test_cfg enables flip_test and shift_coords
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,
+ num_joints=98,
+ loss=dict(type='WingLoss', use_target_weight=True),
+ decoder=codec),
+ train_cfg=dict(),
+ test_cfg=dict(
+ flip_test=True,
+ shift_coords=True,
+ ))
+
+# base dataset settings: WFLW dataset in top-down mode, rooted at data/wflw/
+dataset_type = 'WFLWDataset'
+data_mode = 'topdown'
+data_root = 'data/wflw/'
+
+# pipelines: training adds horizontal flip plus bbox scale/rotation jitter and generates targets with the codec; validation only loads, crops and packs
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# dataloaders: training uses shuffled batches of 64; validation uses unshuffled batches of 32 on the WFLW test annotations, and testing reuses the validation loader
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_train.json',
+ data_prefix=dict(img='images/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/face_landmarks_wflw_test.json',
+ data_prefix=dict(img='images/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks: save the best checkpoint by NME, where a lower value is better (rule='less')
+default_hooks = dict(checkpoint=dict(save_best='NME', rule='less'))
+
+# evaluators: NME with norm_mode='keypoint_distance'; testing reuses the validation evaluator
+val_evaluator = dict(
+ type='NME',
+ norm_mode='keypoint_distance',
+)
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/fashion_2d_keypoint/README.md b/mmpose/configs/fashion_2d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e7d761067afc34b6a7249faa187752b39ca24ffd
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/README.md
@@ -0,0 +1,7 @@
+# 2D Fashion Landmark Detection
+
+2D fashion landmark detection (also referred to as fashion alignment) aims to detect the keypoints located at the functional regions of clothes, such as the neckline and the cuffs.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_fashion_landmark.md) to prepare data.
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfashion2.md b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfashion2.md
new file mode 100644
index 0000000000000000000000000000000000000000..1dcfd593133c95f744869ff23bd2ec12a54e187c
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfashion2.md
@@ -0,0 +1,67 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+DeepFashion2 (CVPR'2019)
+
+```bibtex
+@article{DeepFashion2,
+ author = {Yuying Ge and Ruimao Zhang and Lingyun Wu and Xiaogang Wang and Xiaoou Tang and Ping Luo},
+ title={A Versatile Benchmark for Detection, Pose Estimation, Segmentation and Re-Identification of Clothing Images},
+ journal={CVPR},
+ year={2019}
+}
+```
+
+
+
+Results on DeepFashion2 val set
+
+| Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :-------------------- | :-------------------------------------------------: | :--------: | :-----: | :---: | :--: | :-------------------------------------------------: | :-------------------------------------------------: |
+| short_sleeved_shirt | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py) | 256x192 | 0.988 | 0.703 | 10.2 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_shirt_256x192-21e1c5da_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_shirt_256x192_20221208.log.json) |
+| long_sleeved_shirt | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py) | 256x192 | 0.973 | 0.587 | 16.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_shirt_256x192-8679e7e3_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_shirt_256x192_20221208.log.json) |
+| short_sleeved_outwear | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py) | 256x192 | 0.966 | 0.408 | 24.0 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_outwear_256x192-a04c1298_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_outwear_256x192_20221208.log.json) |
+| long_sleeved_outwear | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py) | 256x192 | 0.987 | 0.517 | 18.1 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_outwear_256x192-31fbaecf_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_outwear_256x192_20221208.log.json) |
+| vest | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py) | 256x192 | 0.981 | 0.643 | 12.7 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_256x192-4c48d05c_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_256x192_20221208.log.json) |
+| sling | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py) | 256x192 | 0.940 | 0.557 | 21.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_256x192-ebb2b736_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_256x192_20221208.log.json) |
+| shorts | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py) | 256x192 | 0.975 | 0.682 | 12.4 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_shorts_256x192-9ab23592_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_shorts_256x192_20221208.log.json) |
+| trousers | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py) | 256x192 | 0.973 | 0.625 | 14.8 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_trousers_256x192-3e632257_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_trousers_256x192_20221208.log.json) |
+| skirt | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py) | 256x192 | 0.952 | 0.653 | 16.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_skirt_256x192-09573469_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_skirt_256x192_20221208.log.json) |
+| short_sleeved_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py) | 256x192 | 0.980 | 0.603 | 15.6 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_dress_256x192-1345b07a_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_dress_256x192_20221208.log.json) |
+| long_sleeved_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py) | 256x192 | 0.976 | 0.518 | 20.1 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_dress_256x192-87bac74e_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_dress_256x192_20221208.log.json) |
+| vest_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py) | 256x192 | 0.980 | 0.600 | 16.0 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_dress_256x192-fb3fbd6f_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_dress_256x192_20221208.log.json) |
+| sling_dress | [pose_resnet_50](/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py) | 256x192 | 0.967 | 0.544 | 19.5 | [ckpt](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_dress_256x192-8ebae0eb_20221208.pth) | [log](https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_dress_256x192_20221208.log.json) |
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfasion2.yml b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfasion2.yml
new file mode 100644
index 0000000000000000000000000000000000000000..28825fa01100a9375f4640eb89575e829608ac37
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/res50_deepfasion2.yml
@@ -0,0 +1,185 @@
+Models:
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: DeepFashion2
+ Name: td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.703
+ EPE: 10.2
+ PCK@0.2: 0.988
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_shirt_256x192-21e1c5da_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.587
+ EPE: 16.5
+ PCK@0.2: 0.973
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_shirt_256x192-8679e7e3_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.408
+ EPE: 24.0
+ PCK@0.2: 0.966
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_outwear_256x192-a04c1298_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.517
+ EPE: 18.1
+ PCK@0.2: 0.987
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_outwear_256x192-31fbaecf_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_4xb64-210e_deepfasion2-vest-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.643
+ EPE: 12.7
+ PCK@0.2: 0.981
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_256x192-4c48d05c_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_4xb64-210e_deepfasion2-sling-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.557
+ EPE: 21.6
+ PCK@0.2: 0.94
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_256x192-ebb2b736_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.682
+ EPE: 12.4
+ PCK@0.2: 0.975
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_shorts_256x192-9ab23592_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.625
+ EPE: 14.8
+ PCK@0.2: 0.973
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_trousers_256x192-3e632257_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.653
+ EPE: 16.6
+ PCK@0.2: 0.952
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_skirt_256x192-09573469_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.603
+ EPE: 15.6
+ PCK@0.2: 0.98
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_short_sleeved_dress_256x192-1345b07a_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.518
+ EPE: 20.1
+ PCK@0.2: 0.976
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_long_sleeved_dress_256x192-87bac74e_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.6
+ EPE: 16.0
+ PCK@0.2: 0.98
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_vest_dress_256x192-fb3fbd6f_20221208.pth
+- Config: configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: DeepFashion2
+ Name: td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192
+ Results:
+ - Dataset: DeepFashion2
+ Metrics:
+ AUC: 0.544
+ EPE: 19.5
+ PCK@0.2: 0.967
+ Task: Fashion 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/fashion/resnet/res50_deepfashion2_sling_dress_256x192-8ebae0eb_20221208.pth
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..09dfaaa390bb2020e4a511d6ba111d35d5fa4378
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-long-sleeved-dress-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # DeepFashion2 long_sleeved_dress landmark config (ResNet-50 heatmap model); builds on the shared default runtime config
+
+# runtime: cap training at 210 epochs; val_interval=10 (presumably counted in epochs - confirm against the base config)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over steps 0-500, counted in iterations because by_epoch=False
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 64 equals 1 x 64, consistent with "1xb64" in this file's name and batch_size=64 below
+auto_scale_lr = dict(base_batch_size=64)
+
+# hooks: logger interval of 10; the checkpoint hook tracks the best checkpoint by AUC (higher is better)
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4 in each dimension (presumably (w, h), given "256x192" in the file name)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-50 backbone initialised from torchvision weights + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294, # NOTE(review): 294 is identical across the per-category configs; presumably the full DeepFashion2 landmark set - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that encodes the training targets in train_pipeline
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings (NOTE(review): data_root is spelled 'deepfasion2', not 'deepfashion2' - confirm it matches the on-disk directory)
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines: training adds random flip, random bbox transform and target generation; validation has no random steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training on the long_sleeved_dress annotations (batch 64); unshuffled validation on the matching validation annotations (batch 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_long_sleeved_dress_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_long_sleeved_dress_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK accuracy at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reuses the validation metrics
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0e6f0c63218874f4e40bdd06eb0cbc57b9365a7
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-skirt-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # DeepFashion2 skirt landmark config (ResNet-50 heatmap model); builds on the shared default runtime config
+
+# runtime: cap training at 210 epochs; val_interval=10 (presumably counted in epochs - confirm against the base config)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over steps 0-500, counted in iterations because by_epoch=False
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 64 equals 1 x 64, consistent with "1xb64" in this file's name and batch_size=64 below
+auto_scale_lr = dict(base_batch_size=64)
+
+# hooks: logger interval of 10; the checkpoint hook tracks the best checkpoint by AUC (higher is better)
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4 in each dimension (presumably (w, h), given "256x192" in the file name)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-50 backbone initialised from torchvision weights + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294, # NOTE(review): 294 is identical across the per-category configs; presumably the full DeepFashion2 landmark set - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that encodes the training targets in train_pipeline
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings (NOTE(review): data_root is spelled 'deepfasion2', not 'deepfashion2' - confirm it matches the on-disk directory)
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines: training adds random flip, random bbox transform and target generation; validation has no random steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training on the skirt annotations (batch 64); unshuffled validation on the matching validation annotations (batch 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_skirt_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_skirt_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK accuracy at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reuses the validation metrics
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bed7421991041145f028e2b91689b8c5125d205
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_1xb64-210e_deepfasion2-vest-dress-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # DeepFashion2 vest_dress landmark config (ResNet-50 heatmap model); builds on the shared default runtime config
+
+# runtime: cap training at 210 epochs; val_interval=10 (presumably counted in epochs - confirm against the base config)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over steps 0-500, counted in iterations because by_epoch=False
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 64 equals 1 x 64, consistent with "1xb64" in this file's name and batch_size=64 below
+auto_scale_lr = dict(base_batch_size=64)
+
+# hooks: logger interval of 10; the checkpoint hook tracks the best checkpoint by AUC (higher is better)
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4 in each dimension (presumably (w, h), given "256x192" in the file name)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-50 backbone initialised from torchvision weights + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294, # NOTE(review): 294 is identical across the per-category configs; presumably the full DeepFashion2 landmark set - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that encodes the training targets in train_pipeline
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings (NOTE(review): data_root is spelled 'deepfasion2', not 'deepfashion2' - confirm it matches the on-disk directory)
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines: training adds random flip, random bbox transform and target generation; validation has no random steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training on the vest_dress annotations (batch 64); unshuffled validation on the matching validation annotations (batch 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_vest_dress_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_vest_dress_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK accuracy at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reuses the validation metrics
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..617e59ae74be40511256c2b9e358300ea2348f27
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_2xb64-210e_deepfasion2-trousers-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # DeepFashion2 trousers landmark config (ResNet-50 heatmap model); builds on the shared default runtime config
+
+# runtime: cap training at 210 epochs; val_interval=10 (presumably counted in epochs - confirm against the base config)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over steps 0-500, counted in iterations because by_epoch=False
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 128 equals 2 x 64, consistent with "2xb64" in this file's name and batch_size=64 below
+auto_scale_lr = dict(base_batch_size=128)
+
+# hooks: logger interval of 10; the checkpoint hook tracks the best checkpoint by AUC (higher is better)
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4 in each dimension (presumably (w, h), given "256x192" in the file name)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-50 backbone initialised from torchvision weights + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294, # NOTE(review): 294 is identical across the per-category configs; presumably the full DeepFashion2 landmark set - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that encodes the training targets in train_pipeline
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings (NOTE(review): data_root is spelled 'deepfasion2', not 'deepfashion2' - confirm it matches the on-disk directory)
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines: training adds random flip, random bbox transform and target generation; validation has no random steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training on the trousers annotations (batch 64); unshuffled validation on the matching validation annotations (batch 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_trousers_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_trousers_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK accuracy at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reuses the validation metrics
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa3b2774fcaedf9c7ace5a335775011e6c0a7d29
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_3xb64-210e_deepfasion2-shorts-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # DeepFashion2 shorts landmark config (ResNet-50 heatmap model); builds on the shared default runtime config
+
+# runtime: cap training at 210 epochs; val_interval=10 (presumably counted in epochs - confirm against the base config)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over steps 0-500, counted in iterations because by_epoch=False
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 192 equals 3 x 64, consistent with "3xb64" in this file's name and batch_size=64 below
+auto_scale_lr = dict(base_batch_size=192)
+
+# hooks: logger interval of 10; the checkpoint hook tracks the best checkpoint by AUC (higher is better)
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4 in each dimension (presumably (w, h), given "256x192" in the file name)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-50 backbone initialised from torchvision weights + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294, # NOTE(review): 294 is identical across the per-category configs; presumably the full DeepFashion2 landmark set - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that encodes the training targets in train_pipeline
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings (NOTE(review): data_root is spelled 'deepfasion2', not 'deepfashion2' - confirm it matches the on-disk directory)
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines: training adds random flip, random bbox transform and target generation; validation has no random steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training on the shorts annotations (batch 64); unshuffled validation on the matching validation annotations (batch 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_shorts_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_shorts_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK accuracy at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reuses the validation metrics
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..0bfcabaa5478596cc026309e5f57e6ea5db83abc
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-short-sleeved-dress-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # DeepFashion2 short_sleeved_dress landmark config (ResNet-50 heatmap model); builds on the shared default runtime config
+
+# runtime: cap training at 210 epochs; val_interval=10 (presumably counted in epochs - confirm against the base config)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over steps 0-500, counted in iterations because by_epoch=False
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 256 equals 4 x 64, consistent with "4xb64" in this file's name and batch_size=64 below
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: logger interval of 10; the checkpoint hook tracks the best checkpoint by AUC (higher is better)
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4 in each dimension (presumably (w, h), given "256x192" in the file name)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-50 backbone initialised from torchvision weights + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294, # NOTE(review): 294 is identical across the per-category configs; presumably the full DeepFashion2 landmark set - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that encodes the training targets in train_pipeline
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings (NOTE(review): data_root is spelled 'deepfasion2', not 'deepfashion2' - confirm it matches the on-disk directory)
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines: training adds random flip, random bbox transform and target generation; validation has no random steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training on the short_sleeved_dress annotations (batch 64); unshuffled validation on the matching validation annotations (batch 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_short_sleeved_dress_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_short_sleeved_dress_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK accuracy at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reuses the validation metrics
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f627eb182c90b57ae53a4a9141f00ed333d3e229
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # DeepFashion2 sling landmark config (ResNet-50 heatmap model); builds on the shared default runtime config
+
+# runtime: cap training at 210 epochs; val_interval=10 (presumably counted in epochs - confirm against the base config)
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by step decay (x0.1 at epochs 170 and 200)
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over steps 0-500, counted in iterations because by_epoch=False
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size; the base of 256 equals 4 x 64, consistent with "4xb64" in this file's name and batch_size=64 below
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: logger interval of 10; the checkpoint hook tracks the best checkpoint by AUC (higher is better)
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmap codec; heatmap_size is input_size / 4 in each dimension (presumably (w, h), given "256x192" in the file name)
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator = ResNet-50 backbone initialised from torchvision weights + heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294, # NOTE(review): 294 is identical across the per-category configs; presumably the full DeepFashion2 landmark set - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec), # same codec dict that encodes the training targets in train_pipeline
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings (NOTE(review): data_root is spelled 'deepfasion2', not 'deepfashion2' - confirm it matches the on-disk directory)
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines: training adds random flip, random bbox transform and target generation; validation has no random steps
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training on the sling annotations (batch 64); unshuffled validation on the matching validation annotations (batch 32)
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_sling_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_sling_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK accuracy at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reuses the validation metrics
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b59607060c41a8ddbb4d38c5acc41e243cd2e96
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-sling-dress-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_sling_dress_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_sling_dress_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..4249d5a8971e80a4e068e51543b9191f36488542
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_4xb64-210e_deepfasion2-vest-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_vest_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_vest_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..4161952dcf31904e8df8c70ff25ca207c1cea2ae
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_6xb64-210e_deepfasion2-short-sleeved-shirt-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=384)
+
+# hooks
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_short_sleeved_shirt_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_short_sleeved_shirt_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..36e0318bf7a954fdbd35a8b59219a6cde2396df2
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-outwear-256x192.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_long_sleeved_outwear_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/'
+ 'deepfashion2_long_sleeved_outwear_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..f82e3cb5fb04011130521a35080b00f01a70ac68
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-long-sleeved-shirt-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_long_sleeved_shirt_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/deepfashion2_long_sleeved_shirt_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..30db99de9e96eaede42332daae3d55f578b941f2
--- /dev/null
+++ b/mmpose/configs/fashion_2d_keypoint/topdown_heatmap/deepfashion2/td-hm_res50_8xb64-210e_deepfasion2-short-sleeved-outwear-256x192.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ logger=dict(type='LoggerHook', interval=10),
+ checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=294,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'DeepFashion2Dataset'
+data_mode = 'topdown'
+data_root = 'data/deepfasion2/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='train/deepfashion2_short_sleeved_outwear_train.json',
+ data_prefix=dict(img='train/image/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='validation/'
+ 'deepfashion2_short_sleeved_outwear_validation.json',
+ data_prefix=dict(img='validation/image/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/README.md b/mmpose/configs/hand_2d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6f7758290eb914b88662e685135748c1fb5f665d
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/README.md
@@ -0,0 +1,18 @@
+# 2D Hand Pose Estimation
+
+2D hand pose estimation is defined as the task of detecting the poses (or keypoints) of the hand from an input image.
+
+Normally, the input images are cropped hand images, where the hand is located at the center;
+or the rough location (or the bounding box) of the hand is provided.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_hand_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/en/2d_hand_demo.md) to run demos.
+
+
+
+
diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/README.md b/mmpose/configs/hand_2d_keypoint/rtmpose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..9687b7e72c98376a00570389e7f1d003b4ace8f0
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/rtmpose/README.md
@@ -0,0 +1,16 @@
+# RTMPose
+
+Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet its application in the industrial community still suffers from heavy model parameters and high latency.
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose.
+Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries.
+To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deploying on the mobile device.
+
+## Results and Models
+
+### COCO-WholeBody-Hand Dataset
+
+Results on COCO-WholeBody-Hand val set
+
+| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download |
+| :-------: | :--------: | :-----: | :---: | :--: | :------------------------------------------------------------------------------------: |
+| RTMPose-m | 256x256 | 0.815 | 0.837 | 4.51 | [rtmpose_coco_wholebody_hand.md](./coco_wholebody_hand/rtmpose_coco_wholebody_hand.md) |
diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d5438586eca47ad9ae006a03f146ef245a3b502
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,233 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr over the second half of training (epoch 105 to 210)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=21,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5],
+ rotate_factor=180),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=180),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..b2a5957e6ec3850423188c4fde9fd4aeae9853ee
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.md
@@ -0,0 +1,39 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [rtmpose_m](/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.815 | 0.837 | 4.51 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-hand_pt-aic-coco_210e-256x256-99477206_20230228.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-hand_pt-aic-coco_210e-256x256-99477206_20230228.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..2f87733605b771ff0aa094c5702cdeec8c115e21
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose_coco_wholebody_hand.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/rtmpose/coco_wholebody_hand/rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: RTMPose
+ Alias: hand
+ Metadata:
+ Architecture:
+ - RTMPose
+ Training Data: COCO-WholeBody-Hand
+ Name: rtmpose-m_8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.837
+ EPE: 4.51
+ PCK@0.2: 0.815
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody-hand_pt-aic-coco_210e-256x256-99477206_20230228.pth
diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..689dc68096075ed5a53b32f243eefc13bf7deaca
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
@@ -0,0 +1,381 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# training datasets: COCO-WholeBody-Hand, OneHand10K, FreiHand2D, RHD2D, HalpeHand
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 10
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr over the second half of training (epoch max_epochs // 2 to max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(256, 256),
+ sigma=(5.66, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmpose/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=21,
+ input_size=codec['input_size'],
+ in_featuremap_size=(8, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/'
+
+backend_args = dict(backend='local')
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.5, 1.5],
+ rotate_factor=180),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ # dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=180),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.2),
+ dict(type='MedianBlur', p=0.2),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# train datasets
+dataset_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='detection/coco/train2017/'),
+ pipeline=[],
+)
+
+dataset_onehand10k = dict(
+ type='OneHand10KDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='onehand10k/annotations/onehand10k_train.json',
+ data_prefix=dict(img='pose/OneHand10K/'),
+ pipeline=[],
+)
+
+dataset_freihand = dict(
+ type='FreiHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='freihand/annotations/freihand_train.json',
+ data_prefix=dict(img='pose/FreiHand/'),
+ pipeline=[],
+)
+
+dataset_rhd = dict(
+ type='Rhd2DDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='rhd/annotations/rhd_train.json',
+ data_prefix=dict(img='pose/RHD/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=21,
+ mapping=[
+ (0, 0),
+ (1, 4),
+ (2, 3),
+ (3, 2),
+ (4, 1),
+ (5, 8),
+ (6, 7),
+ (7, 6),
+ (8, 5),
+ (9, 12),
+ (10, 11),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+ (15, 14),
+ (16, 13),
+ (17, 20),
+ (18, 19),
+ (19, 18),
+ (20, 17),
+ ])
+ ],
+)
+
+dataset_halpehand = dict(
+ type='HalpeHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_train_v1.json',
+ data_prefix=dict(img='pose/Halpe/hico_20160224_det/images/train2015/'),
+ pipeline=[],
+)
+
+# data loaders
+train_dataloader = dict(
+ batch_size=256,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(
+ from_file='configs/_base_/datasets/coco_wholebody_hand.py'),
+ datasets=[
+ dataset_coco, dataset_onehand10k, dataset_freihand, dataset_rhd,
+ dataset_halpehand
+ ],
+ pipeline=train_pipeline,
+ test_mode=False,
+ ))
+
+# test datasets
+val_coco = dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='coco/annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+val_onehand10k = dict(
+ type='OneHand10KDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='onehand10k/annotations/onehand10k_test.json',
+ data_prefix=dict(img='pose/OneHand10K/'),
+ pipeline=[],
+)
+
+val_freihand = dict(
+ type='FreiHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='freihand/annotations/freihand_test.json',
+ data_prefix=dict(img='pose/FreiHand/'),
+ pipeline=[],
+)
+
+val_rhd = dict(
+ type='Rhd2DDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='rhd/annotations/rhd_test.json',
+ data_prefix=dict(img='pose/RHD/'),
+ pipeline=[
+ dict(
+ type='KeypointConverter',
+ num_keypoints=21,
+ mapping=[
+ (0, 0),
+ (1, 4),
+ (2, 3),
+ (3, 2),
+ (4, 1),
+ (5, 8),
+ (6, 7),
+ (7, 6),
+ (8, 5),
+ (9, 12),
+ (10, 11),
+ (11, 10),
+ (12, 9),
+ (13, 16),
+ (14, 15),
+ (15, 14),
+ (16, 13),
+ (17, 20),
+ (18, 19),
+ (19, 18),
+ (20, 17),
+ ])
+ ],
+)
+
+val_halpehand = dict(
+ type='HalpeHandDataset',
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='halpe/annotations/halpe_val_v1.json',
+ data_prefix=dict(img='detection/coco/val2017/'),
+ pipeline=[],
+)
+
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type='CombinedDataset',
+ metainfo=dict(
+ from_file='configs/_base_/datasets/coco_wholebody_hand.py'),
+ datasets=[
+ val_coco, val_onehand10k, val_freihand, val_rhd, val_halpehand
+ ],
+ pipeline=val_pipeline,
+ test_mode=True,
+ ))
+
+val_dataloader = test_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='AUC', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md
new file mode 100644
index 0000000000000000000000000000000000000000..361770dad2ec789daf9bfde0d08b3947a8a2cf38
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.md
@@ -0,0 +1,67 @@
+
+
+
+RTMPose (arXiv'2023)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+
+
+
+
+
+RTMDet (arXiv'2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO (ECCV'2014)
+
+```bibtex
+@inproceedings{lin2014microsoft,
+ title={Microsoft coco: Common objects in context},
+ author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
+ booktitle={European conference on computer vision},
+ pages={740--755},
+ year={2014},
+ organization={Springer}
+}
+```
+
+
+
+- `Hand5` and `*` denote models trained on 5 public datasets:
+ - [COCO-Wholebody-Hand](https://github.com/jin-s13/COCO-WholeBody/)
+ - [OneHand10K](https://www.yangangwang.com/papers/WANG-MCC-2018-10.html)
+ - [FreiHand2d](https://lmb.informatik.uni-freiburg.de/projects/freihand/)
+ - [RHD2d](https://lmb.informatik.uni-freiburg.de/resources/datasets/RenderedHandposeDataset.en.html)
+ - [Halpe](https://mmpose.readthedocs.io/en/latest/dataset_zoo/2d_wholebody_keypoint.html#halpe)
+
+| Config | Input Size | PCK@0.2 (COCO-Wholebody-Hand) | PCK@0.2 (Hand5) | AUC (Hand5) | EPE (Hand5) | FLOPS(G) | Download |
+| :---------------------------------------: | :--------: | :-----------------------------------: | :---------------------: | :-----------------: | :-----------------: | :------: | :-----------------------------------------: |
+| [RTMPose-m\* (alpha version)](/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py) | 256x256 | 81.5 | 96.4 | 83.9 | 5.06 | 2.581 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth) |
diff --git a/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a8dfd42e39166be29c25a38354ad472c0612d313
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/rtmpose/hand5/rtmpose_hand5.yml
@@ -0,0 +1,27 @@
+Collections:
+- Name: RTMPose
+ Paper:
+ Title: "RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose"
+ URL: https://arxiv.org/abs/2303.07399
+ README: https://github.com/open-mmlab/mmpose/blob/main/projects/rtmpose/README.md
+Models:
+- Config: configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-256x256.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: &id001
+ - RTMPose
+ Training Data: &id002
+ - COCO-Wholebody-Hand
+ - OneHand10K
+ - FreiHand2d
+ - RHD2d
+ - Halpe
+ Name: rtmpose-m_8xb256-210e_hand5-256x256
+ Results:
+ - Dataset: Hand5
+ Metrics:
+ PCK@0.2: 0.964
+ AUC: 0.839
+ EPE: 5.06
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-hand5_pt-aic-coco_210e-256x256-74fb594_20230320.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7f63f1f8259dd1cf52bebd3612db8ceec3d23220
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/README.md
@@ -0,0 +1,55 @@
+# Top-down heatmap-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator will produce heatmaps which represent the likelihood of being a keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
+
+
+

+
+
+## Results and Models
+
+### COCO-WholeBody-Hand Dataset
+
+Results on COCO-WholeBody-Hand val set
+
+| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download |
+| :--------------: | :--------: | :-----: | :---: | :--: | :----------------------------------------------------------------------------------------------: |
+| HRNetv2-w18+Dark | 256x256 | 0.814 | 0.840 | 4.37 | [hrnetv2_dark_coco_wholebody_hand.md](./coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md) |
+| HRNetv2-w18 | 256x256 | 0.813 | 0.840 | 4.39 | [hrnetv2_coco_wholebody_hand.md](./coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md) |
+| HourglassNet | 256x256 | 0.804 | 0.835 | 4.54 | [hourglass_coco_wholebody_hand.md](./coco_wholebody_hand/hourglass_coco_wholebody_hand.md) |
+| SCNet-50 | 256x256 | 0.803 | 0.834 | 4.55 | [scnet_coco_wholebody_hand.md](./coco_wholebody_hand/scnet_coco_wholebody_hand.md) |
+| ResNet-50 | 256x256 | 0.800 | 0.833 | 4.64 | [resnet_coco_wholebody_hand.md](./coco_wholebody_hand/resnet_coco_wholebody_hand.md) |
+| LiteHRNet-18 | 256x256 | 0.795 | 0.830 | 4.77 | [litehrnet_coco_wholebody_hand.md](./coco_wholebody_hand/litehrnet_coco_wholebody_hand.md) |
+| MobileNet-v2 | 256x256 | 0.795 | 0.829 | 4.77 | [mobilenetv2_coco_wholebody_hand.md](./coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md) |
+
+### FreiHand Dataset
+
+Results on FreiHand val & test set
+
+| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download |
+| :-------: | :--------: | :-----: | :---: | :--: | :-------------------------------------------------------: |
+| ResNet-50 | 224x224 | 0.999 | 0.868 | 3.27 | [resnet_freihand2d.md](./freihand2d/resnet_freihand2d.md) |
+
+### OneHand10K Dataset
+
+Results on OneHand10K val set
+
+| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download |
+| :--------------: | :--------: | :-----: | :---: | :---: | :-------------------------------------------------------------------: |
+| HRNetv2-w18+Dark | 256x256 | 0.990 | 0.572 | 23.96 | [hrnetv2_dark_onehand10k.md](./onehand10k/hrnetv2_dark_onehand10k.md) |
+| HRNetv2-w18+UDP | 256x256 | 0.990 | 0.571 | 23.88 | [hrnetv2_udp_onehand10k.md](./onehand10k/hrnetv2_udp_onehand10k.md) |
+| HRNetv2-w18 | 256x256 | 0.990 | 0.567 | 24.26 | [hrnetv2_onehand10k.md](./onehand10k/hrnetv2_onehand10k.md) |
+| ResNet-50 | 256x256 | 0.989 | 0.555 | 25.16 | [resnet_onehand10k.md](./onehand10k/resnet_onehand10k.md) |
+| MobileNet-v2 | 256x256 | 0.986 | 0.537 | 28.56 | [mobilenetv2_onehand10k.md](./onehand10k/mobilenetv2_onehand10k.md) |
+
+### RHD Dataset
+
+Results on RHD test set
+
+| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download |
+| :--------------: | :--------: | :-----: | :---: | :--: | :----------------------------------------------------: |
+| HRNetv2-w18+Dark | 256x256 | 0.992 | 0.903 | 2.18 | [hrnetv2_dark_rhd2d.md](./rhd2d/hrnetv2_dark_rhd2d.md) |
+| HRNetv2-w18+UDP | 256x256 | 0.992 | 0.902 | 2.19 | [hrnetv2_udp_rhd2d.md](./rhd2d/hrnetv2_udp_rhd2d.md) |
+| HRNetv2-w18 | 256x256 | 0.992 | 0.902 | 2.21 | [hrnetv2_rhd2d.md](./rhd2d/hrnetv2_rhd2d.md) |
+| ResNet-50 | 256x256 | 0.991 | 0.898 | 2.32 | [resnet_rhd2d.md](./rhd2d/resnet_rhd2d.md) |
+| MobileNet-v2 | 256x256 | 0.985 | 0.883 | 2.79 | [mobilenetv2_rhd2d.md](./rhd2d/mobilenetv2_rhd2d.md) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..4728baaba2f19ad8e6760958be21ab54dc964266
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.md
@@ -0,0 +1,39 @@
+
+
+
+Hourglass (ECCV'2016)
+
+```bibtex
+@inproceedings{newell2016stacked,
+ title={Stacked hourglass networks for human pose estimation},
+ author={Newell, Alejandro and Yang, Kaiyu and Deng, Jia},
+ booktitle={European conference on computer vision},
+ pages={483--499},
+ year={2016},
+ organization={Springer}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_hourglass_52](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.804 | 0.835 | 4.54 | [ckpt](https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256-7b05c6db_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256_20210909.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f6247504e2b6323010a800ea7b5a1c6d01f51a99
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hourglass_coco_wholebody_hand.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: Hourglass
+ Metadata:
+ Architecture:
+ - Hourglass
+ Training Data: COCO-WholeBody-Hand
+ Name: td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.835
+ EPE: 4.54
+ PCK@0.2: 0.804
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hourglass/hourglass52_coco_wholebody_hand_256x256-7b05c6db_20210909.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..d944ff43a268aad9f781dbea9063ae4eb000a597
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.md
@@ -0,0 +1,39 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_hrnetv2_w18](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.813 | 0.840 | 4.39 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256_20210908.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f6c0046f663badee9d9b9ed0d42baaaaafe280ad
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_coco_wholebody_hand.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: COCO-WholeBody-Hand
+ Name: td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.84
+ EPE: 4.39
+ PCK@0.2: 0.813
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_coco_wholebody_hand_256x256-1c028db7_20210908.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..73896361860d72f901b173206a29d1985a85bd65
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.md
@@ -0,0 +1,56 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_hrnetv2_w18_dark](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.814 | 0.840 | 4.37 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark_20210908.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..af1d607d10f4fc62e678a62676fddcf0f1752295
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/hrnetv2_dark_coco_wholebody_hand.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: COCO-WholeBody-Hand
+ Name: td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.84
+ EPE: 4.37
+ PCK@0.2: 0.814
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_coco_wholebody_hand_256x256_dark-a9228c9c_20210908.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..7c084b79e1a463794988134b4403393fba540e6a
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.md
@@ -0,0 +1,37 @@
+
+
+
+LiteHRNet (CVPR'2021)
+
+```bibtex
+@inproceedings{Yulitehrnet21,
+ title={Lite-HRNet: A Lightweight High-Resolution Network},
+ author={Yu, Changqian and Xiao, Bin and Gao, Changxin and Yuan, Lu and Zhang, Lei and Sang, Nong and Wang, Jingdong},
+ booktitle={CVPR},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [LiteHRNet-18](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.795 | 0.830 | 4.77 | [ckpt](https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256-d6945e6a_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256_20210908.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..eeecbfe7e244b8c629fd4d621aea73a6b5694704
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/litehrnet_coco_wholebody_hand.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: LiteHRNet
+ Metadata:
+ Architecture:
+ - LiteHRNet
+ Training Data: COCO-WholeBody-Hand
+ Name: td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.83
+ EPE: 4.77
+ PCK@0.2: 0.795
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/litehrnet/litehrnet_w18_coco_wholebody_hand_256x256-d6945e6a_20210908.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..cc76358a8fe124d1f2bc04523e841e551590fccb
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.md
@@ -0,0 +1,38 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_mobilenetv2](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.795 | 0.829 | 4.77 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256-06b8c877_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256_20210909.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a9d0101ce77db1d1062cfb20c9d2d80f848f0ecf
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/mobilenetv2_coco_wholebody_hand.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - MobilenetV2
+ Training Data: COCO-WholeBody-Hand
+ Name: td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.829
+ EPE: 4.77
+ PCK@0.2: 0.795
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_coco_wholebody_hand_256x256-06b8c877_20210909.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..ae7f287e3d250de0f9a39a0190562eb9f6e69e7c
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.md
@@ -0,0 +1,55 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_resnet_50](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.800 | 0.833 | 4.64 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256-8dbc750c_20210908.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256_20210908.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..78d16a6e459e6cbafeffb2fb14d863910a7b2144
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/resnet_coco_wholebody_hand.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: COCO-WholeBody-Hand
+ Name: td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.833
+ EPE: 4.64
+ PCK@0.2: 0.8
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_coco_wholebody_hand_256x256-8dbc750c_20210908.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md
new file mode 100644
index 0000000000000000000000000000000000000000..06c6fda74c6c36df1abac5a06c5d642f9eac580d
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.md
@@ -0,0 +1,38 @@
+
+
+
+SCNet (CVPR'2020)
+
+```bibtex
+@inproceedings{liu2020improving,
+ title={Improving Convolutional Networks with Self-Calibrated Convolutions},
+ author={Liu, Jiang-Jiang and Hou, Qibin and Cheng, Ming-Ming and Wang, Changhu and Feng, Jiashi},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={10096--10105},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody-Hand (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody-Hand val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_scnet_50](/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py) | 256x256 | 0.803 | 0.834 | 4.55 | [ckpt](https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256-e73414c7_20210909.pth) | [log](https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256_20210909.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml
new file mode 100644
index 0000000000000000000000000000000000000000..a8887b3c8eeb2be74423d845ece640af8333e1d3
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/scnet_coco_wholebody_hand.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SCNet
+ Training Data: COCO-WholeBody-Hand
+ Name: td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256
+ Results:
+ - Dataset: COCO-WholeBody-Hand
+ Metrics:
+ AUC: 0.834
+ EPE: 4.55
+ PCK@0.2: 0.803
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/scnet/scnet50_coco_wholebody_hand_256x256-e73414c7_20210909.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0bc1c8739c9d8ea1fc585882abe5b8189087e2a
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hourglass52_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size (8 GPUs x 32) used when auto-scaling the LR
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HourglassNet',
+ num_stacks=1,
+ ),
+ head=dict(
+ type='CPMHead',
+ in_channels=256,
+ out_channels=21,
+ num_stages=1,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=180.0,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9b9f0f281b9bc72598b9e1ffacd99f58248175d
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size (8 GPUs x 32) used when auto-scaling the LR
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=21,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d67f393f6612aab494a47c15bd9ce7b68fc8b4d
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_hrnetv2-w18_dark-8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size (8 GPUs x 32) used when auto-scaling the LR
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+# codec settings; unbiased=True turns on DARK-style unbiased heatmap coding
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=21,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..f3a6150e49e687bb3d510bd7139d66bd8ac8b37f
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_litehrnet-w18_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,136 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size (8 GPUs x 32) used when auto-scaling the LR
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='LiteHRNet',
+ in_channels=3,
+ extra=dict(
+ stem=dict(stem_channels=32, out_channels=32, expand_ratio=1),
+ num_stages=3,
+ stages_spec=dict(
+ num_modules=(2, 4, 2),
+ num_branches=(2, 3, 4),
+ num_blocks=(2, 2, 2),
+ module_type=('LITE', 'LITE', 'LITE'),
+ with_fuse=(True, True, True),
+ reduce_ratios=(8, 8, 8),
+ num_channels=(
+ (40, 80),
+ (40, 80, 160),
+ (40, 80, 160, 320),
+ )),
+ with_head=True,
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=40,
+ out_channels=21,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..dba8538a5fe7b4313b888cae5a21f0c55b58c340
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_mobilenetv2_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,120 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size (8 GPUs x 32) used when auto-scaling the LR
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(type='Pretrained', checkpoint='mmcls://mobilenet_v2')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1280,
+ out_channels=21,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..c04950bfaabcfebc806ce541e8d5285d0bca75be
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_res50_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,119 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size (8 GPUs x 32) used when auto-scaling the LR
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=21,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..f596227c5c109fe51b0fd822c1f2b26b4abaae83
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/coco_wholebody_hand/td-hm_scnet50_8xb32-210e_coco-wholebody-hand-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: 500-iter linear warm-up, then LR x0.1 at epochs 170 and 200
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference total batch size (8 GPUs x 32) used when auto-scaling the LR
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='SCNet',
+ depth=50,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/scnet50-7ef0a199.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=21,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyHandDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE')
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..f1a6c80132255b1cd6d029acd05add2a71f72e98
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.md
@@ -0,0 +1,56 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+FreiHand (ICCV'2019)
+
+```bibtex
+@inproceedings{zimmermann2019freihand,
+ title={Freihand: A dataset for markerless capture of hand pose and shape from single rgb images},
+ author={Zimmermann, Christian and Ceylan, Duygu and Yang, Jimei and Russell, Bryan and Argus, Max and Brox, Thomas},
+ booktitle={Proceedings of the IEEE International Conference on Computer Vision},
+ pages={813--822},
+ year={2019}
+}
+```
+
+
+
+Results on FreiHand val & test set
+
+| Set | Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--- | :-------------------------------------------------------: | :--------: | :-----: | :---: | :--: | :-------------------------------------------------------: | :------------------------------------------------------: |
+| test | [pose_resnet_50](/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py) | 224x224 | 0.999 | 0.868 | 3.27 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224_20200914.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..9937b50be6756f1b8cd3de45347cabb159919d2f
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/resnet_freihand2d.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: FreiHand
+ Name: td-hm_res50_8xb64-100e_freihand2d-224x224
+ Results:
+ - Dataset: FreiHand
+ Metrics:
+ AUC: 0.868
+ EPE: 3.27
+ PCK@0.2: 0.999
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_freihand_224x224-ff0799bc_20200914.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd1750cdebc9d977ae917432b3e714ee1275f3d8
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/freihand2d/td-hm_res50_8xb64-100e_freihand2d-224x224.py
@@ -0,0 +1,138 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=100, val_interval=1)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=100,
+ milestones=[50, 70],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference batch size (8 GPUs x 64) used to auto-scale LR to the actual batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='AUC', rule='greater', interval=1))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(224, 224), heatmap_size=(56, 56), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=21,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'FreiHandDataset'
+data_mode = 'topdown'
+data_root = 'data/freihand/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.25,
+ rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale', padding=0.8),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/freihand_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/freihand_val.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/freihand_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..59d70fc597094e1597440809c5b1de2d9e4a760f
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.md
@@ -0,0 +1,60 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: |
+| [pose_hrnetv2_w18_dark](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.572 | 23.96 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark-a2f80c64_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7fc64b75c7a445e7cae9c0350e5847b90205f87f
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_dark_onehand10k.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: OneHand10K
+ Name: td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.572
+ EPE: 23.96
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_onehand10k_256x256_dark-a2f80c64_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..262bf3225390b69c8d965e02d9f78691a39b4760
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.md
@@ -0,0 +1,43 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: |
+| [pose_hrnetv2_w18](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.567 | 24.26 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256-30bc9c6b_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..fd0c75587621aa94af22654a8a3ea80957eddc0a
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_onehand10k.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: OneHand10K
+ Name: td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.567
+ EPE: 24.26
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_onehand10k_256x256-30bc9c6b_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..ca1599c116e3df24438842da11a05c713fbf99b1
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.md
@@ -0,0 +1,60 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: |
+| [pose_hrnetv2_w18_udp](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.571 | 23.88 | [ckpt](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp-0d1b515d_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..32d5dd6db5e3f6873456dd5016bdb74a75b783cf
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/hrnetv2_udp_onehand10k.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py
+ In Collection: UDP
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - UDP
+ Training Data: OneHand10K
+ Name: td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.571
+ EPE: 23.88
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_onehand10k_256x256_udp-0d1b515d_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..3f0bf9d1b76f27294712462584791657c910212e
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.md
@@ -0,0 +1,42 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: |
+| [pose_mobilenet_v2](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.986 | 0.537 | 28.56 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256-f3a3d90e_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ade1f054f1b16754be9c2dddf174a85db838140d
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/mobilenetv2_onehand10k.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - MobilenetV2
+ Training Data: OneHand10K
+ Name: td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.537
+ EPE: 28.56
+ PCK@0.2: 0.986
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_onehand10k_256x256-f3a3d90e_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..c07817d68eae7caba106695c69d6ab1746e6c5b8
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.md
@@ -0,0 +1,59 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: |
+| [pose_resnet_50](/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py) | 256x256 | 0.989 | 0.555 | 25.16 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256-739c8639_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..59dc7f523f3f9d75e2451587525c36cf82ef851f
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/resnet_onehand10k.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: OneHand10K
+ Name: td-hm_res50_8xb32-210e_onehand10k-256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.555
+ EPE: 25.16
+ PCK@0.2: 0.989
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_onehand10k_256x256-739c8639_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..99419065aa879884fddb6afa568257fb1b9fe340
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_8xb64-210e_onehand10k-256x256.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference batch size (8 GPUs x 64) used to auto-scale LR to the actual batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://msra/hrnetv2_w18',
+ )),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=21,
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'OneHand10KDataset'
+data_mode = 'topdown'
+data_root = 'data/onehand10k/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..610e9d149b658166a37d1fa1a028efab32d0637d
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_dark-8xb64-210e_onehand10k-256x256.py
@@ -0,0 +1,162 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# reference batch size (8 GPUs x 64) used to auto-scale LR to the actual batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://msra/hrnetv2_w18',
+ )),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,
+ out_channels=21,
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'OneHand10KDataset'
+data_mode = 'topdown'
+data_root = 'data/onehand10k/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# evaluators
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..54e2220d636601ac4a19a116aa6d0aabe138dbef
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_hrnetv2-w18_udp-8xb64-210e_onehand10k-256x256.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, running validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay on epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # batch size lr=5e-4 refers to
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: UDP heatmaps, 256x256 input encoded as 64x64 heatmaps
+codec = dict(
+ type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator (HRNet -> concat neck -> heatmap head)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://msra/hrnetv2_w18',
+ )),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,  # 18 + 36 + 72 + 144, the stage4 branch widths
+ out_channels=21,  # presumably one heatmap per hand keypoint - confirm
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,  # differs from the MSRA-codec configs (True)
+ ))
+
+# base dataset settings
+dataset_type = 'OneHand10KDataset'
+data_mode = 'topdown'
+data_root = 'data/onehand10k/'
+
+# pipelines: training adds random flip and bbox rotation/scale jitter
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, ordered test split for validation
+train_dataloader = dict(
+ batch_size=64,  # file name says 8xb64, presumably 8 GPUs x 64 - confirm
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # same metrics for testing
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f4e61c37c5692b62d407f642e23b38197e23d47
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_mobilenetv2_8xb64-210e_onehand10k-256x256.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, running validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay on epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # batch size lr=5e-4 refers to
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input encoded as 64x64 heatmaps
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator (MobileNetV2 backbone -> heatmap head)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='mmcls://mobilenet_v2',
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1280,  # presumably the backbone output width - confirm
+ out_channels=21,  # presumably one heatmap per hand keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'OneHand10KDataset'
+data_mode = 'topdown'
+data_root = 'data/onehand10k/'
+
+# pipelines: training adds random flip and bbox rotation/scale jitter
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, ordered test split for validation
+train_dataloader = dict(
+ batch_size=64,  # file name says 8xb64, presumably 8 GPUs x 64 - confirm
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # same metrics for testing
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..36589d899ddd930143749845b5fd5650917d23ec
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/onehand10k/td-hm_res50_8xb32-210e_onehand10k-256x256.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, running validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay on epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+# NOTE(review): 512 here vs. 8xb32 in the file name (= 256) - confirm intent
+auto_scale_lr = dict(base_batch_size=512)
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input encoded as 64x64 heatmaps
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator (ResNet-50 backbone -> heatmap head)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='torchvision://resnet50',
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the backbone output width - confirm
+ out_channels=21,  # presumably one heatmap per hand keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'OneHand10KDataset'
+data_mode = 'topdown'
+data_root = 'data/onehand10k/'
+
+# pipelines: training adds random flip and bbox rotation/scale jitter
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, ordered test split for validation
+train_dataloader = dict(
+ batch_size=32,  # file name says 8xb32, presumably 8 GPUs x 32 - confirm
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # same metrics for testing
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..334d97978c1dbb9b7ddca0df13f75372d753a067
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.md
@@ -0,0 +1,58 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_hrnetv2_w18_dark](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.992 | 0.903 | 2.18 | [ckpt](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark-4df3a347_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..7400dc19e019f605efe6688de5be8170c1aa2c4b
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_dark_rhd2d.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - DarkPose
+ Training Data: RHD
+ Name: td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.903
+ EPE: 2.18
+ PCK@0.2: 0.992
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/dark/hrnetv2_w18_rhd2d_256x256_dark-4df3a347_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..6fe91fe17b3ef52fdd65751dab103641202c5595
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.md
@@ -0,0 +1,41 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_hrnetv2_w18](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.992 | 0.902 | 2.21 | [ckpt](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256-95b20dd8_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..f5292da7706f8a51a7a52a9dc98371ec9705aff5
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_rhd2d.yml
@@ -0,0 +1,16 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py
+ In Collection: HRNetv2
+ Metadata:
+ Architecture:
+ - HRNetv2
+ Training Data: RHD
+ Name: td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.902
+ EPE: 2.21
+ PCK@0.2: 0.992
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/hrnetv2/hrnetv2_w18_rhd2d_256x256-95b20dd8_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..c494eb8fc6b79e310f3b9099cf0cfc6059ce4f8c
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.md
@@ -0,0 +1,58 @@
+
+
+
+HRNetv2 (TPAMI'2019)
+
+```bibtex
+@article{WangSCJDZLMTWLX19,
+ title={Deep High-Resolution Representation Learning for Visual Recognition},
+ author={Jingdong Wang and Ke Sun and Tianheng Cheng and
+ Borui Jiang and Chaorui Deng and Yang Zhao and Dong Liu and Yadong Mu and
+ Mingkui Tan and Xinggang Wang and Wenyu Liu and Bin Xiao},
+ journal={TPAMI},
+ year={2019}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :------: | :---: | :--: | :--------------------------------------------------------: | :-------------------------------------------------------: |
+| [pose_hrnetv2_w18_udp](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.992 | 0.902 | 2.19 | [ckpt](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp-63ba6007_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..db63b682e2fc50570d118a3755d9e64285fb6fe5
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/hrnetv2_udp_rhd2d.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py
+ In Collection: UDP
+ Metadata:
+ Architecture:
+ - HRNetv2
+ - UDP
+ Training Data: RHD
+ Name: td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.902
+ EPE: 2.19
+ PCK@0.2: 0.992
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/udp/hrnetv2_w18_rhd2d_256x256_udp-63ba6007_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..877247fe86ff2ec59a296f5cb45d88b392533135
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.md
@@ -0,0 +1,40 @@
+
+
+
+MobilenetV2 (CVPR'2018)
+
+```bibtex
+@inproceedings{sandler2018mobilenetv2,
+ title={Mobilenetv2: Inverted residuals and linear bottlenecks},
+ author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={4510--4520},
+ year={2018}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_mobilenet_v2](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.985 | 0.883 | 2.79 | [ckpt](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256-85fa02db_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..202a636fbe70e59ff71b6f225273242162c400cc
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/mobilenetv2_rhd2d.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - MobilenetV2
+ Training Data: RHD
+ Name: td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.883
+ EPE: 2.79
+ PCK@0.2: 0.985
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/mobilenetv2/mobilenetv2_rhd2d_256x256-85fa02db_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..f103a0df40e4ec57223a752e2c833f42b6909ae8
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.md
@@ -0,0 +1,57 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [pose_resnet50](/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.991 | 0.898 | 2.32 | [ckpt](https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256-5dc7e4cc_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d09f8ba2685b86fb1326c0460b7eb6fc83dd95fa
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/resnet_rhd2d.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture:
+ - SimpleBaseline2D
+ - ResNet
+ Training Data: RHD
+ Name: td-hm_res50_8xb64-210e_rhd2d-256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.898
+ EPE: 2.32
+ PCK@0.2: 0.991
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/resnet/res50_rhd2d_256x256-5dc7e4cc_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a9bcc9b896ae499e034605209f1c7eb14ba7b39
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_8xb64-210e_rhd2d-256x256.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, running validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay on epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # batch size lr=5e-4 refers to
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input encoded as 64x64 heatmaps
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: top-down estimator (HRNet -> concat neck -> heatmap head)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://msra/hrnetv2_w18',
+ )),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,  # 18 + 36 + 72 + 144, the stage4 branch widths
+ out_channels=21,  # presumably one heatmap per hand keypoint - confirm
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'Rhd2DDataset'
+data_mode = 'topdown'
+data_root = 'data/rhd/'
+
+# pipelines: training adds random flip and bbox rotation/scale jitter
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, ordered test split for validation
+train_dataloader = dict(
+ batch_size=64,  # file name says 8xb64, presumably 8 GPUs x 64 - confirm
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # same metrics for testing
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..44b8dc0f5a1c55d10293c40e5b8314fca6aa9b9c
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_dark-8xb64-210e_rhd2d-256x256.py
@@ -0,0 +1,162 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs, running validation every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with a base learning rate of 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up, then step decay on epoch milestones
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # LR is scaled by gamma at these epochs
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # batch size lr=5e-4 refers to
+
+# hooks: keep the checkpoint with the highest validation AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: MSRA heatmaps, 256x256 input encoded as 64x64 heatmaps
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(256, 256),
+ heatmap_size=(64, 64),
+ sigma=2,
+ unbiased=True)  # presumably what makes this the DARK variant - confirm
+
+# model settings: top-down estimator (HRNet -> concat neck -> heatmap head)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://msra/hrnetv2_w18',
+ )),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,  # 18 + 36 + 72 + 144, the stage4 branch widths
+ out_channels=21,  # presumably one heatmap per hand keypoint - confirm
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'Rhd2DDataset'
+data_mode = 'topdown'
+data_root = 'data/rhd/'
+
+# pipelines: training adds random flip and bbox rotation/scale jitter
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, ordered test split for validation
+train_dataloader = dict(
+ batch_size=64,  # file name says 8xb64, presumably 8 GPUs x 64 - confirm
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_test.json',
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # same metrics for testing
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1c796234dd22760f6f52dcb97d05ad0410ceabb
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_hrnetv2-w18_udp-8xb64-210e_rhd2d-256x256.py
@@ -0,0 +1,158 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up counted in iterations, then step decay counted in epochs
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500 (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the greatest AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: 256x256 input, 64x64 heatmaps (UDP variant)
+codec = dict(
+ type='UDPHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: HRNet backbone -> FeatureMapProcessor neck -> HeatmapHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(18, 36)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(18, 36, 72)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(18, 36, 72, 144),
+ multiscale_output=True),
+ upsample=dict(mode='bilinear', align_corners=False)),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='open-mmlab://msra/hrnetv2_w18',
+ )),
+ neck=dict(
+ type='FeatureMapProcessor',
+ concat=True,  # presumably concatenates the backbone's multi-scale outputs — TODO confirm
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=270,  # 270 = 18 + 36 + 72 + 144, the stage4 branch widths
+ out_channels=21,  # presumably one heatmap per hand keypoint — TODO confirm
+ deconv_out_channels=None,
+ conv_out_channels=(270, ),
+ conv_kernel_sizes=(1, ),
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'Rhd2DDataset'
+data_mode = 'topdown'
+data_root = 'data/rhd/'
+
+# pipelines: training adds random flip, random bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),  # training only; val_pipeline has no such step
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled batches of 64 for training, unshuffled batches of 32 for validation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_test.json',  # validation reads the test annotation file
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reports the same metrics
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..d7176bacd73cad44295c84ae8f4b9b1d1201bf35
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_mobilenetv2_8xb64-210e_rhd2d-256x256.py
@@ -0,0 +1,125 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up counted in iterations, then step decay counted in epochs
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500 (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the greatest AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: 256x256 input, 64x64 heatmaps
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: MobileNetV2 backbone feeding a HeatmapHead (no neck configured)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='MobileNetV2',
+ widen_factor=1.,
+ out_indices=(7, ),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='mmcls://mobilenet_v2',
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1280,  # presumably the width of backbone output 7 — TODO confirm
+ out_channels=21,  # presumably one heatmap per hand keypoint — TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'Rhd2DDataset'
+data_mode = 'topdown'
+data_root = 'data/rhd/'
+
+# pipelines: training adds random flip, random bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),  # training only; val_pipeline has no such step
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled batches of 64 for training, unshuffled batches of 32 for validation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_test.json',  # validation reads the test annotation file
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reports the same metrics
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..da5556802891a6d742527e7889d0f31161223eef
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_heatmap/rhd2d/td-hm_res50_8xb64-210e_rhd2d-256x256.py
@@ -0,0 +1,124 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up counted in iterations, then step decay counted in epochs
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500 (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the greatest AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: 256x256 input, 64x64 heatmaps
+codec = dict(
+ type='MSRAHeatmap', input_size=(256, 256), heatmap_size=(64, 64), sigma=2)
+
+# model settings: ResNet-50 backbone feeding a HeatmapHead (no neck configured)
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='torchvision://resnet50',
+ )),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,  # presumably the ResNet-50 final-stage width — TODO confirm
+ out_channels=21,  # presumably one heatmap per hand keypoint — TODO confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'Rhd2DDataset'
+data_mode = 'topdown'
+data_root = 'data/rhd/'
+
+# pipelines: training adds random flip, random bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),  # training only; val_pipeline has no such step
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled batches of 64 for training, unshuffled batches of 32 for validation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_test.json',  # validation reads the test annotation file
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reports the same metrics
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/README.md b/mmpose/configs/hand_2d_keypoint/topdown_regression/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0210a89c2de35fc5f0a480662aa216e61ac9c623
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/README.md
@@ -0,0 +1,25 @@
+# Top-down regression-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. In the second stage, regression-based methods directly regress the keypoint coordinates given the features extracted from the bounding box area, following the paradigm introduced in [DeepPose: Human pose estimation via deep neural networks](http://openaccess.thecvf.com/content_cvpr_2014/html/Toshev_DeepPose_Human_Pose_2014_CVPR_paper.html).
+
+
+

+
+
+## Results and Models
+
+### OneHand10K Dataset
+
+Results on OneHand10K val set
+
+| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download |
+| :-------: | :--------: | :-----: | :---: | :---: | :-------------------------------------------------------: |
+| ResNet-50 | 256x256 | 0.990 | 0.485 | 34.21 | [resnet_onehand10k.md](./onehand10k/resnet_onehand10k.md) |
+
+### RHD Dataset
+
+Results on RHD test set
+
+| Model | Input Size | PCK@0.2 | AUC | EPE | Details and Download |
+| :-------: | :--------: | :-----: | :---: | :--: | :----------------------------------------: |
+| ResNet-50 | 256x256 | 0.988 | 0.865 | 3.32 | [resnet_rhd2d.md](./rhd2d/resnet_rhd2d.md) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.md b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.md
new file mode 100644
index 0000000000000000000000000000000000000000..40c0c184959466df639518450dcf17aa0b60ba30
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.md
@@ -0,0 +1,59 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+OneHand10K (TCSVT'2019)
+
+```bibtex
+@article{wang2018mask,
+ title={Mask-pose cascaded cnn for 2d hand pose estimation from single color image},
+ author={Wang, Yangang and Peng, Cong and Liu, Yebin},
+ journal={IEEE Transactions on Circuits and Systems for Video Technology},
+ volume={29},
+ number={11},
+ pages={3258--3268},
+ year={2018},
+ publisher={IEEE}
+}
+```
+
+
+
+Results on OneHand10K val set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :---: | :--------------------------------------------------------: | :-------------------------------------------------------: |
+| [deeppose_resnet_50](/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py) | 256x256 | 0.990 | 0.485 | 34.21 | [ckpt](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256-cbddf43a_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.yml b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d5e9d8122ecb0c2082b2d0b38413dee2dc2220aa
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/resnet_onehand10k.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py
+ In Collection: DeepPose
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ Training Data: OneHand10K
+ Name: td-reg_res50_8xb64-210e_onehand10k-256x256
+ Results:
+ - Dataset: OneHand10K
+ Metrics:
+ AUC: 0.485
+ EPE: 34.21
+ PCK@0.2: 0.99
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_onehand10k_256x256-cbddf43a_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee1556d45e18e3253f421948d1affd4eabfe673f
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/onehand10k/td-reg_res50_8xb64-210e_onehand10k-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up counted in iterations, then step decay counted in epochs
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500 (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the greatest AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: regression labels for a 256x256 input (no heatmap size)
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings: ResNet-50 backbone -> GlobalAveragePooling neck -> RegressionHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,  # presumably the ResNet-50 final-stage width — TODO confirm
+ num_joints=21,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',  # NOTE(review): heatmap option on a RegressionHead — confirm it is read
+ shift_heatmap=True,  # NOTE(review): likewise heatmap-specific — confirm
+ ))
+
+# base dataset settings
+dataset_type = 'OneHand10KDataset'
+data_mode = 'topdown'
+data_root = 'data/onehand10k/'
+
+# pipelines: training adds random flip, random bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),  # training only; val_pipeline has no such step
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled batches of 64 for training, unshuffled batches of 32 for validation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/onehand10k_test.json',  # validation reads the test annotation file
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reports the same metrics
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.md b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.md
new file mode 100644
index 0000000000000000000000000000000000000000..6cca5580ba7fcb0fa0f089159b0c96007c5ce90f
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.md
@@ -0,0 +1,57 @@
+
+
+
+DeepPose (CVPR'2014)
+
+```bibtex
+@inproceedings{toshev2014deeppose,
+ title={Deeppose: Human pose estimation via deep neural networks},
+ author={Toshev, Alexander and Szegedy, Christian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={1653--1660},
+ year={2014}
+}
+```
+
+
+
+
+
+
+ResNet (CVPR'2016)
+
+```bibtex
+@inproceedings{he2016deep,
+ title={Deep residual learning for image recognition},
+ author={He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={770--778},
+ year={2016}
+}
+```
+
+
+
+
+
+
+RHD (ICCV'2017)
+
+```bibtex
+@TechReport{zb2017hand,
+ author={Christian Zimmermann and Thomas Brox},
+ title={Learning to Estimate 3D Hand Pose from Single RGB Images},
+ institution={arXiv:1705.01389},
+ year={2017},
+ note="https://arxiv.org/abs/1705.01389",
+ url="https://lmb.informatik.uni-freiburg.de/projects/hand3d/"
+}
+```
+
+
+
+Results on RHD test set
+
+| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
+| :--------------------------------------------------------- | :--------: | :-----: | :---: | :--: | :--------------------------------------------------------: | :--------------------------------------------------------: |
+| [deeppose_resnet_50](/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py) | 256x256 | 0.988 | 0.865 | 3.32 | [ckpt](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256-37f1c4d3_20210330.pth) | [log](https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256_20210330.log.json) |
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.yml b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3d0a920c5daa2eb1eaab3bea7c5c77529d72f377
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/resnet_rhd2d.yml
@@ -0,0 +1,17 @@
+Models:
+- Config: configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py
+ In Collection: DeepPose
+ Metadata:
+ Architecture:
+ - DeepPose
+ - ResNet
+ Training Data: RHD
+ Name: td-reg_res50_8xb64-210e_rhd2d-256x256
+ Results:
+ - Dataset: RHD
+ Metrics:
+ AUC: 0.865
+ EPE: 3.32
+ PCK@0.2: 0.988
+ Task: Hand 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/hand/deeppose/deeppose_res50_rhd2d_256x256-37f1c4d3_20210330.pth
diff --git a/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py
new file mode 100644
index 0000000000000000000000000000000000000000..a350c24bfe2d3d7246ed63c78f737414ee5f247e
--- /dev/null
+++ b/mmpose/configs/hand_2d_keypoint/topdown_regression/rhd2d/td-reg_res50_8xb64-210e_rhd2d-256x256.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up counted in iterations, then step decay counted in epochs
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over iterations 0-500 (by_epoch=False)
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks: keep the best checkpoint, i.e. the one with the greatest AUC
+default_hooks = dict(checkpoint=dict(save_best='AUC', rule='greater'))
+
+# codec settings: regression labels for a 256x256 input (no heatmap size)
+codec = dict(type='RegressionLabel', input_size=(256, 256))
+
+# model settings: ResNet-50 backbone -> GlobalAveragePooling neck -> RegressionHead
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ neck=dict(type='GlobalAveragePooling'),
+ head=dict(
+ type='RegressionHead',
+ in_channels=2048,  # presumably the ResNet-50 final-stage width — TODO confirm
+ num_joints=21,
+ loss=dict(type='SmoothL1Loss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',  # NOTE(review): heatmap option on a RegressionHead — confirm it is read
+ shift_heatmap=True,  # NOTE(review): likewise heatmap-specific — confirm
+ ))
+
+# base dataset settings
+dataset_type = 'Rhd2DDataset'
+data_mode = 'topdown'
+data_root = 'data/rhd/'
+
+# pipelines: training adds random flip, random bbox transform and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(
+ type='RandomBBoxTransform', rotate_factor=180,
+ scale_factor=(0.7, 1.3)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),  # training only; val_pipeline has no such step
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled batches of 64 for training, unshuffled batches of 32 for validation
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_train.json',
+ data_prefix=dict(img=''),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/rhd_test.json',  # validation reads the test annotation file
+ data_prefix=dict(img=''),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader unchanged
+
+# evaluators: PCK at threshold 0.2, AUC and EPE
+val_evaluator = [
+ dict(type='PCKAccuracy', thr=0.2),
+ dict(type='AUC'),
+ dict(type='EPE'),
+]
+test_evaluator = val_evaluator  # testing reports the same metrics
diff --git a/mmpose/configs/hand_3d_keypoint/README.md b/mmpose/configs/hand_3d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..330319f42b186afec3a7c73afd627505e91c7dc8
--- /dev/null
+++ b/mmpose/configs/hand_3d_keypoint/README.md
@@ -0,0 +1,7 @@
+# 3D Hand Pose Estimation
+
+3D hand pose estimation is defined as the task of detecting the poses (or keypoints) of the hand from an input image.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/3d_hand_keypoint.md) to prepare data.
diff --git a/mmpose/configs/hand_gesture/README.md b/mmpose/configs/hand_gesture/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7cc5bb323b05823b8eeedfb905756ce84c87c8ac
--- /dev/null
+++ b/mmpose/configs/hand_gesture/README.md
@@ -0,0 +1,13 @@
+# Gesture Recognition
+
+Gesture recognition aims to recognize the hand gestures in the video, such as thumbs up.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_hand_gesture.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/en/gesture_recognition_demo.md) to run the demo.
+
+
diff --git a/mmpose/configs/wholebody_2d_keypoint/README.md b/mmpose/configs/wholebody_2d_keypoint/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..362a6a89764acec6db1a4ef8216352c1fbbe697e
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/README.md
@@ -0,0 +1,19 @@
+# 2D Human Whole-Body Pose Estimation
+
+2D human whole-body pose estimation aims to localize dense landmarks on the entire human body including face, hands, body, and feet.
+
+Existing approaches can be categorized into top-down and bottom-up approaches.
+
+Top-down methods divide the task into two stages: human detection and whole-body pose estimation. They perform human detection first, followed by single-person whole-body pose estimation given human bounding boxes.
+
+Bottom-up approaches (e.g. AE) first detect all the whole-body keypoints and then group/associate them into person instances.
+
+## Data preparation
+
+Please follow [DATA Preparation](/docs/en/dataset_zoo/2d_wholebody_keypoint.md) to prepare data.
+
+## Demo
+
+Please follow [Demo](/demo/docs/en/2d_wholebody_pose_demo.md) to run demos.
+
+
diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/README.md b/mmpose/configs/wholebody_2d_keypoint/rtmpose/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..47e488567ca002b499c3349e8860ef9c1e398cae
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/README.md
@@ -0,0 +1,18 @@
+# RTMPose
+
+Recent studies on 2D pose estimation have achieved excellent performance on public benchmarks, yet its application in the industrial community still suffers from heavy model parameters and high latency.
+In order to bridge this gap, we empirically study five aspects that affect the performance of multi-person pose estimation algorithms: paradigm, backbone network, localization algorithm, training strategy, and deployment inference, and present a high-performance real-time multi-person pose estimation framework, **RTMPose**, based on MMPose.
+Our RTMPose-m achieves **75.8% AP** on COCO with **90+ FPS** on an Intel i7-11700 CPU and **430+ FPS** on an NVIDIA GTX 1660 Ti GPU, and RTMPose-l achieves **67.0% AP** on COCO-WholeBody with **130+ FPS**, outperforming existing open-source libraries.
+To further evaluate RTMPose's capability in critical real-time applications, we also report the performance after deploying on the mobile device.
+
+## Results and Models
+
+### COCO-WholeBody Dataset
+
+Results on COCO-WholeBody v1.0 val, using a detector with a human AP of 56.4 on the COCO val2017 dataset
+
+| Model | Input Size | Whole AP | Whole AR | Details and Download |
+| :-------: | :--------: | :------: | :------: | :---------------------------------------------------------------------: |
+| RTMPose-m | 256x192 | 0.604 | 0.667 | [rtmpose_coco-wholebody.md](./coco-wholebody/rtmpose_coco-wholebody.md) |
+| RTMPose-l | 256x192 | 0.632 | 0.694 | [rtmpose_coco-wholebody.md](./coco-wholebody/rtmpose_coco-wholebody.md) |
+| RTMPose-l | 384x288 | 0.670 | 0.723 | [rtmpose_coco-wholebody.md](./coco-wholebody/rtmpose_coco-wholebody.md) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..a926fe38d7d75c31cdebe4d350b07832670d1218
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
@@ -0,0 +1,231 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 270
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up over the first 1000 iterations, then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch max_epochs // 2 (135) to max_epochs (270)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(288, 384),
+ sigma=(6., 6.93),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=133,
+ input_size=codec['input_size'],
+ in_featuremap_size=(9, 12),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..026336a550396aa0e65e212bbd93f54ca3a56a60
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
@@ -0,0 +1,231 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 270
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up over the first 1000 iterations, then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch max_epochs // 2 (135) to max_epochs (270)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-l_udp-aic-coco_210e-256x192-273b7631_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=1024,
+ out_channels=133,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a834b830150d8de343d1c1e4a81734d3541a671
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
@@ -0,0 +1,231 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 270
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up over the first 1000 iterations, then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch max_epochs // 2 (135) to max_epochs (270)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='SimCCLabel',
+ input_size=(192, 256),
+ sigma=(4.9, 5.66),
+ simcc_split_ratio=2.0,
+ normalize=False,
+ use_dark=False)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmpose/v1/projects/'
+ 'rtmposev1/cspnext-m_udp-aic-coco_210e-256x192-f2f7d6f6_20230130.pth' # noqa
+ )),
+ head=dict(
+ type='RTMCCHead',
+ in_channels=768,
+ out_channels=133,
+ input_size=codec['input_size'],
+ in_featuremap_size=(6, 8),
+ simcc_split_ratio=codec['simcc_split_ratio'],
+ final_layer_kernel_size=7,
+ gau_cfg=dict(
+ hidden_dims=256,
+ s=128,
+ expansion_factor=2,
+ dropout_rate=0.,
+ drop_path=0.,
+ act_fn='SiLU',
+ use_rel_bias=False,
+ pos_enc=False),
+ loss=dict(
+ type='KLDiscretLoss',
+ use_target_weight=True,
+ beta=10.,
+ label_softmax=True),
+ decoder=codec),
+ test_cfg=dict(flip_test=True, ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..bdf327d631e07c18e63183996df7e6fa51a9b6b1
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.md
@@ -0,0 +1,62 @@
+
+
+
+RTMPose (arXiv'2023)
+
+```bibtex
+@misc{https://doi.org/10.48550/arxiv.2303.07399,
+ doi = {10.48550/ARXIV.2303.07399},
+ url = {https://arxiv.org/abs/2303.07399},
+ author = {Jiang, Tao and Lu, Peng and Zhang, Li and Ma, Ningsheng and Han, Rui and Lyu, Chengqi and Li, Yining and Chen, Kai},
+ keywords = {Computer Vision and Pattern Recognition (cs.CV), FOS: Computer and information sciences, FOS: Computer and information sciences},
+ title = {RTMPose: Real-Time Multi-Person Pose Estimation based on MMPose},
+ publisher = {arXiv},
+ year = {2023},
+ copyright = {Creative Commons Attribution 4.0 International}
+}
+
+```
+
+
+
+
+
+
+RTMDet (arXiv'2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [rtmpose-m](/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 0.697 | 0.743 | 0.660 | 0.749 | 0.822 | 0.858 | 0.483 | 0.564 | 0.604 | 0.667 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.json) |
+| [rtmpose-l](/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py) | 256x192 | 0.721 | 0.764 | 0.693 | 0.780 | 0.844 | 0.876 | 0.523 | 0.600 | 0.632 | 0.694 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.json) |
+| [rtmpose-l](/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py) | 384x288 | 0.736 | 0.776 | 0.738 | 0.810 | 0.895 | 0.918 | 0.591 | 0.659 | 0.670 | 0.723 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.json) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..19a3cd0cec54115b463c26a338de7354cf4dd65d
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose_coco-wholebody.yml
@@ -0,0 +1,66 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-m_8xb64-270e_coco-wholebody-256x192.py
+ In Collection: RTMPose
+ Alias: wholebody
+ Metadata:
+ Architecture: &id001
+ - RTMPose
+ Training Data: COCO-WholeBody
+ Name: rtmpose-m_8xb64-270e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.697
+ Body AR: 0.743
+ Face AP: 0.822
+ Face AR: 0.858
+ Foot AP: 0.66
+ Foot AR: 0.749
+ Hand AP: 0.483
+ Hand AR: 0.564
+ Whole AP: 0.604
+ Whole AR: 0.667
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-m_simcc-coco-wholebody_pt-aic-coco_270e-256x192-cd5e845c_20230123.pth
+- Config: configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb64-270e_coco-wholebody-256x192.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: rtmpose-l_8xb64-270e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.721
+ Body AR: 0.764
+ Face AP: 0.844
+ Face AR: 0.876
+ Foot AP: 0.693
+ Foot AR: 0.78
+ Hand AP: 0.523
+ Hand AR: 0.6
+ Whole AP: 0.632
+ Whole AR: 0.694
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-256x192-6f206314_20230124.pth
+- Config: configs/wholebody_2d_keypoint/rtmpose/coco-wholebody/rtmpose-l_8xb32-270e_coco-wholebody-384x288.py
+ In Collection: RTMPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: rtmpose-l_8xb32-270e_coco-wholebody-384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.736
+ Body AR: 0.776
+ Face AP: 0.895
+ Face AR: 0.918
+ Foot AP: 0.738
+ Foot AR: 0.81
+ Hand AP: 0.591
+ Hand AR: 0.659
+ Whole AP: 0.67
+ Whole AR: 0.723
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/rtmpose-l_simcc-coco-wholebody_pt-aic-coco_270e-384x288-eaeb96c8_20230125.pth
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/README.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..23ee1ed315956e31ddf1637049032db767e8006c
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/README.md
@@ -0,0 +1,26 @@
+# Top-down heatmap-based pose estimation
+
+Top-down methods divide the task into two stages: object detection, followed by single-object pose estimation given object bounding boxes. Instead of estimating keypoint coordinates directly, the pose estimator will produce heatmaps which represent the likelihood of being a keypoint, following the paradigm introduced in [Simple Baselines for Human Pose Estimation and Tracking](http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html).
+
+
+

+
+
+## Results and Models
+
+### COCO-WholeBody Dataset
+
+Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Model | Input Size | Whole AP | Whole AR | Details and Download |
+| :-----------------: | :--------: | :------: | :------: | :-----------------------------------------------------------------------------: |
+| HRNet-w48+Dark+ | 384x288 | 0.661 | 0.743 | [hrnet_dark_coco-wholebody.md](./coco-wholebody/hrnet_dark_coco-wholebody.md) |
+| HRNet-w32+Dark | 256x192 | 0.582 | 0.671 | [hrnet_dark_coco-wholebody.md](./coco-wholebody/hrnet_dark_coco-wholebody.md) |
+| HRNet-w48 | 256x192 | 0.579 | 0.681 | [hrnet_coco-wholebody.md](./coco-wholebody/hrnet_coco-wholebody.md) |
+| CSPNeXt-m | 256x192 | 0.567 | 0.641 | [cspnext_udp_coco-wholebody.md](./coco-wholebody/cspnext_udp_coco-wholebody.md) |
+| ResNet-152 | 256x192 | 0.548 | 0.661 | [resnet_coco-wholebody.md](./coco-wholebody/resnet_coco-wholebody.md) |
+| HRNet-w32 | 256x192 | 0.536 | 0.636 | [hrnet_coco-wholebody.md](./coco-wholebody/hrnet_coco-wholebody.md) |
+| ResNet-101 | 256x192 | 0.531 | 0.645 | [resnet_coco-wholebody.md](./coco-wholebody/resnet_coco-wholebody.md) |
+| S-ViPNAS-Res50+Dark | 256x192 | 0.528 | 0.632 | [vipnas_dark_coco-wholebody.md](./coco-wholebody/vipnas_dark_coco-wholebody.md) |
+| ResNet-50 | 256x192 | 0.521 | 0.633 | [resnet_coco-wholebody.md](./coco-wholebody/resnet_coco-wholebody.md) |
+| S-ViPNAS-Res50 | 256x192 | 0.495 | 0.607 | [vipnas_coco-wholebody.md](./coco-wholebody/vipnas_coco-wholebody.md) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-l_udp_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-l_udp_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2112e19e7688be70ee6afd3085d0070e7d68d0a3
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-l_udp_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,213 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate: linear warm-up over the first 1000 iterations, then cosine annealing
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # use cosine lr from epoch max_epochs // 2 (105) to max_epochs (210)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=1.,
+ widen_factor=1.,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=1024,
+ out_channels=133,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfcb5c3917bb8fd0d60057a5129e0998cb37a672
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,213 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+max_epochs = 210
+stage2_num_epochs = 30
+base_lr = 4e-3
+
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+randomness = dict(seed=21)
+
+# optimizer
+optim_wrapper = dict(
+ type='OptimWrapper',
+ optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
+ paramwise_cfg=dict(
+ norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))
+
+# learning rate
+param_scheduler = [
+ dict(
+ type='LinearLR',
+ start_factor=1.0e-5,
+ by_epoch=False,
+ begin=0,
+ end=1000),
+ dict(
+ # cosine annealing over the second half of training (epoch max_epochs // 2 to max_epochs)
+ type='CosineAnnealingLR',
+ eta_min=base_lr * 0.05,
+ begin=max_epochs // 2,
+ end=max_epochs,
+ T_max=max_epochs // 2,
+ by_epoch=True,
+ convert_to_iter_based=True),
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# codec settings
+codec = dict(
+ type='UDPHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ _scope_='mmdet',
+ type='CSPNeXt',
+ arch='P5',
+ expand_ratio=0.5,
+ deepen_factor=0.67,
+ widen_factor=0.75,
+ out_indices=(4, ),
+ channel_attention=True,
+ norm_cfg=dict(type='SyncBN'),
+ act_cfg=dict(type='SiLU'),
+ init_cfg=dict(
+ type='Pretrained',
+ prefix='backbone.',
+ checkpoint='https://download.openmmlab.com/mmdetection/v3.0/'
+ 'rtmdet/cspnext_rsb_pretrain/'
+ 'cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth')),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=768,
+ out_channels=133,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=False,
+ flip_mode='heatmap',
+ shift_heatmap=False,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+backend_args = dict(backend='local')
+# backend_args = dict(
+# backend='petrel',
+# path_mapping=dict({
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/',
+# f'{data_root}': 's3://openmmlab/datasets/detection/coco/'
+# }))
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform', scale_factor=[0.6, 1.4], rotate_factor=80),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=1.0),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+train_pipeline_stage2 = [
+ dict(type='LoadImage', backend_args=backend_args),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ shift_factor=0.,
+ scale_factor=[0.75, 1.25],
+ rotate_factor=60),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='mmdet.YOLOXHSVRandomAug'),
+ dict(
+ type='Albumentation',
+ transforms=[
+ dict(type='Blur', p=0.1),
+ dict(type='MedianBlur', p=0.1),
+ dict(
+ type='CoarseDropout',
+ max_holes=1,
+ max_height=0.4,
+ max_width=0.4,
+ min_holes=1,
+ min_height=0.2,
+ min_width=0.2,
+ p=0.5),
+ ]),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=10,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=10,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(
+ save_best='coco-wholebody/AP', rule='greater', max_keep_ckpts=1))
+
+custom_hooks = [
+ dict(
+ type='EMAHook',
+ ema_type='ExpMomentumEMA',
+ momentum=0.0002,
+ update_buffers=True,
+ priority=49),
+ dict(
+ type='mmdet.PipelineSwitchHook',
+ switch_epoch=max_epochs - stage2_num_epochs,
+ switch_pipeline=train_pipeline_stage2)
+]
+
+# evaluators
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..1fc4a78dfbda287d33c6edd16d9d36944992f365
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.md
@@ -0,0 +1,56 @@
+
+
+
+RTMDet (ArXiv 2022)
+
+```bibtex
+@misc{lyu2022rtmdet,
+ title={RTMDet: An Empirical Study of Designing Real-Time Object Detectors},
+ author={Chengqi Lyu and Wenwei Zhang and Haian Huang and Yue Zhou and Yudong Wang and Yanyi Liu and Shilong Zhang and Kai Chen},
+ year={2022},
+ eprint={2212.07784},
+ archivePrefix={arXiv},
+ primaryClass={cs.CV}
+}
+```
+
+
+
+
+
+
+UDP (CVPR'2020)
+
+```bibtex
+@InProceedings{Huang_2020_CVPR,
+ author = {Huang, Junjie and Zhu, Zheng and Guo, Feng and Huang, Guan},
+ title = {The Devil Is in the Details: Delving Into Unbiased Data Processing for Human Pose Estimation},
+ booktitle = {The IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ month = {June},
+ year = {2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [pose_cspnext_m_udp](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.687 | 0.735 | 0.680 | 0.763 | 0.697 | 0.755 | 0.460 | 0.543 | 0.567 | 0.641 | [ckpt](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco-wholebody_pt-in1k_210e-256x192-320fa258_20230123.pth) | [log](https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco-wholebody_pt-in1k_210e-256x192-320fa258_20230123.json) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ebdcc7146ef7969da2fdd26926c92268a9abae90
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext_udp_coco-wholebody.yml
@@ -0,0 +1,24 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/cspnext-m_udp_8xb64-210e_coco-wholebody-256x192.py
+ In Collection: UDP
+ Metadata:
+ Architecture: &id001
+ - UDP
+ - CSPNeXt
+ Training Data: COCO-WholeBody
+ Name: cspnext-m_udp_8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.687
+ Body AR: 0.735
+ Face AP: 0.697
+ Face AR: 0.755
+ Foot AP: 0.680
+ Foot AR: 0.763
+ Hand AP: 0.46
+ Hand AR: 0.543
+ Whole AP: 0.567
+ Whole AR: 0.641
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/cspnext-m_udp-coco-wholebody_pt-in1k_210e-256x192-320fa258_20230123.pth
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..53f240bc528f9c81e65620fb33dbc4546519001c
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.md
@@ -0,0 +1,41 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [pose_hrnet_w32](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.678 | 0.755 | 0.543 | 0.661 | 0.630 | 0.708 | 0.467 | 0.566 | 0.536 | 0.636 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192-853765cd_20200918.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_20200918.log.json) |
+| [pose_hrnet_w32](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py) | 384x288 | 0.700 | 0.772 | 0.585 | 0.691 | 0.726 | 0.783 | 0.515 | 0.603 | 0.586 | 0.673 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288-78cacac3_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288_20200922.log.json) |
+| [pose_hrnet_w48](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py) | 256x192 | 0.701 | 0.776 | 0.675 | 0.787 | 0.656 | 0.743 | 0.535 | 0.639 | 0.579 | 0.681 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192-643e18cb_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192_20200922.log.json) |
+| [pose_hrnet_w48](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.722 | 0.791 | 0.696 | 0.801 | 0.776 | 0.834 | 0.587 | 0.678 | 0.632 | 0.717 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288-6e061c6a_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_20200922.log.json) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..929bd0535671b25499624cf41487008d4be27ab2
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_coco-wholebody.yml
@@ -0,0 +1,86 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ Training Data: COCO-WholeBody
+ Name: td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.678
+ Body AR: 0.755
+ Face AP: 0.630
+ Face AR: 0.708
+ Foot AP: 0.543
+ Foot AR: 0.661
+ Hand AP: 0.467
+ Hand AR: 0.566
+ Whole AP: 0.536
+ Whole AR: 0.636
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192-853765cd_20200918.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.700
+ Body AR: 0.772
+ Face AP: 0.726
+ Face AR: 0.783
+ Foot AP: 0.585
+ Foot AR: 0.691
+ Hand AP: 0.515
+ Hand AR: 0.603
+ Whole AP: 0.586
+ Whole AR: 0.673
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_384x288-78cacac3_20200922.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.701
+ Body AR: 0.776
+ Face AP: 0.656
+ Face AR: 0.743
+ Foot AP: 0.675
+ Foot AR: 0.787
+ Hand AP: 0.535
+ Hand AR: 0.639
+ Whole AP: 0.579
+ Whole AR: 0.681
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_256x192-643e18cb_20200922.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py
+ In Collection: HRNet
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.722
+ Body AR: 0.791
+ Face AP: 0.776
+ Face AR: 0.834
+ Foot AP: 0.696
+ Foot AR: 0.801
+ Hand AP: 0.587
+ Hand AR: 0.678
+ Whole AP: 0.632
+ Whole AR: 0.717
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288-6e061c6a_20200922.pth
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..b215b3c5f25b595ca30ae54b743c907a53629084
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.md
@@ -0,0 +1,58 @@
+
+
+
+HRNet (CVPR'2019)
+
+```bibtex
+@inproceedings{sun2019deep,
+ title={Deep high-resolution representation learning for human pose estimation},
+ author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ pages={5693--5703},
+ year={2019}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [pose_hrnet_w32_dark](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.693 | 0.764 | 0.564 | 0.674 | 0.737 | 0.809 | 0.503 | 0.602 | 0.582 | 0.671 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark-469327ef_20200922.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark_20200922.log.json) |
+| [pose_hrnet_w48_dark+](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.742 | 0.807 | 0.707 | 0.806 | 0.841 | 0.892 | 0.602 | 0.694 | 0.661 | 0.743 | [ckpt](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth) | [log](https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark_20200918.log.json) |
+
+Note: `+` means the model is first pre-trained on original COCO dataset, and then fine-tuned on COCO-WholeBody dataset. We find this will lead to better performance.
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..d0e2bd69542be8c482aee9498b9369e4151440c8
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/hrnet_dark_coco-wholebody.yml
@@ -0,0 +1,45 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: &id001
+ - HRNet
+ - DarkPose
+ Training Data: COCO-WholeBody
+ Name: td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.693
+ Body AR: 0.764
+ Face AP: 0.737
+ Face AR: 0.809
+ Foot AP: 0.564
+ Foot AR: 0.674
+ Hand AP: 0.503
+ Hand AR: 0.602
+ Whole AP: 0.582
+ Whole AR: 0.671
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_wholebody_256x192_dark-469327ef_20200922.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py
+ In Collection: DarkPose
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.742
+ Body AR: 0.807
+ Face AP: 0.841
+ Face AR: 0.892
+ Foot AP: 0.707
+ Foot AR: 0.806
+ Hand AP: 0.602
+ Hand AR: 0.694
+ Whole AP: 0.661
+ Whole AR: 0.743
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..e4a189833b6b731d0efe5a5e6b9426c9a78ce1b3
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.md
@@ -0,0 +1,43 @@
+
+
+
+SimpleBaseline2D (ECCV'2018)
+
+```bibtex
+@inproceedings{xiao2018simple,
+ title={Simple baselines for human pose estimation and tracking},
+ author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
+ booktitle={Proceedings of the European conference on computer vision (ECCV)},
+ pages={466--481},
+ year={2018}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val with detector having human AP of 56.4 on COCO val2017 dataset
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [pose_resnet_50](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.652 | 0.738 | 0.615 | 0.749 | 0.606 | 0.715 | 0.460 | 0.584 | 0.521 | 0.633 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192-9e37ed88_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192_20201004.log.json) |
+| [pose_resnet_50](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py) | 384x288 | 0.666 | 0.747 | 0.634 | 0.763 | 0.731 | 0.811 | 0.536 | 0.646 | 0.574 | 0.670 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288-ce11e294_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288_20201004.log.json) |
+| [pose_resnet_101](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py) | 256x192 | 0.669 | 0.753 | 0.637 | 0.766 | 0.611 | 0.722 | 0.463 | 0.589 | 0.531 | 0.645 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192-7325f982_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192_20201004.log.json) |
+| [pose_resnet_101](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.692 | 0.770 | 0.680 | 0.799 | 0.746 | 0.820 | 0.548 | 0.657 | 0.597 | 0.693 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288-6c137b9a_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288_20201004.log.json) |
+| [pose_resnet_152](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py) | 256x192 | 0.682 | 0.764 | 0.661 | 0.787 | 0.623 | 0.728 | 0.481 | 0.607 | 0.548 | 0.661 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192-5de8ae23_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192_20201004.log.json) |
+| [pose_resnet_152](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py) | 384x288 | 0.704 | 0.780 | 0.693 | 0.813 | 0.751 | 0.824 | 0.559 | 0.666 | 0.610 | 0.705 | [ckpt](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288-eab8caa8_20201004.pth) | [log](https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288_20201004.log.json) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0e8db24f6acb4166776e7cbf4ba2109b1f2a28a3
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/resnet_coco-wholebody.yml
@@ -0,0 +1,128 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: &id001
+ - SimpleBaseline2D
+ Training Data: COCO-WholeBody
+ Name: td-hm_res50_8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.652
+ Body AR: 0.738
+ Face AP: 0.606
+ Face AR: 0.715
+ Foot AP: 0.615
+ Foot AR: 0.749
+ Hand AP: 0.46
+ Hand AR: 0.584
+ Whole AP: 0.521
+ Whole AR: 0.633
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_256x192-9e37ed88_20201004.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_res50_8xb64-210e_coco-wholebody-384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.666
+ Body AR: 0.747
+ Face AP: 0.731
+ Face AR: 0.811
+ Foot AP: 0.634
+ Foot AR: 0.763
+ Hand AP: 0.536
+ Hand AR: 0.646
+ Whole AP: 0.574
+ Whole AR: 0.67
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res50_coco_wholebody_384x288-ce11e294_20201004.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_res101_8xb32-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.669
+ Body AR: 0.753
+ Face AP: 0.611
+ Face AR: 0.722
+ Foot AP: 0.637
+ Foot AR: 0.766
+ Hand AP: 0.463
+ Hand AR: 0.589
+ Whole AP: 0.531
+ Whole AR: 0.645
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_256x192-7325f982_20201004.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_res101_8xb32-210e_coco-wholebody-384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.692
+ Body AR: 0.77
+ Face AP: 0.746
+ Face AR: 0.82
+ Foot AP: 0.68
+ Foot AR: 0.799
+ Hand AP: 0.548
+ Hand AR: 0.657
+ Whole AP: 0.597
+ Whole AR: 0.693
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res101_coco_wholebody_384x288-6c137b9a_20201004.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_res152_8xb32-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.682
+ Body AR: 0.764
+ Face AP: 0.623
+ Face AR: 0.728
+ Foot AP: 0.661
+ Foot AR: 0.787
+ Hand AP: 0.481
+ Hand AR: 0.607
+ Whole AP: 0.548
+ Whole AR: 0.661
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_256x192-5de8ae23_20201004.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py
+ In Collection: SimpleBaseline2D
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_res152_8xb32-210e_coco-wholebody-384x288
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.704
+ Body AR: 0.78
+ Face AP: 0.751
+ Face AR: 0.824
+ Foot AP: 0.693
+ Foot AR: 0.813
+ Hand AP: 0.559
+ Hand AR: 0.666
+ Whole AP: 0.61
+ Whole AR: 0.705
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/resnet/res152_coco_wholebody_384x288-eab8caa8_20201004.pth
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..2595e3fc13e6913a01af45fa8d7b9c6377511ddb
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=133,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..727fa9472ec9c446cb572e6c4fcd49976bf3916b
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_8xb64-210e_coco-wholebody-384x288.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=133,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffee1d1383e4757b79ed0ea4461c69d7b4247b15
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w32_dark-8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(32, 64)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(32, 64, 128)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(32, 64, 128, 256))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w32-36af842e.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=32,
+ out_channels=133,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..892b4b7936123840c3192e87491123e5f11b3f7f
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-256x192.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# base batch size for automatic LR scaling; 8xb32 in the file name => 8 * 32
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=133,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..d587dbc45bf2f90a3912e263d63d0dc64205298a
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_8xb32-210e_coco-wholebody-384x288.py
@@ -0,0 +1,150 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# base batch size for automatic LR scaling; 8xb32 in the file name => 8 * 32
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=133,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..63175b99ea3e604fb87e1e45ef921aee2e7a1b16
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py
@@ -0,0 +1,154 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# base batch size for automatic LR scaling; 8xb32 in the file name => 8 * 32
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(288, 384),
+ heatmap_size=(72, 96),
+ sigma=3,
+ unbiased=True)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='HRNet',
+ in_channels=3,
+ extra=dict(
+ stage1=dict(
+ num_modules=1,
+ num_branches=1,
+ block='BOTTLENECK',
+ num_blocks=(4, ),
+ num_channels=(64, )),
+ stage2=dict(
+ num_modules=1,
+ num_branches=2,
+ block='BASIC',
+ num_blocks=(4, 4),
+ num_channels=(48, 96)),
+ stage3=dict(
+ num_modules=4,
+ num_branches=3,
+ block='BASIC',
+ num_blocks=(4, 4, 4),
+ num_channels=(48, 96, 192)),
+ stage4=dict(
+ num_modules=3,
+ num_branches=4,
+ block='BASIC',
+ num_blocks=(4, 4, 4, 4),
+ num_channels=(48, 96, 192, 384))),
+ init_cfg=dict(
+ type='Pretrained',
+ checkpoint='https://download.openmmlab.com/mmpose/'
+ 'pretrain_models/hrnet_w48-8ef0771d.pth'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=48,
+ out_channels=133,
+ deconv_out_channels=None,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0d8187ab47b54d445d3f125da596f381c494309
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=133,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..42e98575fba714ab65f3f19f226b5c06c2898a93
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res101_8xb32-210e_coco-wholebody-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=101,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=133,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..10c16eb71f9ac28ea6746e85d51ed526dd035abe
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+# runtime
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# base batch size for automatic LR scaling; 8xb32 in the file name => 8 * 32
+auto_scale_lr = dict(base_batch_size=256)
+
+# hooks
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=133,
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),
+ test_cfg=dict(
+ flip_test=True,
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..43ec5fb67c23df4e5e3d1c93072c41e0d08b88a6
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res152_8xb32-210e_coco-wholebody-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with learning rate 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # NOTE(review): 512 != 8 x 32 implied by the file name - confirm
+
+# hooks: keep the checkpoint with the highest 'coco-wholebody/AP'
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings: heatmap size is 1/4 of the input size along both axes
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator with a ResNet-152 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=152,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet152'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=133,  # presumably one heatmap per COCO-WholeBody keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,  # test-time flip augmentation is enabled
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody in top-down mode under data/coco/
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, unshuffled validation split
+train_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): hard-coded path, not derived from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..e568c78b175bf3cc3364235c671d04944d84c53f
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with learning rate 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # equals 8 x 64, matching "8xb64" in the file name
+
+# hooks: keep the checkpoint with the highest 'coco-wholebody/AP'
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings: heatmap size is 1/4 of the input size along both axes
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator with a ResNet-50 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=133,  # presumably one heatmap per COCO-WholeBody keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,  # test-time flip augmentation is enabled
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody in top-down mode under data/coco/
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, unshuffled validation split
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): hard-coded path, not derived from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py
new file mode 100644
index 0000000000000000000000000000000000000000..6869d17ba998b7918133eefcf98fc3344e729a26
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_res50_8xb64-210e_coco-wholebody-384x288.py
@@ -0,0 +1,121 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with learning rate 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # equals 8 x 64, matching "8xb64" in the file name
+
+# hooks: keep the checkpoint with the highest 'coco-wholebody/AP'
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings: heatmap size is 1/4 of the input size along both axes
+codec = dict(
+ type='MSRAHeatmap', input_size=(288, 384), heatmap_size=(72, 96), sigma=3)
+
+# model settings: top-down estimator with a ResNet-50 backbone and a heatmap head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ResNet',
+ depth=50,
+ init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
+ ),
+ head=dict(
+ type='HeatmapHead',
+ in_channels=2048,
+ out_channels=133,  # presumably one heatmap per COCO-WholeBody keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,  # test-time flip augmentation is enabled
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody in top-down mode under data/coco/
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(type='RandomBBoxTransform'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, unshuffled validation split
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): hard-coded path, not derived from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..cad9c539bef73cce6fc8e48e9d91489ea9f72270
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,122 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with learning rate 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # equals 8 x 64, matching "8xb64" in the file name
+
+# hooks: keep the checkpoint with the highest 'coco-wholebody/AP'
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings: heatmap size is 1/4 of the input size along both axes
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator with a ViPNAS MobileNetV3 backbone and ViPNAS head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ViPNAS_MobileNetV3'),
+ head=dict(
+ type='ViPNASHead',
+ in_channels=160,
+ out_channels=133,  # presumably one heatmap per COCO-WholeBody keypoint - confirm
+ deconv_out_channels=(160, 160, 160),
+ deconv_num_groups=(160, 160, 160),  # groups equal channels on every deconv layer
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,  # test-time flip augmentation is enabled
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody in top-down mode under data/coco/
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,  # explicit rotation/scale ranges instead of the transform defaults
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, unshuffled validation split
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): hard-coded path, not derived from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..d34ea50db64b6a2716469ebf872045b9308fc413
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,126 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with learning rate 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # equals 8 x 64, matching "8xb64" in the file name
+
+# hooks: keep the checkpoint with the highest 'coco-wholebody/AP'
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings: heatmap size is 1/4 of the input size along both axes
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)  # NOTE(review): presumably the DARK-style unbiased heatmap coding - confirm against MSRAHeatmap
+
+# model settings: top-down estimator with a ViPNAS MobileNetV3 backbone and ViPNAS head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(type='ViPNAS_MobileNetV3'),
+ head=dict(
+ type='ViPNASHead',
+ in_channels=160,
+ out_channels=133,  # presumably one heatmap per COCO-WholeBody keypoint - confirm
+ deconv_out_channels=(160, 160, 160),
+ deconv_num_groups=(160, 160, 160),  # groups equal channels on every deconv layer
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,  # test-time flip augmentation is enabled
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody in top-down mode under data/coco/
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,  # explicit rotation/scale ranges instead of the transform defaults
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, unshuffled validation split
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): hard-coded path, not derived from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..822e4c698a54a82a62fd30f6cc891f814d024930
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,123 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with learning rate 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # equals 8 x 64, matching "8xb64" in the file name
+
+# hooks: keep the checkpoint with the highest 'coco-wholebody/AP'
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings: heatmap size is 1/4 of the input size along both axes
+codec = dict(
+ type='MSRAHeatmap', input_size=(192, 256), heatmap_size=(48, 64), sigma=2)
+
+# model settings: top-down estimator with a ViPNAS ResNet-50 backbone and ViPNAS head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ViPNAS_ResNet',
+ depth=50,
+ ),
+ head=dict(
+ type='ViPNASHead',
+ in_channels=608,
+ out_channels=133,  # presumably one heatmap per COCO-WholeBody keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,  # test-time flip augmentation is enabled
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody in top-down mode under data/coco/
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,  # explicit rotation/scale ranges instead of the transform defaults
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, unshuffled validation split
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): hard-coded path, not derived from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py
new file mode 100644
index 0000000000000000000000000000000000000000..15b152fe96d3d60806c3461a04a0a4b5c66b3c96
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py
@@ -0,0 +1,127 @@
+_base_ = ['../../../_base_/default_runtime.py']  # inherit the shared default runtime settings
+
+# runtime: train for 210 epochs and validate every 10 epochs
+train_cfg = dict(max_epochs=210, val_interval=10)
+
+# optimizer: Adam with learning rate 5e-4
+optim_wrapper = dict(optimizer=dict(
+ type='Adam',
+ lr=5e-4,
+))
+
+# learning policy: linear warm-up followed by multi-step decay
+param_scheduler = [
+ dict(
+ type='LinearLR', begin=0, end=500, start_factor=0.001,
+ by_epoch=False), # warm-up over the first 500 iterations
+ dict(
+ type='MultiStepLR',
+ begin=0,
+ end=210,
+ milestones=[170, 200],  # epochs at which the LR is scaled by gamma
+ gamma=0.1,
+ by_epoch=True)
+]
+
+# automatically scaling LR based on the actual training batch size
+auto_scale_lr = dict(base_batch_size=512)  # equals 8 x 64, matching "8xb64" in the file name
+
+# hooks: keep the checkpoint with the highest 'coco-wholebody/AP'
+default_hooks = dict(
+ checkpoint=dict(save_best='coco-wholebody/AP', rule='greater'))
+
+# codec settings: heatmap size is 1/4 of the input size along both axes
+codec = dict(
+ type='MSRAHeatmap',
+ input_size=(192, 256),
+ heatmap_size=(48, 64),
+ sigma=2,
+ unbiased=True)  # NOTE(review): presumably the DARK-style unbiased heatmap coding - confirm against MSRAHeatmap
+
+# model settings: top-down estimator with a ViPNAS ResNet-50 backbone and ViPNAS head
+model = dict(
+ type='TopdownPoseEstimator',
+ data_preprocessor=dict(
+ type='PoseDataPreprocessor',
+ mean=[123.675, 116.28, 103.53],
+ std=[58.395, 57.12, 57.375],
+ bgr_to_rgb=True),
+ backbone=dict(
+ type='ViPNAS_ResNet',
+ depth=50,
+ ),
+ head=dict(
+ type='ViPNASHead',
+ in_channels=608,
+ out_channels=133,  # presumably one heatmap per COCO-WholeBody keypoint - confirm
+ loss=dict(type='KeypointMSELoss', use_target_weight=True),
+ decoder=codec),  # the head decodes with the same codec used to generate targets
+ test_cfg=dict(
+ flip_test=True,  # test-time flip augmentation is enabled
+ flip_mode='heatmap',
+ shift_heatmap=True,
+ ))
+
+# base dataset settings: COCO-WholeBody in top-down mode under data/coco/
+dataset_type = 'CocoWholeBodyDataset'
+data_mode = 'topdown'
+data_root = 'data/coco/'
+
+# pipelines: training adds random flip/half-body/bbox transforms and target generation
+train_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='RandomFlip', direction='horizontal'),
+ dict(type='RandomHalfBody'),
+ dict(
+ type='RandomBBoxTransform',
+ rotate_factor=60,  # explicit rotation/scale ranges instead of the transform defaults
+ scale_factor=(0.75, 1.25)),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='GenerateTarget', encoder=codec),
+ dict(type='PackPoseInputs')
+]
+val_pipeline = [
+ dict(type='LoadImage'),
+ dict(type='GetBBoxCenterScale'),
+ dict(type='TopdownAffine', input_size=codec['input_size']),
+ dict(type='PackPoseInputs')
+]
+
+# data loaders: shuffled training split, unshuffled validation split
+train_dataloader = dict(
+ batch_size=64,
+ num_workers=2,
+ persistent_workers=True,
+ sampler=dict(type='DefaultSampler', shuffle=True),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_train_v1.0.json',
+ data_prefix=dict(img='train2017/'),
+ pipeline=train_pipeline,
+ ))
+val_dataloader = dict(
+ batch_size=32,
+ num_workers=2,
+ persistent_workers=True,
+ drop_last=False,
+ sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+ dataset=dict(
+ type=dataset_type,
+ data_root=data_root,
+ data_mode=data_mode,
+ ann_file='annotations/coco_wholebody_val_v1.0.json',
+ data_prefix=dict(img='val2017/'),
+ test_mode=True,
+ bbox_file='data/coco/person_detection_results/'  # NOTE(review): hard-coded path, not derived from data_root
+ 'COCO_val2017_detections_AP_H_56_person.json',
+ pipeline=val_pipeline,
+ ))
+test_dataloader = val_dataloader  # testing reuses the validation loader
+
+val_evaluator = dict(
+ type='CocoWholeBodyMetric',
+ ann_file=data_root + 'annotations/coco_wholebody_val_v1.0.json')
+test_evaluator = val_evaluator  # testing reuses the validation evaluator
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..63fc0aed8af576808a7c6ee7dac89c3235d2ebfc
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.md
@@ -0,0 +1,38 @@
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val, using a detector with a human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [S-ViPNAS-MobileNetV3](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.619 | 0.700 | 0.477 | 0.608 | 0.585 | 0.689 | 0.386 | 0.505 | 0.473 | 0.578 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192-0fee581a_20211205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_20211205.log.json) |
+| [S-ViPNAS-Res50](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.643 | 0.726 | 0.553 | 0.694 | 0.587 | 0.698 | 0.410 | 0.529 | 0.495 | 0.607 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192-49e1c3a4_20211112.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_20211112.log.json) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..28148364075f0ea14c844965a82db732620fd3f2
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_coco-wholebody.yml
@@ -0,0 +1,44 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: &id001
+ - ViPNAS
+ Training Data: COCO-WholeBody
+ Name: td-hm_vipnas-mbv3_8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.619
+ Body AR: 0.7
+ Face AP: 0.585
+ Face AR: 0.689
+ Foot AP: 0.477
+ Foot AR: 0.608
+ Hand AP: 0.386
+ Hand AR: 0.505
+ Whole AP: 0.473
+ Whole AR: 0.578
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192-0fee581a_20211205.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_vipnas-res50_8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.643
+ Body AR: 0.726
+ Face AP: 0.587
+ Face AR: 0.698
+ Foot AP: 0.553
+ Foot AR: 0.694
+ Hand AP: 0.41
+ Hand AR: 0.529
+ Whole AP: 0.495
+ Whole AR: 0.607
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192-49e1c3a4_20211112.pth
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md
new file mode 100644
index 0000000000000000000000000000000000000000..e39c66e913ab44ae58e511e987397ebae5e30fe1
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.md
@@ -0,0 +1,55 @@
+
+
+
+ViPNAS (CVPR'2021)
+
+```bibtex
+@inproceedings{xu2021vipnas,
+ title={ViPNAS: Efficient Video Pose Estimation via Neural Architecture Search},
+ author={Xu, Lumin and Guan, Yingda and Jin, Sheng and Liu, Wentao and Qian, Chen and Luo, Ping and Ouyang, Wanli and Wang, Xiaogang},
+ booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
+ year={2021}
+}
+```
+
+
+
+
+
+
+DarkPose (CVPR'2020)
+
+```bibtex
+@inproceedings{zhang2020distribution,
+ title={Distribution-aware coordinate representation for human pose estimation},
+ author={Zhang, Feng and Zhu, Xiatian and Dai, Hanbin and Ye, Mao and Zhu, Ce},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
+ pages={7093--7102},
+ year={2020}
+}
+```
+
+
+
+
+
+
+COCO-WholeBody (ECCV'2020)
+
+```bibtex
+@inproceedings{jin2020whole,
+ title={Whole-Body Human Pose Estimation in the Wild},
+ author={Jin, Sheng and Xu, Lumin and Xu, Jin and Wang, Can and Liu, Wentao and Qian, Chen and Ouyang, Wanli and Luo, Ping},
+ booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
+ year={2020}
+}
+```
+
+
+
+Results on COCO-WholeBody v1.0 val, using a detector with a human AP of 56.4 on the COCO val2017 dataset
+
+| Arch | Input Size | Body AP | Body AR | Foot AP | Foot AR | Face AP | Face AR | Hand AP | Hand AR | Whole AP | Whole AR | ckpt | log |
+| :-------------------------------------- | :--------: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :------: | :------: | :--------------------------------------: | :-------------------------------------: |
+| [S-ViPNAS-MobileNetV3_dark](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.632 | 0.710 | 0.530 | 0.660 | 0.672 | 0.771 | 0.404 | 0.519 | 0.508 | 0.607 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark_20211205.log.json) |
+| [S-ViPNAS-Res50_dark](/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py) | 256x192 | 0.650 | 0.732 | 0.550 | 0.686 | 0.684 | 0.783 | 0.437 | 0.554 | 0.528 | 0.632 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark_20211112.log.json) |
diff --git a/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
new file mode 100644
index 0000000000000000000000000000000000000000..5449af0ccd67ea433218a166b421888df3698ef8
--- /dev/null
+++ b/mmpose/configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/vipnas_dark_coco-wholebody.yml
@@ -0,0 +1,45 @@
+Models:
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: &id001
+ - ViPNAS
+ - DarkPose
+ Training Data: COCO-WholeBody
+ Name: td-hm_vipnas-mbv3_dark-8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.632
+ Body AR: 0.71
+ Face AP: 0.672
+ Face AR: 0.771
+ Foot AP: 0.53
+ Foot AR: 0.66
+ Hand AP: 0.404
+ Hand AR: 0.519
+ Whole AP: 0.508
+ Whole AR: 0.607
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_mbv3_coco_wholebody_256x192_dark-e2158108_20211205.pth
+- Config: configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192.py
+ In Collection: ViPNAS
+ Metadata:
+ Architecture: *id001
+ Training Data: COCO-WholeBody
+ Name: td-hm_vipnas-res50_dark-8xb64-210e_coco-wholebody-256x192
+ Results:
+ - Dataset: COCO-WholeBody
+ Metrics:
+ Body AP: 0.65
+ Body AR: 0.732
+ Face AP: 0.684
+ Face AR: 0.783
+ Foot AP: 0.55
+ Foot AR: 0.686
+ Hand AP: 0.437
+ Hand AR: 0.554
+ Whole AP: 0.528
+ Whole AR: 0.632
+ Task: Wholebody 2D Keypoint
+ Weights: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_wholebody_256x192_dark-67c0ce35_20211112.pth
diff --git a/pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth b/pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5a2783169751829b0fad53d9a6402bf9d2762839
--- /dev/null
+++ b/pretrain/td-hm_ViTPose-huge-simple_8xb64-210e_coco-256x192-ffd48c05_20230314.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd48c057be7f21ff22f4a077c87cebcd397b757e94de5605f43928c05e96392
+size 2524543895