ndkhanh95 committed on
Commit 29d411b · verified · 1 Parent(s): 17bce06

Upload 226 files

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. ViTPose/ckpts/vitpose-s-coco_25.pth +3 -0
  2. ViTPose/easy_ViTPose/.dockerignore +2 -0
  3. ViTPose/easy_ViTPose/.gitignore +13 -0
  4. ViTPose/easy_ViTPose/.ipynb_checkpoints/README-checkpoint.md +275 -0
  5. ViTPose/easy_ViTPose/.ipynb_checkpoints/colab_demo-checkpoint.ipynb +0 -0
  6. ViTPose/easy_ViTPose/.ipynb_checkpoints/evaluation_on_coco-checkpoint.py +92 -0
  7. ViTPose/easy_ViTPose/.ipynb_checkpoints/inference-checkpoint.py +188 -0
  8. ViTPose/easy_ViTPose/.ipynb_checkpoints/requirements_gpu-checkpoint.txt +3 -0
  9. ViTPose/easy_ViTPose/Dockerfile +11 -0
  10. ViTPose/easy_ViTPose/LICENSE +201 -0
  11. ViTPose/easy_ViTPose/README.md +275 -0
  12. ViTPose/easy_ViTPose/colab_demo.ipynb +0 -0
  13. ViTPose/easy_ViTPose/easy_ViTPose.egg-info/PKG-INFO +7 -0
  14. ViTPose/easy_ViTPose/easy_ViTPose.egg-info/SOURCES.txt +56 -0
  15. ViTPose/easy_ViTPose/easy_ViTPose.egg-info/dependency_links.txt +1 -0
  16. ViTPose/easy_ViTPose/easy_ViTPose.egg-info/top_level.txt +1 -0
  17. ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/ViTPose_Inference-checkpoint.ipynb +0 -0
  18. ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/__init__-checkpoint.py +5 -0
  19. ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/config-checkpoint.yaml +15 -0
  20. ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/inference-checkpoint.py +337 -0
  21. ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/testVITPOSE-checkpoint.jpg +0 -0
  22. ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/train-checkpoint.py +174 -0
  23. ViTPose/easy_ViTPose/easy_ViTPose/ViTPose_Inference.ipynb +0 -0
  24. ViTPose/easy_ViTPose/easy_ViTPose/__init__.py +5 -0
  25. ViTPose/easy_ViTPose/easy_ViTPose/config.yaml +15 -0
  26. ViTPose/easy_ViTPose/easy_ViTPose/configs/.ipynb_checkpoints/ViTPose_common-checkpoint.py +195 -0
  27. ViTPose/easy_ViTPose/easy_ViTPose/configs/.ipynb_checkpoints/ViTPose_small_coco_256x192-checkpoint.py +173 -0
  28. ViTPose/easy_ViTPose/easy_ViTPose/configs/.ipynb_checkpoints/ViTPose_wholebody-checkpoint.py +20 -0
  29. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_aic.py +20 -0
  30. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_ap10k.py +22 -0
  31. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_apt36k.py +22 -0
  32. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_coco.py +18 -0
  33. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_coco_25.py +20 -0
  34. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_common.py +195 -0
  35. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_mpii.py +18 -0
  36. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_small_coco_256x192.py +173 -0
  37. ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_wholebody.py +20 -0
  38. ViTPose/easy_ViTPose/easy_ViTPose/configs/__init__.py +0 -0
  39. ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/ViTPose_coco_25.cpython-39.pyc +0 -0
  40. ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/ViTPose_common.cpython-39.pyc +0 -0
  41. ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/ViTPose_small_coco_256x192.cpython-39.pyc +0 -0
  42. ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/__init__.cpython-39.pyc +0 -0
  43. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/300w.py +384 -0
  44. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/aflw.py +83 -0
  45. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/aic.py +140 -0
  46. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/aic_info.py +140 -0
  47. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/animalpose.py +166 -0
  48. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/ap10k.py +142 -0
  49. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/ap10k_info.py +142 -0
  50. ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/atrw.py +144 -0
ViTPose/ckpts/vitpose-s-coco_25.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5af7cbeb123e2a60bf25d981d4b89dab281f3fca18b7956b49a7a685b6311bfe
+ size 97235808
ViTPose/easy_ViTPose/.dockerignore ADDED
@@ -0,0 +1,2 @@
+ Dockerfile
+ models
ViTPose/easy_ViTPose/.gitignore ADDED
@@ -0,0 +1,13 @@
+ **/*.pt
+ **/*.pth
+ **/*.onnx
+ **/__pycache__
+ **/coco/
+ .DS_Store
+ runs
+ ckpts
+ annotations
+ examples
+ outputs
+ .ipynb_checkpoints
+ easy_ViTPose.egg-info
ViTPose/easy_ViTPose/.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,275 @@
+ # easy_ViTPose
+ <p align="center">
+ <img src="https://user-images.githubusercontent.com/24314647/236082274-b25a70c8-9267-4375-97b0-eddf60a7dfc6.png" width=375> easy_ViTPose
+ </p>
+
+ ## Accurate 2D human and animal pose estimation
+
+ <a target="_blank" href="https://colab.research.google.com/github/JunkyByte/easy_ViTPose/blob/main/colab_demo.ipynb">
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
+ </a>
+
+ ### Easy-to-use SOTA `ViTPose` [Y. Xu et al., 2022] models for fast inference.
+ We provide all of the original ViTPose models, converted for inference, with a single output format across datasets.
+
+ In addition, we also provide a COCO-25 model, trained on the original COCO dataset plus the feet keypoints from https://cmu-perceptual-computing-lab.github.io/foot_keypoint_dataset/
+ Finetuning is not currently supported; you can check de43d54cad87404cf0ad4a7b5da6bacf4240248b and previous commits for a working state of `train.py`.
+
+ > [!WARNING]
+ > Ultralytics `yolov8` has an issue with wrong bounding boxes when using `mps`; upgrade to the latest version! (Works correctly on 8.2.48)
+
+ ## Results
+ ![resimg](https://github.com/JunkyByte/easy_ViTPose/assets/24314647/51c0777f-b268-448a-af02-9a3537f288d8)
+
+ https://github.com/JunkyByte/easy_ViTPose/assets/24314647/e9a82c17-6e99-4111-8cc8-5257910cb87e
+
+ https://github.com/JunkyByte/easy_ViTPose/assets/24314647/63af44b1-7245-4703-8906-3f034a43f9e3
+
+ (Credits dance: https://www.youtube.com/watch?v=p-rSdt0aFuw )
+ (Credits zebras: https://www.youtube.com/watch?v=y-vELRYS8Yk )
+
+ ## Features
+ - Image / Video / Webcam support
+ - Video support using the SORT algorithm to track bboxes between frames
+ - Torch / ONNX / TensorRT inference
+ - Runs the original ViTPose checkpoints from [ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose)
+ - 4 ViTPose architectures with different sizes and performance (s: small, b: base, l: large, h: huge)
+ - Multiple skeletons and datasets (AIC / MPII / COCO / COCO + FEET / COCO WHOLEBODY / APT36k / AP10k)
+ - Human / animal pose estimation
+ - CPU / GPU / Metal support
+ - Show and save images / videos, and output to JSON
+
+ We run YOLOv8 for detection; it does not provide complete animal detection. You can finetune a custom YOLO model to detect the animal you are interested in;
+ if you do, please open an issue, as we might want to integrate other models for detection.
+
+ ### Benchmark
+ You can expect realtime (>30 fps) inference with modern NVIDIA GPUs and Apple silicon (using Metal!).
+
+ ### Skeleton reference
+ There are multiple skeletons for the different datasets. Check the definitions in [visualization.py](https://github.com/JunkyByte/easy_ViTPose/blob/main/easy_ViTPose/vit_utils/visualization.py).
+
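+ As a quick way to inspect a skeleton, a minimal sketch using the `joints_dict()` helper that `inference.py` relies on when saving JSON (the exact keys and structure of the returned dict are best checked in `visualization.py`):
+ ```python
+ # Minimal sketch: list available skeletons and print the COCO-25 keypoint names.
+ # Assumes the joints_dict() helper used by inference.py; dataset keys such as
+ # 'coco_25' are taken from the checkpoint naming used in this repo.
+ from easy_ViTPose.vit_utils.visualization import joints_dict
+
+ skeletons = joints_dict()
+ print(list(skeletons.keys()))  # available datasets / skeletons
+ for idx, name in skeletons['coco_25']['keypoints'].items():
+     print(idx, name)           # keypoint index -> joint name
+ ```
+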
+ ## Installation and Usage
+ > [!IMPORTANT]
+ > Install `torch>2.0` with CUDA / MPS support by yourself;
+ > also check `requirements_gpu.txt`.
+
+ ```bash
+ git clone git@github.com:JunkyByte/easy_ViTPose.git
+ cd easy_ViTPose/
+ pip install -e .
+ pip install -r requirements.txt
+ ```
+
+ ### Download models
+ - Download the models from [Huggingface](https://huggingface.co/JunkyByte/easy_ViTPose)
+ We provide torch models for every dataset and architecture.
+ If you want to run ONNX / TensorRT inference, download the appropriate torch ckpt and use `export.py` to convert it.
+ You can use the `ultralytics` `yolo export` command to export YOLO to ONNX and TensorRT as well.
+
+ #### Export to ONNX and TensorRT
+ ```bash
+ $ python export.py --help
+ usage: export.py [-h] --model-ckpt MODEL_CKPT --model-name {s,b,l,h} [--output OUTPUT] [--dataset DATASET]
+
+ optional arguments:
+   -h, --help            show this help message and exit
+   --model-ckpt MODEL_CKPT
+                         The torch model that shall be used for conversion
+   --model-name {s,b,l,h}
+                         [s: ViT-S, b: ViT-B, l: ViT-L, h: ViT-H]
+   --output OUTPUT       File (without extension) or dir path for checkpoint output
+   --dataset DATASET     Name of the dataset. If None it"s extracted from the file name. ["coco", "coco_25",
+                         "wholebody", "mpii", "ap10k", "apt36k", "aic"]
+ ```
+
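+ As an illustrative example (the paths are placeholders; only the flags documented in the help above are assumed), a conversion of the small COCO-25 checkpoint might look like `python export.py --model-ckpt ckpts/vitpose-s-coco_25.pth --model-name s --output ckpts/`.
+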
+ ### Run inference
+ To run inference from the command line you can use the `inference.py` script as follows:
+ ```bash
+ $ python inference.py --help
+ usage: inference.py [-h] [--input INPUT] [--output-path OUTPUT_PATH] --model MODEL [--yolo YOLO] [--dataset DATASET]
+                     [--det-class DET_CLASS] [--model-name {s,b,l,h}] [--yolo-size YOLO_SIZE]
+                     [--conf-threshold CONF_THRESHOLD] [--rotate {0,90,180,270}] [--yolo-step YOLO_STEP]
+                     [--single-pose] [--show] [--show-yolo] [--show-raw-yolo] [--save-img] [--save-json]
+
+ optional arguments:
+   -h, --help            show this help message and exit
+   --input INPUT         path to image / video or webcam ID (=cv2)
+   --output-path OUTPUT_PATH
+                         output path, if the path provided is a directory output files are "input_name
+                         +_result{extension}".
+   --model MODEL         checkpoint path of the model
+   --yolo YOLO           checkpoint path of the yolo model
+   --dataset DATASET     Name of the dataset. If None it"s extracted from the file name. ["coco", "coco_25",
+                         "wholebody", "mpii", "ap10k", "apt36k", "aic"]
+   --det-class DET_CLASS
+                         ["human", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
+                         "animals"]
+   --model-name {s,b,l,h}
+                         [s: ViT-S, b: ViT-B, l: ViT-L, h: ViT-H]
+   --yolo-size YOLO_SIZE
+                         YOLOv8 image size during inference
+   --conf-threshold CONF_THRESHOLD
+                         Minimum confidence for keypoints to be drawn. [0, 1] range
+   --rotate {0,90,180,270}
+                         Rotate the image of [90, 180, 270] degress counterclockwise
+   --yolo-step YOLO_STEP
+                         The tracker can be used to predict the bboxes instead of yolo for performance, this flag
+                         specifies how often yolo is applied (e.g. 1 applies yolo every frame). This does not have any
+                         effect when is_video is False
+   --single-pose         Do not use SORT tracker because single pose is expected in the video
+   --show                preview result during inference
+   --show-yolo           draw yolo results
+   --show-raw-yolo       draw yolo result before that SORT is applied for tracking (only valid during video inference)
+   --save-img            save image results
+   --save-json           save json results
+ ```
+
+ You can run inference from code as follows:
+ ```python
+ import cv2
+ from easy_ViTPose import VitInference
+
+ # Image to run inference on, in RGB format
+ img = cv2.imread('./examples/img1.jpg')
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+
+ # Set is_video=True to enable tracking in video inference,
+ # and be sure to call VitInference.reset() to reset the tracker after each video.
+ # There are a few flags that allow you to customize VitInference, be sure to check the class definition.
+ model_path = './ckpts/vitpose-s-coco_25.pth'
+ yolo_path = './yolov8s.pth'
+
+ # If you want to use MPS (on new MacBooks) use the torch checkpoints for both ViTPose and YOLO.
+ # If device is None it will try to use cuda -> mps -> cpu (otherwise specify 'cpu', 'mps' or 'cuda').
+ # The dataset and det_class parameters can be inferred from the ckpt name, but you can specify them.
+ model = VitInference(model_path, yolo_path, model_name='s', yolo_size=320, is_video=False, device=None)
+
+ # Infer keypoints; the output is a dict where keys are person ids and values are keypoints (np.ndarray (25, 3): (y, x, score)).
+ # If is_video=True the IDs will be consistent among the ordered video frames.
+ keypoints = model.inference(img)
+
+ # Call model.reset() after each video.
+
+ img = model.draw(show_yolo=True)  # Returns RGB image with drawings
+ cv2.imshow('image', cv2.cvtColor(img, cv2.COLOR_RGB2BGR)); cv2.waitKey(0)
+ ```
+ > [!NOTE]
+ > If the input file is a video, [SORT](https://github.com/abewley/sort) is used to track people IDs and output consistent identifications.
+
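+ For video inputs processed from code, a minimal sketch of the loop described above (it only assumes the `VitInference`, `inference()`, `draw()` and `reset()` calls shown in the previous example; `video.mp4` and the checkpoint paths are placeholders):
+ ```python
+ import cv2
+ from easy_ViTPose import VitInference
+
+ # is_video=True enables SORT tracking across frames
+ model = VitInference('./ckpts/vitpose-s-coco_25.pth', './yolov8s.pt',
+                      model_name='s', yolo_size=320, is_video=True, device=None)
+
+ cap = cv2.VideoCapture('video.mp4')  # placeholder path
+ while True:
+     ret, frame = cap.read()
+     if not ret:
+         break
+     frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+     keypoints = model.inference(frame_rgb)  # dict: track id -> (N, 3) array of (y, x, score)
+     drawn = model.draw(show_yolo=True)      # RGB image with the poses drawn
+     cv2.imshow('preview', cv2.cvtColor(drawn, cv2.COLOR_RGB2BGR))
+     if cv2.waitKey(1) & 0xFF == ord('q'):
+         break
+
+ cap.release()
+ cv2.destroyAllWindows()
+ model.reset()  # reset the SORT tracker before processing another video
+ ```
+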
+ ### Output JSON format
+ The output format of the JSON files:
+
+ ```
+ {
+     "keypoints":
+     [  # The list of frames, len(json['keypoints']) == len(video)
+         {  # For each frame, a dict
+             "0": [  # keys are the IDs used to track people, values are the keypoints
+                 [121.19, 458.15, 0.99],  # Each keypoint is (y, x, score)
+                 [110.02, 469.43, 0.98],
+                 [110.86, 445.04, 0.99],
+             ],
+             "1": [
+                 ...
+             ],
+         },
+         {
+             "0": [
+                 [122.19, 458.15, 0.91],
+                 [105.02, 469.43, 0.95],
+                 [122.86, 445.04, 0.99],
+             ],
+             "1": [
+                 ...
+             ]
+         }
+     ],
+     "skeleton":
+     {  # Skeleton reference: key is the index, value the joint name
+         "0": "nose",
+         "1": "left_eye",
+         "2": "right_eye",
+         "3": "left_ear",
+         "4": "right_ear",
+         "5": "neck",
+         ...
+     }
+ }
+ ```
+
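+ A small sketch of how the saved file can be consumed, assuming only the structure documented above (`video_result.json` is a placeholder file name):
+ ```python
+ import json
+
+ with open('video_result.json') as f:  # placeholder filename
+     data = json.load(f)
+
+ skeleton = data['skeleton']  # {"0": "nose", "1": "left_eye", ...}
+ for frame_idx, frame in enumerate(data['keypoints']):
+     for person_id, kpts in frame.items():  # person_id is a string track id
+         for joint_idx, (y, x, score) in enumerate(kpts):
+             name = skeleton[str(joint_idx)]
+             if score > 0.5:
+                 # use (x, y) for joint `name` of person `person_id` in frame `frame_idx`
+                 pass
+ ```
+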
+ ## Finetuning
+ Finetuning is possible but not officially supported right now. If you would like to finetune and need help, open an issue.
+ You can check `train.py`, `datasets/COCO.py` and `config.yaml` for details.
+
+ ---
+
+ ## Evaluation on the COCO dataset
+ 1. Download the COCO dataset images and labels:
+    - 2017 Val images [5K/1GB]: http://images.cocodataset.org/zips/val2017.zip <br>
+    The extracted directory looks like this:
+    ```
+    val2017/
+    ├── 000000000139.jpg
+    ├── 000000000285.jpg
+    ├── 000000000632.jpg
+    └── ...
+    ```
+    - 2017 Train/Val annotations [241MB]: http://images.cocodataset.org/annotations/annotations_trainval2017.zip <br>
+    The extracted directory looks like this:
+    ```
+    annotations/
+    ├── person_keypoints_val2017.json
+    ├── person_keypoints_train2017.json
+    └── ...
+    ```
+
+ 2. Run the following command:
+
+ ```bash
+
+ $ python evaluation_on_coco.py
+
+ Command line arguments:
+ --model_path: Path to the pretrained ViTPose model
+
+ --yolo_path: Path to the YOLOv8 model
+
+ --img_folder_path: Path to the directory containing the COCO val images (val2017/ extracted in step 1).
+
+ --annFile: Path to the JSON file with COCO keypoints for the val set (annotations/person_keypoints_val2017.json extracted in step 1)
+ ```
+
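+ An illustrative invocation (all paths are placeholders; the flags are the ones defined by `evaluation_on_coco.py`) could be `python evaluation_on_coco.py --model_path ckpts/vitpose-s-coco_25.pth --model-name s --yolo_path yolov8s.pt --img_folder_path val2017 --annFile annotations/person_keypoints_val2017.json`.
+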
+ ---
+
+
+ ## Docker
+ The system can be built in a container using Docker. This is intended to demonstrate containerized inference; adapt it to your own needs by changing models and skeletons:
+
+ `docker build . -t easy_vitpose`
+
+ The image is based on NVIDIA's PyTorch image, which is about 20 GB.
+ If you have a compatible GPU set up with the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html),
+ ViTPose will run with hardware acceleration.
+
+ To test an example, create a folder called `cats` with a picture of a cat as `image.jpg`.
+ Run `./models/download.sh` to fetch the large YOLOv8 and AP10k ViTPose models. Then run inference using the following command (replace with the correct `cats` and `models` paths):
+
+ `docker run --gpus all --rm -it --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -v ./models:/models -v ~/cats:/cats easy_vitpose python inference.py --det-class cat --input /cats/image.jpg --output-path /cats --save-img --model /models/vitpose-l-ap10k.onnx --yolo /models/yolov8l.pt`
+
+ The result image can be viewed in your `cats` folder.
+
+ ## TODO
+ - refactor finetuning (currently not available)
+ - benchmark and check bottlenecks of the inference pipeline
+ - parallel batched inference
+ - other minor fixes
+ - YOLO version for animal pose, check https://github.com/JunkyByte/easy_ViTPose/pull/18
+ - solve CUDA exceptions on script exit when using TensorRT (no idea how)
+ - add info about inferred settings during inference, better output of inference status (device etc.)
+ - check if it is possible to make Colab work without a runtime restart
+
+ Feel free to open issues and pull requests, and to contribute to these TODOs.
+
+ ## Reference
+ Thanks to the ViTPose authors and their official implementation [ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose).
+ The SORT code is taken from [abewley/sort](https://github.com/abewley/sort).
ViTPose/easy_ViTPose/.ipynb_checkpoints/colab_demo-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
ViTPose/easy_ViTPose/.ipynb_checkpoints/evaluation_on_coco-checkpoint.py ADDED
@@ -0,0 +1,92 @@
+ # Reference: https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
+
+ import argparse
+ import json
+ import os
+
+ import cv2
+ from tqdm.auto import tqdm
+
+ from pycocotools.coco import COCO
+ from pycocotools.cocoeval import COCOeval
+
+ from easy_ViTPose.inference import VitInference
+
+
+ def parse_arguments():
+     parser = argparse.ArgumentParser(description='Argument parser for COCO evaluation')
+     parser.add_argument('--model_path', type=str,
+                         help='Path to the ViTPose model')
+     parser.add_argument('--model-name', type=str, choices=['s', 'b', 'l', 'h'],
+                         help='[s: ViT-S, b: ViT-B, l: ViT-L, h: ViT-H]')
+     parser.add_argument('--yolo_path', type=str,
+                         help='Path to the YOLOv8 model')
+     parser.add_argument('--img_folder_path', type=str,
+                         help='Path to the folder containing images')
+     parser.add_argument('--annFile', type=str,
+                         help='Path to the COCO annotations file')
+     return parser.parse_args()
+
+
+ def evaluation_on_coco(model_path, model_name, yolo_path, img_folder_path, annFile):
+     # Collect the image IDs of the images in the val set
+     with open(annFile) as f:
+         gt_annotations = json.load(f)
+
+     image_ids = set()
+     for ann in gt_annotations['images']:
+         image_ids.add(ann['id'])
+
+     model = VitInference(model_path, yolo_path, model_name=model_name,
+                          yolo_size=640, is_video=False, device=None)
+     results_list = []
+
+     for image_id in tqdm(image_ids):
+         # Run inference on each image (COCO file names are the zero-padded image id)
+         img_path = os.path.join(img_folder_path, str(image_id).zfill(12) + '.jpg')
+         img = cv2.imread(img_path)
+         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+         frame_keypoints = model.inference(img)
+
+         # Convert every detected pose to the COCO results format
+         for key in frame_keypoints:
+             results_element = {}
+             results_element['image_id'] = image_id
+             results_element['category_id'] = 1
+             results_element['score'] = model._scores_bbox[key]
+             results_element['bbox'] = []
+             keypoints = []
+             for k in frame_keypoints[key]:
+                 keypoints.append(float(round(k[1], 0)))  # x
+                 keypoints.append(float(round(k[0], 0)))  # y
+                 keypoints.append(0)  # visibility flag (not used by the keypoint metric)
+             results_element['keypoints'] = keypoints
+             results_list.append(results_element)
+
+     # Save the list of result dicts to a JSON file
+     file_path = "results.json"
+     with open(file_path, "w") as json_file:
+         json.dump(results_list, json_file, indent=4)
+
+     # Initialize the COCO ground truth api
+     annType = 'keypoints'
+     cocoGt = COCO(annFile)
+     # Initialize the COCO detections api
+     resFile = "results.json"
+     cocoDt = cocoGt.loadRes(resFile)
+     # Run the evaluation
+     cocoEval = COCOeval(cocoGt, cocoDt, annType)
+     cocoEval.params.imgIds = [int(i) for i in image_ids]
+     cocoEval.evaluate()
+     cocoEval.accumulate()
+     cocoEval.summarize()
+
+
+ if __name__ == '__main__':
+     args = parse_arguments()
+     evaluation_on_coco(args.model_path, args.model_name, args.yolo_path,
+                        args.img_folder_path, args.annFile)
ViTPose/easy_ViTPose/.ipynb_checkpoints/inference-checkpoint.py ADDED
@@ -0,0 +1,188 @@
1
+ import argparse
2
+ import json
3
+ import os
4
+ import time
5
+
6
+ from PIL import Image
7
+ import cv2
8
+ import numpy as np
9
+ import torch
10
+ import tqdm
11
+
12
+ from easy_ViTPose.vit_utils.inference import NumpyEncoder, VideoReader
13
+ from easy_ViTPose.inference import VitInference
14
+ from easy_ViTPose.vit_utils.visualization import joints_dict
15
+
16
+ try:
17
+ import onnxruntime # noqa: F401
18
+ has_onnx = True
19
+ except ModuleNotFoundError:
20
+ has_onnx = False
21
+
22
+
23
+ if __name__ == "__main__":
24
+ parser = argparse.ArgumentParser()
25
+ parser.add_argument('--input', type=str, required=True,
26
+ help='path to image / video or webcam ID (=cv2)')
27
+ parser.add_argument('--output-path', type=str, default='',
28
+ help='output path, if the path provided is a directory '
29
+ 'output files are "input_name +_result{extension}".')
30
+ parser.add_argument('--model', type=str, required=True,
31
+ help='checkpoint path of the model')
32
+ parser.add_argument('--yolo', type=str, required=False, default=None,
33
+ help='checkpoint path of the yolo model')
34
+ parser.add_argument('--dataset', type=str, required=False, default=None,
35
+ help='Name of the dataset. If None it"s extracted from the file name. \
36
+ ["coco", "coco_25", "wholebody", "mpii", "ap10k", "apt36k", "aic"]')
37
+ parser.add_argument('--det-class', type=str, required=False, default=None,
38
+ help='["human", "cat", "dog", "horse", "sheep", \
39
+ "cow", "elephant", "bear", "zebra", "giraffe", "animals"]')
40
+ parser.add_argument('--model-name', type=str, required=False, choices=['s', 'b', 'l', 'h'],
41
+ help='[s: ViT-S, b: ViT-B, l: ViT-L, h: ViT-H]')
42
+ parser.add_argument('--yolo-size', type=int, required=False, default=320,
43
+ help='YOLOv8 image size during inference')
44
+ parser.add_argument('--conf-threshold', type=float, required=False, default=0.5,
45
+ help='Minimum confidence for keypoints to be drawn. [0, 1] range')
46
+ parser.add_argument('--rotate', type=int, choices=[0, 90, 180, 270],
47
+ required=False, default=0,
48
+ help='Rotate the image of [90, 180, 270] degress counterclockwise')
49
+ parser.add_argument('--yolo-step', type=int,
50
+ required=False, default=1,
51
+ help='The tracker can be used to predict the bboxes instead of yolo for performance, '
52
+ 'this flag specifies how often yolo is applied (e.g. 1 applies yolo every frame). '
53
+ 'This does not have any effect when is_video is False')
54
+ parser.add_argument('--single-pose', default=False, action='store_true',
55
+ help='Do not use SORT tracker because single pose is expected in the video')
56
+ parser.add_argument('--show', default=False, action='store_true',
57
+ help='preview result during inference')
58
+ parser.add_argument('--show-yolo', default=False, action='store_true',
59
+ help='draw yolo results')
60
+ parser.add_argument('--show-raw-yolo', default=False, action='store_true',
61
+ help='draw yolo result before that SORT is applied for tracking'
62
+ ' (only valid during video inference)')
63
+ parser.add_argument('--save-img', default=False, action='store_true',
64
+ help='save image results')
65
+ parser.add_argument('--save-json', default=False, action='store_true',
66
+ help='save json results')
67
+ args = parser.parse_args()
68
+
69
+ use_mps = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
70
+ use_cuda = torch.cuda.is_available()
71
+
72
+ # Load Yolo
73
+ yolo = args.yolo
74
+ if yolo is None:
75
+ yolo = 'easy_ViTPose/' + ('yolov8s' + ('.onnx' if has_onnx and not (use_mps or use_cuda) else '.pt'))
76
+ input_path = args.input
77
+
78
+ # Load the image / video reader
79
+ try: # Check if is webcam
80
+ int(input_path)
81
+ is_video = True
82
+ except ValueError:
83
+ assert os.path.isfile(input_path), 'The input file does not exist'
84
+ is_video = input_path[input_path.rfind('.') + 1:].lower() in ['mp4', 'mov']
85
+
86
+ ext = '.mp4' if is_video else '.png'
87
+ assert not (args.save_img or args.save_json) or args.output_path, \
88
+ 'Specify an output path if using save-img or save-json flags'
89
+ output_path = args.output_path
90
+ if output_path:
91
+ if os.path.isdir(output_path):
92
+ og_ext = input_path[input_path.rfind('.'):]
93
+ save_name_img = os.path.basename(input_path).replace(og_ext, f"_result{ext}")
94
+ save_name_json = os.path.basename(input_path).replace(og_ext, "_result.json")
95
+ output_path_img = os.path.join(output_path, save_name_img)
96
+ output_path_json = os.path.join(output_path, save_name_json)
97
+ else:
98
+ output_path_img = output_path + f'{ext}'
99
+ output_path_json = output_path + '.json'
100
+
101
+ wait = 0
102
+ total_frames = 1
103
+ if is_video:
104
+ reader = VideoReader(input_path, args.rotate)
105
+ cap = cv2.VideoCapture(input_path) # type: ignore
106
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
107
+ cap.release()
108
+ wait = 15
109
+ if args.save_img:
110
+ cap = cv2.VideoCapture(input_path) # type: ignore
111
+ fps = cap.get(cv2.CAP_PROP_FPS)
112
+ ret, frame = cap.read()
113
+ cap.release()
114
+ assert ret
115
+ assert fps > 0
116
+ output_size = frame.shape[:2][::-1]
117
+
118
+ # Check if we have X264 otherwise use default MJPG
119
+ try:
120
+ temp_video = cv2.VideoWriter('/tmp/checkcodec.mp4',
121
+ cv2.VideoWriter_fourcc(*'h264'), 30, (32, 32))
122
+ opened = temp_video.isOpened()
123
+ except Exception:
124
+ opened = False
125
+ codec = 'h264' if opened else 'MJPG'
126
+ out_writer = cv2.VideoWriter(output_path_img,
127
+ cv2.VideoWriter_fourcc(*codec), # More efficient codec
128
+ fps, output_size) # type: ignore
129
+ else:
130
+ reader = [np.array(Image.open(input_path).rotate(args.rotate))] # type: ignore
131
+
132
+ # Initialize model
133
+ model = VitInference(args.model, yolo, args.model_name,
134
+ args.det_class, args.dataset,
135
+ args.yolo_size, is_video=is_video,
136
+ single_pose=args.single_pose,
137
+ yolo_step=args.yolo_step) # type: ignore
138
+ print(f">>> Model loaded: {args.model}")
139
+
140
+ print(f'>>> Running inference on {input_path}')
141
+ keypoints = []
142
+ fps = []
143
+ tot_time = 0.
144
+ for (ith, img) in tqdm.tqdm(enumerate(reader), total=total_frames):
145
+ t0 = time.time()
146
+
147
+ # Run inference
148
+ frame_keypoints = model.inference(img)
149
+ keypoints.append(frame_keypoints)
150
+
151
+ delta = time.time() - t0
152
+ tot_time += delta
153
+ fps.append(delta)
154
+
155
+ # Draw the poses and save the output img
156
+ if args.show or args.save_img:
157
+ # Draw result and transform to BGR
158
+ img = model.draw(args.show_yolo, args.show_raw_yolo, args.conf_threshold)[..., ::-1]
159
+
160
+ if args.save_img:
161
+ # TODO: If exists add (1), (2), ...
162
+ if is_video:
163
+ out_writer.write(img)
164
+ else:
165
+ print('>>> Saving output image')
166
+ cv2.imwrite(output_path_img, img)
167
+
168
+ if args.show:
169
+ cv2.imshow('preview', img)
170
+ cv2.waitKey(wait)
171
+
172
+ if is_video:
173
+ tot_poses = sum(len(k) for k in keypoints)
174
+ print(f'>>> Mean inference FPS: {1 / np.mean(fps):.2f}')
175
+ print(f'>>> Total poses predicted: {tot_poses} mean per frame: '
176
+ f'{(tot_poses / (ith + 1)):.2f}')
177
+ print(f'>>> Mean FPS per pose: {(tot_poses / tot_time):.2f}')
178
+
179
+ if args.save_json:
180
+ print('>>> Saving output json')
181
+ with open(output_path_json, 'w') as f:
182
+ out = {'keypoints': keypoints,
183
+ 'skeleton': joints_dict()[model.dataset]['keypoints']}
184
+ json.dump(out, f, cls=NumpyEncoder)
185
+
186
+ if is_video and args.save_img:
187
+ out_writer.release()
188
+ cv2.destroyAllWindows()
ViTPose/easy_ViTPose/.ipynb_checkpoints/requirements_gpu-checkpoint.txt ADDED
@@ -0,0 +1,3 @@
+ onnxruntime-gpu>=1.13.0
+ tensorrt>=8.5.1.7
+ torch-tensorrt>=1.4.0
ViTPose/easy_ViTPose/Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM nvcr.io/nvidia/pytorch:24.07-py3
+ COPY . /easy_ViTPose
+ WORKDIR /easy_ViTPose/
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ RUN pip uninstall -y $(pip list --format=freeze | grep opencv) && \
+     rm -rf /usr/local/lib/python3.10/dist-packages/cv2/
+ RUN pip install -e . && pip install -r requirements.txt && pip install -r requirements_gpu.txt
+
+ # OpenCV dependency
+ RUN apt-get update && apt-get install -y libgl1
ViTPose/easy_ViTPose/LICENSE ADDED
@@ -0,0 +1,201 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
ViTPose/easy_ViTPose/README.md ADDED
@@ -0,0 +1,275 @@
1
+ # easy_ViTPose
2
+ <p align="center">
3
+ <img src="https://user-images.githubusercontent.com/24314647/236082274-b25a70c8-9267-4375-97b0-eddf60a7dfc6.png" width=375> easy_ViTPose
4
+ </p>
5
+
6
+ ## Accurate 2d human and animal pose estimation
7
+
8
+ <a target="_blank" href="https://colab.research.google.com/github/JunkyByte/easy_ViTPose/blob/main/colab_demo.ipynb">
9
+ <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
10
+ </a>
11
+
12
+ ### Easy to use SOTA `ViTPose` [Y. Xu et al., 2022] models for fast inference.
13
+ We provide all the VitPose original models, converted for inference, with single dataset format output.
14
+
15
+ In addition to that we also provide a Coco-25 model, trained on the original coco dataset + feet https://cmu-perceptual-computing-lab.github.io/foot_keypoint_dataset/
16
+ Finetuning is not currently supported, you can check de43d54cad87404cf0ad4a7b5da6bacf4240248b and previous commits for a working state of `train.py`
17
+
18
+ > [!WARNING]
19
+ > Ultralytics `yolov8` has issue with wrong bounding boxes when using `mps`, upgrade to latest version! (Works correctly on 8.2.48)
20
+
21
+ ## Results
22
+ ![resimg](https://github.com/JunkyByte/easy_ViTPose/assets/24314647/51c0777f-b268-448a-af02-9a3537f288d8)
23
+
24
+ https://github.com/JunkyByte/easy_ViTPose/assets/24314647/e9a82c17-6e99-4111-8cc8-5257910cb87e
25
+
26
+ https://github.com/JunkyByte/easy_ViTPose/assets/24314647/63af44b1-7245-4703-8906-3f034a43f9e3
27
+
28
+ (Credits dance: https://www.youtube.com/watch?v=p-rSdt0aFuw )
29
+ (Credits zebras: https://www.youtube.com/watch?v=y-vELRYS8Yk )
30
+
31
+ ## Features
32
+ - Image / Video / Webcam support
33
+ - Video support using SORT algorithm to track bboxes between frames
34
+ - Torch / ONNX / Tensorrt inference
35
+ - Runs the original VitPose checkpoints from [ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose)
36
+ - 4 ViTPose architectures with different sizes and performances (s: small, b: base, l: large, h: huge)
37
+ - Multi skeleton and dataset: (AIC / MPII / COCO / COCO + FEET / COCO WHOLEBODY / APT36k / AP10k)
38
+ - Human / Animal pose estimation
39
+ - cpu / gpu / metal support
40
+ - show and save images / videos and output to json
41
+
42
+ We run YOLOv8 for detection, it does not provide complete animal detection. You can finetune a custom yolo model to detect the animal you are interested in,
43
+ if you do please open an issue, we might want to integrate other models for detection.
44
+
45
+ ### Benchmark:
46
+ You can expect realtime >30 fps with modern nvidia gpus and apple silicon (using metal!).
47
+
48
+ ### Skeleton reference
49
+ There are multiple skeletons for different dataset. Check the definition here [visualization.py](https://github.com/JunkyByte/easy_ViTPose/blob/main/easy_ViTPose/vit_utils/visualization.py).
50
+
51
+ ## Installation and Usage
52
+ > [!IMPORTANT]
53
+ > Install `torch>2.0 with cuda / mps support` by yourself.
54
+ > also check `requirements_gpu.txt`.
55
+
56
+ ```bash
57
+ git clone [email protected]:JunkyByte/easy_ViTPose.git
58
+ cd easy_ViTPose/
59
+ pip install -e .
60
+ pip install -r requirements.txt
61
+ ```
62
+
63
+ ### Download models
64
+ - Download the models from [Huggingface](https://huggingface.co/JunkyByte/easy_ViTPose)
65
+ We provide torch models for every dataset and architecture.
66
+ If you want to run onnx / tensorrt inference download the appropriate torch ckpt and use `export.py` to convert it.
67
+ You can use `ultralytics` `yolo export` command to export yolo to onnx and tensorrt as well.
68
+
69
+ #### Export to onnx and tensorrt
70
+ ```bash
71
+ $ python export.py --help
72
+ usage: export.py [-h] --model-ckpt MODEL_CKPT --model-name {s,b,l,h} [--output OUTPUT] [--dataset DATASET]
73
+
74
+ optional arguments:
75
+ -h, --help show this help message and exit
76
+ --model-ckpt MODEL_CKPT
77
+ The torch model that shall be used for conversion
78
+ --model-name {s,b,l,h}
79
+ [s: ViT-S, b: ViT-B, l: ViT-L, h: ViT-H]
80
+ --output OUTPUT File (without extension) or dir path for checkpoint output
81
+ --dataset DATASET Name of the dataset. If None it"s extracted from the file name. ["coco", "coco_25",
82
+ "wholebody", "mpii", "ap10k", "apt36k", "aic"]
83
+ ```
84
+
85
+ ### Run inference
86
+ To run inference from command line you can use the `inference.py` script as follows:
87
+ ```bash
88
+ $ python inference.py --help
89
+ usage: inference.py [-h] [--input INPUT] [--output-path OUTPUT_PATH] --model MODEL [--yolo YOLO] [--dataset DATASET]
90
+ [--det-class DET_CLASS] [--model-name {s,b,l,h}] [--yolo-size YOLO_SIZE]
91
+ [--conf-threshold CONF_THRESHOLD] [--rotate {0,90,180,270}] [--yolo-step YOLO_STEP]
92
+ [--single-pose] [--show] [--show-yolo] [--show-raw-yolo] [--save-img] [--save-json]
93
+
94
+ optional arguments:
95
+ -h, --help show this help message and exit
96
+ --input INPUT path to image / video or webcam ID (=cv2)
97
+ --output-path OUTPUT_PATH
98
+ output path, if the path provided is a directory output files are "input_name
99
+ +_result{extension}".
100
+ --model MODEL checkpoint path of the model
101
+ --yolo YOLO checkpoint path of the yolo model
102
+ --dataset DATASET Name of the dataset. If None it"s extracted from the file name. ["coco", "coco_25",
103
+ "wholebody", "mpii", "ap10k", "apt36k", "aic"]
104
+ --det-class DET_CLASS
105
+ ["human", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
106
+ "animals"]
107
+ --model-name {s,b,l,h}
108
+ [s: ViT-S, b: ViT-B, l: ViT-L, h: ViT-H]
109
+ --yolo-size YOLO_SIZE
110
+ YOLOv8 image size during inference
111
+ --conf-threshold CONF_THRESHOLD
112
+ Minimum confidence for keypoints to be drawn. [0, 1] range
113
+ --rotate {0,90,180,270}
114
+ Rotate the image of [90, 180, 270] degress counterclockwise
115
+ --yolo-step YOLO_STEP
116
+ The tracker can be used to predict the bboxes instead of yolo for performance, this flag
117
+ specifies how often yolo is applied (e.g. 1 applies yolo every frame). This does not have any
118
+ effect when is_video is False
119
+ --single-pose Do not use SORT tracker because single pose is expected in the video
120
+ --show preview result during inference
121
+ --show-yolo draw yolo results
122
+ --show-raw-yolo draw yolo result before that SORT is applied for tracking (only valid during video inference)
123
+ --save-img save image results
124
+ --save-json save json results
125
+ ```
126
+
127
+ You can run inference from code as follows:
128
+ ```python
129
+ import cv2
130
+ from easy_ViTPose import VitInference
131
+
132
+ # Image to run inference RGB format
133
+ img = cv2.imread('./examples/img1.jpg')
134
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
135
+
136
+ # set is_video=True to enable tracking in video inference
137
+ # be sure to use VitInference.reset() function to reset the tracker after each video
138
+ # There are a few flags that allows to customize VitInference, be sure to check the class definition
139
+ model_path = './ckpts/vitpose-s-coco_25.pth'
140
+ yolo_path = './yolov8s.pth'
141
+
142
+ # If you want to use MPS (on new macbooks) use the torch checkpoints for both ViTPose and Yolo
143
+ # If device is None will try to use cuda -> mps -> cpu (otherwise specify 'cpu', 'mps' or 'cuda')
144
+ # dataset and det_class parameters can be inferred from the ckpt name, but you can specify them.
145
+ model = VitInference(model_path, yolo_path, model_name='s', yolo_size=320, is_video=False, device=None)
146
+
147
+ # Infer keypoints, output is a dict where keys are person ids and values are keypoints (np.ndarray (25, 3): (y, x, score))
148
+ # If is_video=True the IDs will be consistent among the ordered video frames.
149
+ keypoints = model.inference(img)
150
+
151
+ # call model.reset() after each video
152
+
153
+ img = model.draw(show_yolo=True) # Returns RGB image with drawings
154
+ cv2.imshow('image', cv2.cvtColor(img, cv2.COLOR_RGB2BGR)); cv2.waitKey(0)
155
+ ```
156
+ > [!NOTE]
157
+ > If the input file is a video [SORT](https://github.com/abewley/sort) is used to track people IDs and output consistent identifications.
158
+
159
+ ### OUTPUT json format
160
+ The output format of the json files:
161
+
162
+ ```
163
+ {
164
+ "keypoints":
165
+ [ # The list of frames, len(json['keypoints']) == len(video)
166
+ { # For each frame a dict
167
+ "0": [ # keys are id to track people and value the keypoints
168
+ [121.19, 458.15, 0.99], # Each keypoint is (y, x, score)
169
+ [110.02, 469.43, 0.98],
170
+ [110.86, 445.04, 0.99],
171
+ ],
172
+ "1": [
173
+ ...
174
+ ],
175
+ },
176
+ {
177
+ "0": [
178
+ [122.19, 458.15, 0.91],
179
+ [105.02, 469.43, 0.95],
180
+ [122.86, 445.04, 0.99],
181
+ ],
182
+ "1": [
183
+ ...
184
+ ]
185
+ }
186
+ ],
187
+ "skeleton":
188
+ { # Skeleton reference, key the idx, value the name
189
+ "0": "nose",
190
+ "1": "left_eye",
191
+ "2": "right_eye",
192
+ "3": "left_ear",
193
+ "4": "right_ear",
194
+ "5": "neck",
195
+ ...
196
+ }
197
+ }
198
+ ```
199
+
200
+ ## Finetuning
201
+ Finetuning is possible but not officially supported right now. If you would like to finetune and need help open an issue.
202
+ You can check `train.py`, `datasets/COCO.py` and `config.yaml` for details.
203
+
204
+ ---
205
+
206
+ ## Evaluation on COCO dataset
207
+ 1. Download COCO dataset images and labels
208
+ - 2017 Val images [5K/1GB]: http://images.cocodataset.org/zips/val2017.zip <br>
209
+ The extracted directory looks like this:
210
+ ```
211
+ val2017/
212
+ ├── 000000000139.jpg
213
+ ├── 000000000285.jpg
214
+ ├── 000000000632.jpg
215
+ └── ...
216
+ ```
217
+ - 2017 Train/Val annotations [241MB]: http://images.cocodataset.org/annotations/annotations_trainval2017.zip <br>
218
+ The extracted directory looks like this:
219
+ ```
220
+ annotations/
221
+ ├── person_keypoints_val2017.json
222
+ ├── person_keypoints_train2017.json
223
+ └── ...
224
+ ```
225
+
226
+ 2. Run the following command:
227
+
228
+ ```bash
229
+
230
+ $ python evaluation_on_coco.py
231
+
232
+ Command line arguments:
233
+ --model_path: Path to the pretrained ViT Pose model
234
+
235
+ --yolo_path: Path to the YOLOv8 model
236
+
237
+ --img_folder_path: Path to the directory containing COCO val images (/val2017 extracted in step 1).
238
+
239
+ --annFile: Path to json file for COCO keypoints for val set (annotations/person_keypoints_val2017.json extracted in step 1)
240
+ ```
241
+
242
+ ---
243
+
244
+
245
+ ## Docker
246
+ The system may be built in a container using Docker. This is intended to demonstrate container-wise inference, adapt it to your own needs by changing models and skeletons:
247
+
248
+ `docker build . -t easy_vitpose`
249
+
250
+ The image is based on NVIDIA's PyTorch image, which is 20GB large.
251
+ If you have a compatible GPU set up with [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html),
252
+ ViTPose will run with hardware acceleration.
253
+
254
+ To test an example, create a folder called `cats` with a picture of a cat as `image.jpg`.
255
+ Run `./models/download.sh` to fetch the large yolov8 and ap10k ViTPose models. Then run inference using the following command (replace with the correct `cats` and `models` paths):
256
+
257
+ `docker run --gpus all --rm -it --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 -v ./models:/models -v ~/cats:/cats easy_vitpose python inference.py --det-class cat --input /cats/image.jpg --output-path /cats --save-img --model /models/vitpose-l-ap10k.onnx --yolo /models/yolov8l.pt`
258
+
259
+ The result image may be viewed in your `cats` folder.
260
+
261
+ ## TODO:
262
+ - refactor finetuning (currently not available)
263
+ - benchmark and check bottlenecks of inference pipeline
264
+ - parallel batched inference
265
+ - other minor fixes
266
+ - yolo version for animal pose, check https://github.com/JunkyByte/easy_ViTPose/pull/18
267
+ - solve cuda exceptions on script exit when using tensorrt (no idea how)
268
+ - add infos about inferred informations during inference, better output of inference status (device etc)
269
+ - check if is possible to make colab work without runtime restart
270
+
271
+ Feel free to open issues, pull requests and contribute on these TODOs.
272
+
273
+ ## Reference
274
+ Thanks to the VitPose authors and their official implementation [ViTAE-Transformer/ViTPose](https://github.com/ViTAE-Transformer/ViTPose).
275
+ The SORT code is taken from [abewley/sort](https://github.com/abewley/sort)
ViTPose/easy_ViTPose/colab_demo.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
ViTPose/easy_ViTPose/easy_ViTPose.egg-info/PKG-INFO ADDED
@@ -0,0 +1,7 @@
+ Metadata-Version: 2.1
+ Name: easy_ViTPose
+ Version: 1.1
+ Home-page: https://github.com/JunkyByte/easy_ViTPose
+ Author: JunkyByte
+ Author-email: [email protected]
+ License-File: LICENSE
ViTPose/easy_ViTPose/easy_ViTPose.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,56 @@
+ LICENSE
+ README.md
+ setup.py
+ easy_ViTPose/__init__.py
+ easy_ViTPose/inference.py
+ easy_ViTPose/sort.py
+ easy_ViTPose/train.py
+ easy_ViTPose.egg-info/PKG-INFO
+ easy_ViTPose.egg-info/SOURCES.txt
+ easy_ViTPose.egg-info/dependency_links.txt
+ easy_ViTPose.egg-info/top_level.txt
+ easy_ViTPose/configs/ViTPose_aic.py
+ easy_ViTPose/configs/ViTPose_ap10k.py
+ easy_ViTPose/configs/ViTPose_apt36k.py
+ easy_ViTPose/configs/ViTPose_coco.py
+ easy_ViTPose/configs/ViTPose_coco_25.py
+ easy_ViTPose/configs/ViTPose_common.py
+ easy_ViTPose/configs/ViTPose_mpii.py
+ easy_ViTPose/configs/ViTPose_wholebody.py
+ easy_ViTPose/configs/__init__.py
+ easy_ViTPose/datasets/COCO.py
+ easy_ViTPose/datasets/HumanPoseEstimation.py
+ easy_ViTPose/datasets/__init__.py
+ easy_ViTPose/vit_models/__init__.py
+ easy_ViTPose/vit_models/model.py
+ easy_ViTPose/vit_models/optimizer.py
+ easy_ViTPose/vit_models/backbone/__init__.py
+ easy_ViTPose/vit_models/backbone/vit.py
+ easy_ViTPose/vit_models/head/__init__.py
+ easy_ViTPose/vit_models/head/topdown_heatmap_base_head.py
+ easy_ViTPose/vit_models/head/topdown_heatmap_simple_head.py
+ easy_ViTPose/vit_models/losses/__init__.py
+ easy_ViTPose/vit_models/losses/classfication_loss.py
+ easy_ViTPose/vit_models/losses/heatmap_loss.py
+ easy_ViTPose/vit_models/losses/mesh_loss.py
+ easy_ViTPose/vit_models/losses/mse_loss.py
+ easy_ViTPose/vit_models/losses/multi_loss_factory.py
+ easy_ViTPose/vit_models/losses/regression_loss.py
+ easy_ViTPose/vit_utils/__init__.py
+ easy_ViTPose/vit_utils/dist_util.py
+ easy_ViTPose/vit_utils/inference.py
+ easy_ViTPose/vit_utils/logging.py
+ easy_ViTPose/vit_utils/top_down_eval.py
+ easy_ViTPose/vit_utils/train_valid_fn.py
+ easy_ViTPose/vit_utils/transform.py
+ easy_ViTPose/vit_utils/util.py
+ easy_ViTPose/vit_utils/visualization.py
+ easy_ViTPose/vit_utils/nms/__init__.py
+ easy_ViTPose/vit_utils/nms/nms.py
+ easy_ViTPose/vit_utils/nms/nms_ori.py
+ easy_ViTPose/vit_utils/nms/setup_linux.py
+ easy_ViTPose/vit_utils/post_processing/__init__.py
+ easy_ViTPose/vit_utils/post_processing/group.py
+ easy_ViTPose/vit_utils/post_processing/nms.py
+ easy_ViTPose/vit_utils/post_processing/one_euro_filter.py
+ easy_ViTPose/vit_utils/post_processing/post_transforms.py
ViTPose/easy_ViTPose/easy_ViTPose.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
1
+
ViTPose/easy_ViTPose/easy_ViTPose.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ easy_ViTPose
ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/ViTPose_Inference-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/__init__-checkpoint.py ADDED
@@ -0,0 +1,5 @@
1
+ from .inference import VitInference
2
+
3
+ __all__ = [
4
+ 'VitInference'
5
+ ]
ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/config-checkpoint.yaml ADDED
@@ -0,0 +1,15 @@
1
+ # Train config ---------------------------------------
2
+ log_level: logging.INFO
3
+ seed: 0
4
+ # gpu_ids: 0  # duplicate key, superseded by the list form below
5
+ deterministic: True
6
+ cudnn_benchmark: True # Use cudnn
7
+ resume_from: "C:/Users/user/ViTPose/ckpts/vitpose-s-coco_25.pth" # CKPT path
8
+ #resume_from: False
9
+ gpu_ids: [0]
10
+ launcher: 'none' # When distributed training ['none', 'pytorch', 'slurm', 'mpi']
11
+ use_amp: False
12
+ validate: True
13
+ autoscale_lr: False
14
+ dist_params:
15
+ ...
ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/inference-checkpoint.py ADDED
@@ -0,0 +1,337 @@
1
+ import abc
2
+ import os
3
+ from typing import Optional
4
+ import typing
5
+
6
+ import cv2
7
+ import numpy as np
8
+ import torch
9
+
10
+ from ultralytics import YOLO
11
+
12
+ from .configs.ViTPose_common import data_cfg
13
+ from .sort import Sort
14
+ from .vit_models.model import ViTPose
15
+ from .vit_utils.inference import draw_bboxes, pad_image
16
+ from .vit_utils.top_down_eval import keypoints_from_heatmaps
17
+ from .vit_utils.util import dyn_model_import, infer_dataset_by_path
18
+ from .vit_utils.visualization import draw_points_and_skeleton, joints_dict
19
+
20
+ try:
21
+ import torch_tensorrt
22
+ except ModuleNotFoundError:
23
+ pass
24
+
25
+ try:
26
+ import onnxruntime
27
+ except ModuleNotFoundError:
28
+ pass
29
+
30
+ __all__ = ['VitInference']
31
+ np.bool = np.bool_
32
+ MEAN = [0.485, 0.456, 0.406]
33
+ STD = [0.229, 0.224, 0.225]
34
+
35
+
36
+ DETC_TO_YOLO_YOLOC = {
37
+ 'human': [0],
38
+ 'cat': [15],
39
+ 'dog': [16],
40
+ 'horse': [17],
41
+ 'sheep': [18],
42
+ 'cow': [19],
43
+ 'elephant': [20],
44
+ 'bear': [21],
45
+ 'zebra': [22],
46
+ 'giraffe': [23],
47
+ 'animals': [15, 16, 17, 18, 19, 20, 21, 22, 23]
48
+ }
49
+
50
+
51
+ class VitInference:
52
+ """
53
+ Class for performing inference using ViTPose models with YOLOv8 human detection and SORT tracking.
54
+
55
+ Args:
56
+ model (str): Path to the ViT model file (.pth, .onnx, .engine).
57
+ yolo (str): Path of the YOLOv8 model to load.
58
+ model_name (str, optional): Name of the ViT model architecture to use.
59
+ Valid values are 's', 'b', 'l', 'h'.
60
+ Defaults to None, is necessary when using .pth checkpoints.
61
+ det_class (str, optional): the detection class. if None it is inferred by the dataset.
62
+ valid values are 'human', 'cat', 'dog', 'horse', 'sheep',
63
+ 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
64
+ 'animals' (which is all previous but human)
65
+ dataset (str, optional): Name of the dataset. If None it's extracted from the file name.
66
+ Valid values are 'coco', 'coco_25', 'wholebody', 'mpii',
67
+ 'ap10k', 'apt36k', 'aic'
68
+ yolo_size (int, optional): Size of the input image for YOLOv8 model. Defaults to 320.
69
+ device (str, optional): Device to use for inference. Defaults to 'cuda' if available, else 'cpu'.
70
+ is_video (bool, optional): Flag indicating if the input is video. Defaults to False.
71
+ single_pose (bool, optional): Flag indicating if the video (on images this flag has no effect)
72
+ will contain a single pose.
73
+ In this case the SORT tracker is not used (increasing performance)
74
+ but people id tracking
75
+ won't be consistent among frames.
76
+ yolo_step (int, optional): The tracker can be used to predict the bboxes instead of yolo for performance,
77
+ this flag specifies how often yolo is applied (e.g. 1 applies yolo every frame).
78
+ This does not have any effect when is_video is False.
79
+ """
80
+
81
+ def __init__(self, model: str,
82
+ yolo: str,
83
+ model_name: Optional[str] = None,
84
+ det_class: Optional[str] = None,
85
+ dataset: Optional[str] = None,
86
+ yolo_size: Optional[int] = 320,
87
+ device: Optional[str] = None,
88
+ is_video: Optional[bool] = False,
89
+ single_pose: Optional[bool] = False,
90
+ yolo_step: Optional[int] = 1):
91
+ assert os.path.isfile(model), f'The model file {model} does not exist'
92
+ assert os.path.isfile(yolo), f'The YOLOv8 model {yolo} does not exist'
93
+
94
+ # Device priority is cuda / mps / cpu
95
+ if device is None:
96
+ if torch.cuda.is_available():
97
+ device = 'cuda'
98
+ elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
99
+ device = 'mps'
100
+ else:
101
+ device = 'cpu'
102
+
103
+ self.device = device
104
+ self.yolo = YOLO(yolo, task='detect')
105
+ self.yolo_size = yolo_size
106
+ self.yolo_step = yolo_step
107
+ self.is_video = is_video
108
+ self.single_pose = single_pose
109
+ self.reset()
110
+
111
+ # State saving during inference
112
+ self.save_state = True # Can be disabled manually
113
+ self._img = None
114
+ self._yolo_res = None
115
+ self._tracker_res = None
116
+ self._keypoints = None
117
+
118
+ # Use extension to decide which kind of model has been loaded
119
+ use_onnx = model.endswith('.onnx')
120
+ use_trt = model.endswith('.engine')
121
+
122
+
123
+ # Extract dataset name
124
+ if dataset is None:
125
+ dataset = infer_dataset_by_path(model)
126
+
127
+ assert dataset in ['mpii', 'coco', 'coco_25', 'wholebody', 'aic', 'ap10k', 'apt36k'], \
128
+ 'The specified dataset is not valid'
129
+
130
+ # Dataset can now be set for visualization
131
+ self.dataset = dataset
132
+
133
+ # if we picked the dataset switch to correct yolo classes if not set
134
+ if det_class is None:
135
+ det_class = 'animals' if dataset in ['ap10k', 'apt36k'] else 'human'
136
+ self.yolo_classes = DETC_TO_YOLO_YOLOC[det_class]
137
+
138
+ assert model_name in [None, 's', 'b', 'l', 'h'], \
139
+ f'The model name {model_name} is not valid'
140
+
141
+ # onnx / trt models do not require model_cfg specification
142
+ if model_name is None:
143
+ assert use_onnx or use_trt, \
144
+ 'Specify the model_name if not using onnx / trt'
145
+ else:
146
+ # Dynamically import the model class
147
+ model_cfg = dyn_model_import(self.dataset, model_name)
148
+
149
+ self.target_size = data_cfg['image_size']
150
+ if use_onnx:
151
+ self._ort_session = onnxruntime.InferenceSession(model,
152
+ providers=['CUDAExecutionProvider',
153
+ 'CPUExecutionProvider'])
154
+ inf_fn = self._inference_onnx
155
+ else:
156
+ self._vit_pose = ViTPose(model_cfg)
157
+ self._vit_pose.eval()
158
+
159
+ if use_trt:
160
+ self._vit_pose = torch.jit.load(model)
161
+ else:
162
+ ckpt = torch.load(model, map_location='cpu', weights_only=True)
163
+ if 'state_dict' in ckpt:
164
+ self._vit_pose.load_state_dict(ckpt['state_dict'])
165
+ else:
166
+ self._vit_pose.load_state_dict(ckpt)
167
+ self._vit_pose.to(torch.device(device))
168
+
169
+ inf_fn = self._inference_torch
170
+
171
+ # Override _inference abstract with selected engine
172
+ self._inference = inf_fn # type: ignore
173
+
174
+ def reset(self):
175
+ """
176
+ Reset the inference class to be ready for a new video.
177
+ This will reset the internal counter of frames, on videos
178
+ this is necessary to reset the tracker.
179
+ """
180
+ min_hits = 3 if self.yolo_step == 1 else 1
181
+ use_tracker = self.is_video and not self.single_pose
182
+ self.tracker = Sort(max_age=self.yolo_step,
183
+ min_hits=min_hits,
184
+ iou_threshold=0.3) if use_tracker else None # TODO: Params
185
+ self.frame_counter = 0
186
+
187
+ @classmethod
188
+ def postprocess(cls, heatmaps, org_w, org_h):
189
+ """
190
+ Postprocess the heatmaps to obtain keypoints and their probabilities.
191
+
192
+ Args:
193
+ heatmaps (ndarray): Heatmap predictions from the model.
194
+ org_w (int): Original width of the image.
195
+ org_h (int): Original height of the image.
196
+
197
+ Returns:
198
+ ndarray: Processed keypoints with probabilities.
199
+ """
200
+ points, prob = keypoints_from_heatmaps(heatmaps=heatmaps,
201
+ center=np.array([[org_w // 2,
202
+ org_h // 2]]),
203
+ scale=np.array([[org_w, org_h]]),
204
+ unbiased=True, use_udp=True)
205
+ return np.concatenate([points[:, :, ::-1], prob], axis=2)
206
+
207
+ @abc.abstractmethod
208
+ def _inference(self, img: np.ndarray) -> np.ndarray:
209
+ """
210
+ Abstract method for performing inference on an image.
211
+ It is overloaded by each inference engine.
212
+
213
+ Args:
214
+ img (ndarray): Input image for inference.
215
+
216
+ Returns:
217
+ ndarray: Inference results.
218
+ """
219
+ raise NotImplementedError
220
+
221
+ def inference(self, img: np.ndarray) -> dict[typing.Any, typing.Any]:
222
+ """
223
+ Perform inference on the input image.
224
+
225
+ Args:
226
+ img (ndarray): Input image for inference in RGB format.
227
+
228
+ Returns:
229
+ dict[typing.Any, typing.Any]: Inference results.
230
+ """
231
+
232
+ # First use YOLOv8 for detection
233
+ res_pd = np.empty((0, 5))
234
+ results = None
235
+ if (self.tracker is None or
236
+ (self.frame_counter % self.yolo_step == 0 or self.frame_counter < 3)):
237
+ results = self.yolo(img[..., ::-1], verbose=False, imgsz=self.yolo_size,
238
+ device=self.device if self.device != 'cuda' else 0,
239
+ classes=self.yolo_classes)[0]
240
+ res_pd = np.array([r[:5].tolist() for r in # TODO: Confidence threshold
241
+ results.boxes.data.cpu().numpy() if r[4] > 0.35]).reshape((-1, 5))
242
+ self.frame_counter += 1
243
+
244
+ frame_keypoints = {}
245
+ scores_bbox = {}
246
+ ids = None
247
+ if self.tracker is not None:
248
+ res_pd = self.tracker.update(res_pd)
249
+ ids = res_pd[:, 5].astype(int).tolist()
250
+
251
+ # Prepare boxes for inference
252
+ bboxes = res_pd[:, :4].round().astype(int)
253
+ scores = res_pd[:, 4].tolist()
254
+ pad_bbox = 10
255
+
256
+ if ids is None:
257
+ ids = range(len(bboxes))
258
+
259
+ for bbox, id, score in zip(bboxes, ids, scores):
260
+ # TODO: Slightly bigger bbox
261
+ bbox[[0, 2]] = np.clip(bbox[[0, 2]] + [-pad_bbox, pad_bbox], 0, img.shape[1])
262
+ bbox[[1, 3]] = np.clip(bbox[[1, 3]] + [-pad_bbox, pad_bbox], 0, img.shape[0])
263
+
264
+ # Crop image and pad to 3/4 aspect ratio
265
+ img_inf = img[bbox[1]:bbox[3], bbox[0]:bbox[2]]
266
+ img_inf, (left_pad, top_pad) = pad_image(img_inf, 3 / 4)
267
+
268
+ keypoints = self._inference(img_inf)[0]
269
+ # Transform keypoints to original image
270
+ keypoints[:, :2] += bbox[:2][::-1] - [top_pad, left_pad]
271
+ frame_keypoints[id] = keypoints
272
+ scores_bbox[id] = score # Replace this with avg_keypoint_conf*person_obj_conf. For now, only person_obj_conf from yolo is being used.
273
+
274
+ if self.save_state:
275
+ self._img = img
276
+ self._yolo_res = results
277
+ self._tracker_res = (bboxes, ids, scores)
278
+ self._keypoints = frame_keypoints
279
+ self._scores_bbox = scores_bbox
280
+
281
+ return frame_keypoints
282
+
283
+ def draw(self, show_yolo=True, show_raw_yolo=False, confidence_threshold=0.5):
284
+ """
285
+ Draw keypoints and bounding boxes on the image.
286
+
287
+ Args:
288
+ show_yolo (bool, optional): Whether to show YOLOv8 bounding boxes. Default is True.
289
+ show_raw_yolo (bool, optional): Whether to show raw YOLOv8 bounding boxes. Default is False.
290
+
291
+ Returns:
292
+ ndarray: Image with keypoints and bounding boxes drawn.
293
+ """
294
+ img = self._img.copy()
295
+ bboxes, ids, scores = self._tracker_res
296
+
297
+ if self._yolo_res is not None and (show_raw_yolo or (self.tracker is None and show_yolo)):
298
+ img = np.array(self._yolo_res.plot())[..., ::-1]
299
+
300
+ if show_yolo and self.tracker is not None:
301
+ img = draw_bboxes(img, bboxes, ids, scores)
302
+
303
+ img = np.array(img)[..., ::-1] # RGB to BGR for cv2 modules
304
+ for idx, k in self._keypoints.items():
305
+ img = draw_points_and_skeleton(img.copy(), k,
306
+ joints_dict()[self.dataset]['skeleton'],
307
+ person_index=idx,
308
+ points_color_palette='gist_rainbow',
309
+ skeleton_color_palette='jet',
310
+ points_palette_samples=10,
311
+ confidence_threshold=confidence_threshold)
312
+ return img[..., ::-1] # Return RGB as original
313
+
314
+ def pre_img(self, img):
315
+ org_h, org_w = img.shape[:2]
316
+ img_input = cv2.resize(img, self.target_size, interpolation=cv2.INTER_LINEAR) / 255
317
+ img_input = ((img_input - MEAN) / STD).transpose(2, 0, 1)[None].astype(np.float32)
318
+ return img_input, org_h, org_w
319
+
320
+ @torch.no_grad()
321
+ def _inference_torch(self, img: np.ndarray) -> np.ndarray:
322
+ # Prepare input data
323
+ img_input, org_h, org_w = self.pre_img(img)
324
+ img_input = torch.from_numpy(img_input).to(torch.device(self.device))
325
+
326
+ # Feed to model
327
+ heatmaps = self._vit_pose(img_input).detach().cpu().numpy()
328
+ return self.postprocess(heatmaps, org_w, org_h)
329
+
330
+ def _inference_onnx(self, img: np.ndarray) -> np.ndarray:
331
+ # Prepare input data
332
+ img_input, org_h, org_w = self.pre_img(img)
333
+
334
+ # Feed to model
335
+ ort_inputs = {self._ort_session.get_inputs()[0].name: img_input}
336
+ heatmaps = self._ort_session.run(None, ort_inputs)[0]
337
+ return self.postprocess(heatmaps, org_w, org_h)
ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/testVITPOSE-checkpoint.jpg ADDED
ViTPose/easy_ViTPose/easy_ViTPose/.ipynb_checkpoints/train-checkpoint.py ADDED
@@ -0,0 +1,174 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import copy
4
+ import os
5
+ import os.path as osp
6
+ import time
7
+ import warnings
8
+ import click
9
+ import yaml
10
+
11
+ from glob import glob
12
+
13
+ import torch
14
+ import torch.distributed as dist
15
+
16
+ from vit_utils.util import init_random_seed, set_random_seed
17
+ from vit_utils.dist_util import get_dist_info, init_dist
18
+ from vit_utils.logging import get_root_logger
19
+
20
+ import configs.ViTPose_small_coco_256x192 as s_cfg
21
+ import configs.ViTPose_base_coco_256x192 as b_cfg
22
+ import configs.ViTPose_large_coco_256x192 as l_cfg
23
+ import configs.ViTPose_huge_coco_256x192 as h_cfg
24
+
25
+ from vit_models.model import ViTPose
26
+ from datasets.COCO import COCODataset
27
+ from vit_utils.train_valid_fn import train_model
28
+
29
+ CUR_PATH = osp.dirname(__file__)
30
+
31
+ @click.command()
32
+ @click.option('--config-path', type=click.Path(exists=True), default='config.yaml', required=True, help='train config file path')
33
+ @click.option('--model-name', type=str, default='b', required=True, help='[s: ViT-S, b: ViT-B, l: ViT-L, h: ViT-H]')
34
+ def main(config_path, model_name):
35
+
36
+ cfg = {'b':b_cfg,
37
+ 's':s_cfg,
38
+ 'l':l_cfg,
39
+ 'h':h_cfg}.get(model_name.lower())
40
+ # Load config.yaml
41
+ with open(config_path, 'r') as f:
42
+ cfg_yaml = yaml.load(f, Loader=yaml.SafeLoader)
43
+
44
+ for k, v in cfg_yaml.items():
45
+ if hasattr(cfg, k):
46
+ raise ValueError(f"Already exists {k} in config")
47
+ else:
48
+ cfg.__setattr__(k, v)
49
+
50
+ # set cudnn_benchmark
51
+ if cfg.cudnn_benchmark:
52
+ torch.backends.cudnn.benchmark = True
53
+
54
+ # Set work directory (session-level)
55
+ if not hasattr(cfg, 'work_dir'):
56
+ cfg.__setattr__('work_dir', f"{CUR_PATH}/runs/train")
57
+
58
+ if not osp.exists(cfg.work_dir):
59
+ os.makedirs(cfg.work_dir)
60
+ session_list = sorted(glob(f"{cfg.work_dir}/*"))
61
+ if len(session_list) == 0:
62
+ session = 1
63
+ else:
64
+ session = int(os.path.basename(session_list[-1])) + 1
65
+ session_dir = osp.join(cfg.work_dir, str(session).zfill(3))
66
+ os.makedirs(session_dir)
67
+ cfg.__setattr__('work_dir', session_dir)
68
+
69
+
70
+ if cfg.autoscale_lr:
71
+ # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
72
+ cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8
73
+
74
+ # init distributed env first, since logger depends on the dist info.
75
+ if cfg.launcher == 'none':
76
+ distributed = False
77
+ if len(cfg.gpu_ids) > 1:
78
+ warnings.warn(
79
+ f"We treat {cfg['gpu_ids']} as gpu-ids, and reset to "
80
+ f"{cfg['gpu_ids'][0:1]} as gpu-ids to avoid potential error in "
81
+ "non-distribute training time.")
82
+ cfg.gpu_ids = cfg.gpu_ids[0:1]
83
+ else:
84
+ distributed = True
85
+ init_dist(cfg.launcher, **cfg.dist_params)
86
+ # re-set gpu_ids with distributed training mode
87
+ _, world_size = get_dist_info()
88
+ cfg.gpu_ids = range(world_size)
89
+
90
+ # init the logger before other steps
91
+ timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
92
+ log_file = osp.join(session_dir, f'{timestamp}.log')
93
+ logger = get_root_logger(log_file=log_file)
94
+
95
+ # init the meta dict to record some important information such as
96
+ # environment info and seed, which will be logged
97
+ meta = dict()
98
+
99
+ # log some basic info
100
+ logger.info(f'Distributed training: {distributed}')
101
+
102
+ # set random seeds
103
+ seed = init_random_seed(cfg.seed)
104
+ logger.info(f"Set random seed to {seed}, "
105
+ f"deterministic: {cfg.deterministic}")
106
+ set_random_seed(seed, deterministic=cfg.deterministic)
107
+ meta['seed'] = seed
108
+
109
+ # Set model
110
+ model = ViTPose(cfg.model)
111
+ if cfg.resume_from:
112
+ # Load ckpt partially
113
+ ckpt_state = torch.load(cfg.resume_from)['state_dict']
114
+ ckpt_state.pop('keypoint_head.final_layer.bias')
115
+ ckpt_state.pop('keypoint_head.final_layer.weight')
116
+ model.load_state_dict(ckpt_state, strict=False)
117
+
118
+ # freeze the backbone, leave the head to be finetuned
119
+ model.backbone.frozen_stages = model.backbone.depth - 1
120
+ model.backbone.freeze_ffn = True
121
+ model.backbone.freeze_attn = True
122
+ model.backbone._freeze_stages()
123
+
124
+ # Set dataset
125
+ datasets_train = COCODataset(
126
+ root_path=cfg.data_root,
127
+ data_version="feet_train",
128
+ is_train=True,
129
+ use_gt_bboxes=True,
130
+ image_width=192,
131
+ image_height=256,
132
+ scale=True,
133
+ scale_factor=0.35,
134
+ flip_prob=0.5,
135
+ rotate_prob=0.5,
136
+ rotation_factor=45.,
137
+ half_body_prob=0.3,
138
+ use_different_joints_weight=True,
139
+ heatmap_sigma=3,
140
+ soft_nms=False
141
+ )
142
+
143
+ datasets_valid = COCODataset(
144
+ root_path=cfg.data_root,
145
+ data_version="feet_val",
146
+ is_train=False,
147
+ use_gt_bboxes=True,
148
+ image_width=192,
149
+ image_height=256,
150
+ scale=False,
151
+ scale_factor=0.35,
152
+ flip_prob=0.5,
153
+ rotate_prob=0.5,
154
+ rotation_factor=45.,
155
+ half_body_prob=0.3,
156
+ use_different_joints_weight=True,
157
+ heatmap_sigma=3,
158
+ soft_nms=False
159
+ )
160
+
161
+ train_model(
162
+ model=model,
163
+ datasets_train=datasets_train,
164
+ datasets_valid=datasets_valid,
165
+ cfg=cfg,
166
+ distributed=distributed,
167
+ validate=cfg.validate,
168
+ timestamp=timestamp,
169
+ meta=meta
170
+ )
171
+
172
+
173
+ if __name__ == '__main__':
174
+ main()
ViTPose/easy_ViTPose/easy_ViTPose/ViTPose_Inference.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
ViTPose/easy_ViTPose/easy_ViTPose/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .inference import VitInference
2
+
3
+ __all__ = [
4
+ 'VitInference'
5
+ ]
ViTPose/easy_ViTPose/easy_ViTPose/config.yaml ADDED
@@ -0,0 +1,15 @@
1
+ # Train config ---------------------------------------
2
+ log_level: logging.INFO
3
+ seed: 0
4
+ # gpu_ids: 0  # duplicate key, superseded by the list form below
5
+ deterministic: True
6
+ cudnn_benchmark: True # Use cudnn
7
+ resume_from: "C:/Users/user/ViTPose/ckpts/vitpose-s-coco_25.pth" # CKPT path
8
+ #resume_from: False
9
+ gpu_ids: [0]
10
+ launcher: 'none' # When distributed training ['none', 'pytorch', 'slurm', 'mpi']
11
+ use_amp: False
12
+ validate: True
13
+ autoscale_lr: False
14
+ dist_params:
15
+ ...
ViTPose/easy_ViTPose/easy_ViTPose/configs/.ipynb_checkpoints/ViTPose_common-checkpoint.py ADDED
@@ -0,0 +1,195 @@
1
+ # Common configuration
2
+ optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.1,
3
+ constructor='LayerDecayOptimizerConstructor',
4
+ paramwise_cfg=dict(
5
+ num_layers=12,
6
+ layer_decay_rate=1 - 2e-4,
7
+ custom_keys={
8
+ 'bias': dict(decay_multi=0.),
9
+ 'pos_embed': dict(decay_mult=0.),
10
+ 'relative_position_bias_table': dict(decay_mult=0.),
11
+ 'norm': dict(decay_mult=0.)
12
+ }
13
+ )
14
+ )
15
+
16
+ optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
17
+
18
+ # learning policy
19
+ lr_config = dict(
20
+ policy='step',
21
+ warmup='linear',
22
+ warmup_iters=300,
23
+ warmup_ratio=0.001,
24
+ step=[3])
25
+
26
+ total_epochs = 4
27
+ target_type = 'GaussianHeatmap'
28
+
29
+ data_cfg = dict(
30
+ image_size=[192, 256],
31
+ heatmap_size=[48, 64],
32
+ soft_nms=False,
33
+ nms_thr=1.0,
34
+ oks_thr=0.9,
35
+ vis_thr=0.2,
36
+ use_gt_bbox=False,
37
+ det_bbox_thr=0.0,
38
+ bbox_file='data/coco/person_detection_results/'
39
+ 'COCO_val2017_detections_AP_H_56_person.json',
40
+ )
41
+
42
+ data_root = '/home/adryw/dataset/COCO17'
43
+ data = dict(
44
+ samples_per_gpu=64,
45
+ workers_per_gpu=6,
46
+ val_dataloader=dict(samples_per_gpu=128),
47
+ test_dataloader=dict(samples_per_gpu=128),
48
+ train=dict(
49
+ type='TopDownCocoDataset',
50
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
51
+ img_prefix=f'{data_root}/train2017/',
52
+ data_cfg=data_cfg),
53
+ val=dict(
54
+ type='TopDownCocoDataset',
55
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
56
+ img_prefix=f'{data_root}/val2017/',
57
+ data_cfg=data_cfg),
58
+ test=dict(
59
+ type='TopDownCocoDataset',
60
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
61
+ img_prefix=f'{data_root}/val2017/',
62
+ data_cfg=data_cfg)
63
+ )
64
+
65
+ model_small = dict(
66
+ type='TopDown',
67
+ pretrained=None,
68
+ backbone=dict(
69
+ type='ViT',
70
+ img_size=(256, 192),
71
+ patch_size=16,
72
+ embed_dim=384,
73
+ depth=12,
74
+ num_heads=12,
75
+ ratio=1,
76
+ use_checkpoint=False,
77
+ mlp_ratio=4,
78
+ qkv_bias=True,
79
+ drop_path_rate=0.1,
80
+ ),
81
+ keypoint_head=dict(
82
+ type='TopdownHeatmapSimpleHead',
83
+ in_channels=384,
84
+ num_deconv_layers=2,
85
+ num_deconv_filters=(256, 256),
86
+ num_deconv_kernels=(4, 4),
87
+ extra=dict(final_conv_kernel=1, ),
88
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
89
+ train_cfg=dict(),
90
+ test_cfg=dict(
91
+ flip_test=True,
92
+ post_process='default',
93
+ shift_heatmap=False,
94
+ target_type=target_type,
95
+ modulate_kernel=11,
96
+ use_udp=True))
97
+
98
+ model_base = dict(
99
+ type='TopDown',
100
+ pretrained=None,
101
+ backbone=dict(
102
+ type='ViT',
103
+ img_size=(256, 192),
104
+ patch_size=16,
105
+ embed_dim=768,
106
+ depth=12,
107
+ num_heads=12,
108
+ ratio=1,
109
+ use_checkpoint=False,
110
+ mlp_ratio=4,
111
+ qkv_bias=True,
112
+ drop_path_rate=0.3,
113
+ ),
114
+ keypoint_head=dict(
115
+ type='TopdownHeatmapSimpleHead',
116
+ in_channels=768,
117
+ num_deconv_layers=2,
118
+ num_deconv_filters=(256, 256),
119
+ num_deconv_kernels=(4, 4),
120
+ extra=dict(final_conv_kernel=1, ),
121
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
122
+ train_cfg=dict(),
123
+ test_cfg=dict(
124
+ flip_test=True,
125
+ post_process='default',
126
+ shift_heatmap=False,
127
+ target_type=target_type,
128
+ modulate_kernel=11,
129
+ use_udp=True))
130
+
131
+ model_large = dict(
132
+ type='TopDown',
133
+ pretrained=None,
134
+ backbone=dict(
135
+ type='ViT',
136
+ img_size=(256, 192),
137
+ patch_size=16,
138
+ embed_dim=1024,
139
+ depth=24,
140
+ num_heads=16,
141
+ ratio=1,
142
+ use_checkpoint=False,
143
+ mlp_ratio=4,
144
+ qkv_bias=True,
145
+ drop_path_rate=0.5,
146
+ ),
147
+ keypoint_head=dict(
148
+ type='TopdownHeatmapSimpleHead',
149
+ in_channels=1024,
150
+ num_deconv_layers=2,
151
+ num_deconv_filters=(256, 256),
152
+ num_deconv_kernels=(4, 4),
153
+ extra=dict(final_conv_kernel=1, ),
154
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
155
+ train_cfg=dict(),
156
+ test_cfg=dict(
157
+ flip_test=True,
158
+ post_process='default',
159
+ shift_heatmap=False,
160
+ target_type=target_type,
161
+ modulate_kernel=11,
162
+ use_udp=True))
163
+
164
+ model_huge = dict(
165
+ type='TopDown',
166
+ pretrained=None,
167
+ backbone=dict(
168
+ type='ViT',
169
+ img_size=(256, 192),
170
+ patch_size=16,
171
+ embed_dim=1280,
172
+ depth=32,
173
+ num_heads=16,
174
+ ratio=1,
175
+ use_checkpoint=False,
176
+ mlp_ratio=4,
177
+ qkv_bias=True,
178
+ drop_path_rate=0.55,
179
+ ),
180
+ keypoint_head=dict(
181
+ type='TopdownHeatmapSimpleHead',
182
+ in_channels=1280,
183
+ num_deconv_layers=2,
184
+ num_deconv_filters=(256, 256),
185
+ num_deconv_kernels=(4, 4),
186
+ extra=dict(final_conv_kernel=1, ),
187
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
188
+ train_cfg=dict(),
189
+ test_cfg=dict(
190
+ flip_test=True,
191
+ post_process='default',
192
+ shift_heatmap=False,
193
+ target_type=target_type,
194
+ modulate_kernel=11,
195
+ use_udp=True))
ViTPose/easy_ViTPose/easy_ViTPose/configs/.ipynb_checkpoints/ViTPose_small_coco_256x192-checkpoint.py ADDED
@@ -0,0 +1,173 @@
1
+ _base_ = [
2
+ '../../../../_base_/default_runtime.py',
3
+ '../../../../_base_/datasets/coco.py'
4
+ ]
5
+ evaluation = dict(interval=10, metric='mAP', save_best='AP')
6
+
7
+ optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
8
+ constructor='LayerDecayOptimizerConstructor',
9
+ paramwise_cfg=dict(
10
+ num_layers=12,
11
+ layer_decay_rate=0.8,
12
+ custom_keys={
13
+ 'bias': dict(decay_multi=0.),
14
+ 'pos_embed': dict(decay_mult=0.),
15
+ 'relative_position_bias_table': dict(decay_mult=0.),
16
+ 'norm': dict(decay_mult=0.)
17
+ }
18
+ )
19
+ )
20
+
21
+ optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
22
+
23
+ # learning policy
24
+ lr_config = dict(
25
+ policy='step',
26
+ warmup='linear',
27
+ warmup_iters=500,
28
+ warmup_ratio=0.001,
29
+ step=[170, 200])
30
+ total_epochs = 210
31
+ target_type = 'GaussianHeatmap'
32
+ channel_cfg = dict(
33
+ num_output_channels=17,
34
+ dataset_joints=17,
35
+ dataset_channel=[
36
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
37
+ ],
38
+ inference_channel=[
39
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
40
+ ])
41
+
42
+ # model settings
43
+ model = dict(
44
+ type='TopDown',
45
+ pretrained=None,
46
+ backbone=dict(
47
+ type='ViT',
48
+ img_size=(256, 192),
49
+ patch_size=16,
50
+ embed_dim=384,
51
+ depth=12,
52
+ num_heads=12,
53
+ ratio=1,
54
+ use_checkpoint=False,
55
+ mlp_ratio=4,
56
+ qkv_bias=True,
57
+ drop_path_rate=0.1,
58
+ ),
59
+ keypoint_head=dict(
60
+ type='TopdownHeatmapSimpleHead',
61
+ in_channels=384,
62
+ num_deconv_layers=2,
63
+ num_deconv_filters=(256, 256),
64
+ num_deconv_kernels=(4, 4),
65
+ extra=dict(final_conv_kernel=1, ),
66
+ out_channels=channel_cfg['num_output_channels'],
67
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
68
+ train_cfg=dict(),
69
+ test_cfg=dict(
70
+ flip_test=True,
71
+ post_process='default',
72
+ shift_heatmap=False,
73
+ target_type=target_type,
74
+ modulate_kernel=11,
75
+ use_udp=True))
76
+
77
+ data_cfg = dict(
78
+ image_size=[192, 256],
79
+ heatmap_size=[48, 64],
80
+ num_output_channels=channel_cfg['num_output_channels'],
81
+ num_joints=channel_cfg['dataset_joints'],
82
+ dataset_channel=channel_cfg['dataset_channel'],
83
+ inference_channel=channel_cfg['inference_channel'],
84
+ soft_nms=False,
85
+ nms_thr=1.0,
86
+ oks_thr=0.9,
87
+ vis_thr=0.9,
88
+ use_gt_bbox=False,
89
+ det_bbox_thr=0.0,
90
+ bbox_file='data/coco/person_detection_results/'
91
+ 'COCO_val2017_detections_AP_H_56_person.json',
92
+ )
93
+
94
+ train_pipeline = [
95
+ dict(type='LoadImageFromFile'),
96
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
97
+ dict(
98
+ type='TopDownHalfBodyTransform',
99
+ num_joints_half_body=8,
100
+ prob_half_body=0.3),
101
+ dict(
102
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
103
+ dict(type='TopDownAffine', use_udp=True),
104
+ dict(type='ToTensor'),
105
+ dict(
106
+ type='NormalizeTensor',
107
+ mean=[0.485, 0.456, 0.406],
108
+ std=[0.229, 0.224, 0.225]),
109
+ dict(
110
+ type='TopDownGenerateTarget',
111
+ sigma=2,
112
+ encoding='UDP',
113
+ target_type=target_type),
114
+ dict(
115
+ type='Collect',
116
+ keys=['img', 'target', 'target_weight'],
117
+ meta_keys=[
118
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
119
+ 'rotation', 'bbox_score', 'flip_pairs'
120
+ ]),
121
+ ]
122
+
123
+ val_pipeline = [
124
+ dict(type='LoadImageFromFile'),
125
+ dict(type='TopDownAffine', use_udp=True),
126
+ dict(type='ToTensor'),
127
+ dict(
128
+ type='NormalizeTensor',
129
+ mean=[0.485, 0.456, 0.406],
130
+ std=[0.229, 0.224, 0.225]),
131
+ dict(
132
+ type='Collect',
133
+ keys=['img'],
134
+ meta_keys=[
135
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
136
+ 'flip_pairs'
137
+ ]),
138
+ ]
139
+
140
+ test_pipeline = val_pipeline
141
+
142
+ data_root = r'D:\ViTPose\Evaluating'
143
+ data = dict(
144
+ samples_per_gpu=4,
145
+ workers_per_gpu=4,
146
+ val_dataloader=dict(samples_per_gpu=4),
147
+ test_dataloader=dict(samples_per_gpu=4),
148
+ train=dict(
149
+ type='TopDownCocoDataset',
150
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
151
+ img_prefix=f'{data_root}/train2017/',
152
+ data_cfg=data_cfg,
153
+ pipeline=train_pipeline,
154
+ # dataset_info={{_base_.dataset_info}}
155
+ ),
156
+ val=dict(
157
+ type='TopDownCocoDataset',
158
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
159
+ img_prefix=f'{data_root}/val2017/',
160
+ data_cfg=data_cfg,
161
+ pipeline=val_pipeline,
162
+ # dataset_info={{_base_.dataset_info}}
163
+ ),
164
+ test=dict(
165
+ type='TopDownCocoDataset',
166
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
167
+ img_prefix=f'{data_root}/val2017/',
168
+ data_cfg=data_cfg,
169
+ pipeline=test_pipeline,
170
+ #dataset_info={{_base_.dataset_info}}
171
+ ),
172
+ )
173
+
ViTPose/easy_ViTPose/easy_ViTPose/configs/.ipynb_checkpoints/ViTPose_wholebody-checkpoint.py ADDED
@@ -0,0 +1,20 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=133,
6
+ dataset_joints=133,
7
+ dataset_channel=[
8
+ list(range(133)),
9
+ ],
10
+ inference_channel=list(range(133)))
11
+
12
+ # Set models channels
13
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
14
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
15
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
16
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
17
+
18
+ names = ['small', 'base', 'large', 'huge']
19
+ for name in names:
20
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_aic.py ADDED
@@ -0,0 +1,20 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=14,
6
+ dataset_joints=14,
7
+ dataset_channel=[
8
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
9
+ ],
10
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13])
11
+
12
+ # Set models channels
13
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
14
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
15
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
16
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
17
+
18
+ names = ['small', 'base', 'large', 'huge']
19
+ for name in names:
20
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_ap10k.py ADDED
@@ -0,0 +1,22 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=17,
6
+ dataset_joints=17,
7
+ dataset_channel=[
8
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
9
+ ],
10
+ inference_channel=[
11
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
12
+ ])
13
+
14
+ # Set models channels
15
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
16
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
17
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
18
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
19
+
20
+ names = ['small', 'base', 'large', 'huge']
21
+ for name in names:
22
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_apt36k.py ADDED
@@ -0,0 +1,22 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=17,
6
+ dataset_joints=17,
7
+ dataset_channel=[
8
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
9
+ ],
10
+ inference_channel=[
11
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
12
+ ])
13
+
14
+ # Set models channels
15
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
16
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
17
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
18
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
19
+
20
+ names = ['small', 'base', 'large', 'huge']
21
+ for name in names:
22
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_coco.py ADDED
@@ -0,0 +1,18 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=17,
6
+ dataset_joints=17,
7
+ dataset_channel=list(range(17)),
8
+ inference_channel=list(range(17)))
9
+
10
+ # Set models channels
11
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
12
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
13
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
14
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
15
+
16
+ names = ['small', 'base', 'large', 'huge']
17
+ for name in names:
18
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_coco_25.py ADDED
@@ -0,0 +1,20 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=25,
6
+ dataset_joints=25,
7
+ dataset_channel=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
8
+ 16, 17, 18, 19, 20, 21, 22, 23, 24], ],
9
+ inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
10
+ 16, 17, 18, 19, 20, 21, 22, 23, 24])
11
+
12
+ # Set models channels
13
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
14
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
15
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
16
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
17
+
18
+ names = ['small', 'base', 'large', 'huge']
19
+ for name in names:
20
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_common.py ADDED
@@ -0,0 +1,195 @@
1
+ # Common configuration
2
+ optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.1,
3
+ constructor='LayerDecayOptimizerConstructor',
4
+ paramwise_cfg=dict(
5
+ num_layers=12,
6
+ layer_decay_rate=1 - 2e-4,
7
+ custom_keys={
8
+ 'bias': dict(decay_multi=0.),
9
+ 'pos_embed': dict(decay_mult=0.),
10
+ 'relative_position_bias_table': dict(decay_mult=0.),
11
+ 'norm': dict(decay_mult=0.)
12
+ }
13
+ )
14
+ )
15
+
16
+ optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
17
+
18
+ # learning policy
19
+ lr_config = dict(
20
+ policy='step',
21
+ warmup='linear',
22
+ warmup_iters=300,
23
+ warmup_ratio=0.001,
24
+ step=[3])
25
+
26
+ total_epochs = 4
27
+ target_type = 'GaussianHeatmap'
28
+
29
+ data_cfg = dict(
30
+ image_size=[192, 256],
31
+ heatmap_size=[48, 64],
32
+ soft_nms=False,
33
+ nms_thr=1.0,
34
+ oks_thr=0.9,
35
+ vis_thr=0.2,
36
+ use_gt_bbox=False,
37
+ det_bbox_thr=0.0,
38
+ bbox_file='data/coco/person_detection_results/'
39
+ 'COCO_val2017_detections_AP_H_56_person.json',
40
+ )
41
+
42
+ data_root = '/home/adryw/dataset/COCO17'
43
+ data = dict(
44
+ samples_per_gpu=64,
45
+ workers_per_gpu=6,
46
+ val_dataloader=dict(samples_per_gpu=128),
47
+ test_dataloader=dict(samples_per_gpu=128),
48
+ train=dict(
49
+ type='TopDownCocoDataset',
50
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
51
+ img_prefix=f'{data_root}/train2017/',
52
+ data_cfg=data_cfg),
53
+ val=dict(
54
+ type='TopDownCocoDataset',
55
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
56
+ img_prefix=f'{data_root}/val2017/',
57
+ data_cfg=data_cfg),
58
+ test=dict(
59
+ type='TopDownCocoDataset',
60
+ ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
61
+ img_prefix=f'{data_root}/val2017/',
62
+ data_cfg=data_cfg)
63
+ )
64
+
65
+ model_small = dict(
66
+ type='TopDown',
67
+ pretrained=None,
68
+ backbone=dict(
69
+ type='ViT',
70
+ img_size=(256, 192),
71
+ patch_size=16,
72
+ embed_dim=384,
73
+ depth=12,
74
+ num_heads=12,
75
+ ratio=1,
76
+ use_checkpoint=False,
77
+ mlp_ratio=4,
78
+ qkv_bias=True,
79
+ drop_path_rate=0.1,
80
+ ),
81
+ keypoint_head=dict(
82
+ type='TopdownHeatmapSimpleHead',
83
+ in_channels=384,
84
+ num_deconv_layers=2,
85
+ num_deconv_filters=(256, 256),
86
+ num_deconv_kernels=(4, 4),
87
+ extra=dict(final_conv_kernel=1, ),
88
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
89
+ train_cfg=dict(),
90
+ test_cfg=dict(
91
+ flip_test=True,
92
+ post_process='default',
93
+ shift_heatmap=False,
94
+ target_type=target_type,
95
+ modulate_kernel=11,
96
+ use_udp=True))
97
+
98
+ model_base = dict(
99
+ type='TopDown',
100
+ pretrained=None,
101
+ backbone=dict(
102
+ type='ViT',
103
+ img_size=(256, 192),
104
+ patch_size=16,
105
+ embed_dim=768,
106
+ depth=12,
107
+ num_heads=12,
108
+ ratio=1,
109
+ use_checkpoint=False,
110
+ mlp_ratio=4,
111
+ qkv_bias=True,
112
+ drop_path_rate=0.3,
113
+ ),
114
+ keypoint_head=dict(
115
+ type='TopdownHeatmapSimpleHead',
116
+ in_channels=768,
117
+ num_deconv_layers=2,
118
+ num_deconv_filters=(256, 256),
119
+ num_deconv_kernels=(4, 4),
120
+ extra=dict(final_conv_kernel=1, ),
121
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
122
+ train_cfg=dict(),
123
+ test_cfg=dict(
124
+ flip_test=True,
125
+ post_process='default',
126
+ shift_heatmap=False,
127
+ target_type=target_type,
128
+ modulate_kernel=11,
129
+ use_udp=True))
130
+
131
+ model_large = dict(
132
+ type='TopDown',
133
+ pretrained=None,
134
+ backbone=dict(
135
+ type='ViT',
136
+ img_size=(256, 192),
137
+ patch_size=16,
138
+ embed_dim=1024,
139
+ depth=24,
140
+ num_heads=16,
141
+ ratio=1,
142
+ use_checkpoint=False,
143
+ mlp_ratio=4,
144
+ qkv_bias=True,
145
+ drop_path_rate=0.5,
146
+ ),
147
+ keypoint_head=dict(
148
+ type='TopdownHeatmapSimpleHead',
149
+ in_channels=1024,
150
+ num_deconv_layers=2,
151
+ num_deconv_filters=(256, 256),
152
+ num_deconv_kernels=(4, 4),
153
+ extra=dict(final_conv_kernel=1, ),
154
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
155
+ train_cfg=dict(),
156
+ test_cfg=dict(
157
+ flip_test=True,
158
+ post_process='default',
159
+ shift_heatmap=False,
160
+ target_type=target_type,
161
+ modulate_kernel=11,
162
+ use_udp=True))
163
+
164
+ model_huge = dict(
165
+ type='TopDown',
166
+ pretrained=None,
167
+ backbone=dict(
168
+ type='ViT',
169
+ img_size=(256, 192),
170
+ patch_size=16,
171
+ embed_dim=1280,
172
+ depth=32,
173
+ num_heads=16,
174
+ ratio=1,
175
+ use_checkpoint=False,
176
+ mlp_ratio=4,
177
+ qkv_bias=True,
178
+ drop_path_rate=0.55,
179
+ ),
180
+ keypoint_head=dict(
181
+ type='TopdownHeatmapSimpleHead',
182
+ in_channels=1280,
183
+ num_deconv_layers=2,
184
+ num_deconv_filters=(256, 256),
185
+ num_deconv_kernels=(4, 4),
186
+ extra=dict(final_conv_kernel=1, ),
187
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
188
+ train_cfg=dict(),
189
+ test_cfg=dict(
190
+ flip_test=True,
191
+ post_process='default',
192
+ shift_heatmap=False,
193
+ target_type=target_type,
194
+ modulate_kernel=11,
195
+ use_udp=True))
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_mpii.py ADDED
@@ -0,0 +1,18 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=16,
6
+ dataset_joints=16,
7
+ dataset_channel=list(range(16)),
8
+ inference_channel=list(range(16)))
9
+
10
+ # Set models channels
11
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
12
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
13
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
14
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
15
+
16
+ names = ['small', 'base', 'large', 'huge']
17
+ for name in names:
18
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_small_coco_256x192.py ADDED
@@ -0,0 +1,173 @@
1
+ _base_ = [
2
+ '../../../../_base_/default_runtime.py',
3
+ '../../../../_base_/datasets/coco.py'
4
+ ]
5
+ evaluation = dict(interval=10, metric='mAP', save_best='AP')
6
+
7
+ optimizer = dict(type='AdamW', lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
8
+ constructor='LayerDecayOptimizerConstructor',
9
+ paramwise_cfg=dict(
10
+ num_layers=12,
11
+ layer_decay_rate=0.8,
12
+ custom_keys={
13
+ 'bias': dict(decay_multi=0.),
14
+ 'pos_embed': dict(decay_mult=0.),
15
+ 'relative_position_bias_table': dict(decay_mult=0.),
16
+ 'norm': dict(decay_mult=0.)
17
+ }
18
+ )
19
+ )
20
+
21
+ optimizer_config = dict(grad_clip=dict(max_norm=1., norm_type=2))
22
+
23
+ # learning policy
24
+ lr_config = dict(
25
+ policy='step',
26
+ warmup='linear',
27
+ warmup_iters=500,
28
+ warmup_ratio=0.001,
29
+ step=[170, 200])
30
+ total_epochs = 210
31
+ target_type = 'GaussianHeatmap'
32
+ channel_cfg = dict(
33
+ num_output_channels=17,
34
+ dataset_joints=17,
35
+ dataset_channel=[
36
+ [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
37
+ ],
38
+ inference_channel=[
39
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
40
+ ])
41
+
42
+ # model settings
43
+ model = dict(
44
+ type='TopDown',
45
+ pretrained=None,
46
+ backbone=dict(
47
+ type='ViT',
48
+ img_size=(256, 192),
49
+ patch_size=16,
50
+ embed_dim=384,
51
+ depth=12,
52
+ num_heads=12,
53
+ ratio=1,
54
+ use_checkpoint=False,
55
+ mlp_ratio=4,
56
+ qkv_bias=True,
57
+ drop_path_rate=0.1,
58
+ ),
59
+ keypoint_head=dict(
60
+ type='TopdownHeatmapSimpleHead',
61
+ in_channels=384,
62
+ num_deconv_layers=2,
63
+ num_deconv_filters=(256, 256),
64
+ num_deconv_kernels=(4, 4),
65
+ extra=dict(final_conv_kernel=1, ),
66
+ out_channels=channel_cfg['num_output_channels'],
67
+ loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
68
+ train_cfg=dict(),
69
+ test_cfg=dict(
70
+ flip_test=True,
71
+ post_process='default',
72
+ shift_heatmap=False,
73
+ target_type=target_type,
74
+ modulate_kernel=11,
75
+ use_udp=True))
76
+
77
+ data_cfg = dict(
78
+ image_size=[192, 256],
79
+ heatmap_size=[48, 64],
80
+ num_output_channels=channel_cfg['num_output_channels'],
81
+ num_joints=channel_cfg['dataset_joints'],
82
+ dataset_channel=channel_cfg['dataset_channel'],
83
+ inference_channel=channel_cfg['inference_channel'],
84
+ soft_nms=False,
85
+ nms_thr=1.0,
86
+ oks_thr=0.9,
87
+ vis_thr=0.9,
88
+ use_gt_bbox=False,
89
+ det_bbox_thr=0.0,
90
+ bbox_file='data/coco/person_detection_results/'
91
+ 'COCO_val2017_detections_AP_H_56_person.json',
92
+ )
93
+
94
+ train_pipeline = [
95
+ dict(type='LoadImageFromFile'),
96
+ dict(type='TopDownRandomFlip', flip_prob=0.5),
97
+ dict(
98
+ type='TopDownHalfBodyTransform',
99
+ num_joints_half_body=8,
100
+ prob_half_body=0.3),
101
+ dict(
102
+ type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
103
+ dict(type='TopDownAffine', use_udp=True),
104
+ dict(type='ToTensor'),
105
+ dict(
106
+ type='NormalizeTensor',
107
+ mean=[0.485, 0.456, 0.406],
108
+ std=[0.229, 0.224, 0.225]),
109
+ dict(
110
+ type='TopDownGenerateTarget',
111
+ sigma=2,
112
+ encoding='UDP',
113
+ target_type=target_type),
114
+ dict(
115
+ type='Collect',
116
+ keys=['img', 'target', 'target_weight'],
117
+ meta_keys=[
118
+ 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
119
+ 'rotation', 'bbox_score', 'flip_pairs'
120
+ ]),
121
+ ]
122
+
123
+ val_pipeline = [
124
+ dict(type='LoadImageFromFile'),
125
+ dict(type='TopDownAffine', use_udp=True),
126
+ dict(type='ToTensor'),
127
+ dict(
128
+ type='NormalizeTensor',
129
+ mean=[0.485, 0.456, 0.406],
130
+ std=[0.229, 0.224, 0.225]),
131
+ dict(
132
+ type='Collect',
133
+ keys=['img'],
134
+ meta_keys=[
135
+ 'image_file', 'center', 'scale', 'rotation', 'bbox_score',
136
+ 'flip_pairs'
137
+ ]),
138
+ ]
139
+
140
+ test_pipeline = val_pipeline
141
+
142
+ data_root = r'D:\ViTPose\Evaluating'
143
+ data = dict(
144
+ samples_per_gpu=4,
145
+ workers_per_gpu=4,
146
+ val_dataloader=dict(samples_per_gpu=4),
147
+ test_dataloader=dict(samples_per_gpu=4),
148
+ train=dict(
149
+ type='TopDownCocoDataset',
150
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
151
+ img_prefix=f'{data_root}/train2017/',
152
+ data_cfg=data_cfg,
153
+ pipeline=train_pipeline,
154
+ # dataset_info={{_base_.dataset_info}}
155
+ ),
156
+ val=dict(
157
+ type='TopDownCocoDataset',
158
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
159
+ img_prefix=f'{data_root}/val2017/',
160
+ data_cfg=data_cfg,
161
+ pipeline=val_pipeline,
162
+ # dataset_info={{_base_.dataset_info}}
163
+ ),
164
+ test=dict(
165
+ type='TopDownCocoDataset',
166
+ ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
167
+ img_prefix=f'{data_root}/val2017/',
168
+ data_cfg=data_cfg,
169
+ pipeline=test_pipeline,
170
+ #dataset_info={{_base_.dataset_info}}
171
+ ),
172
+ )
173
+
ViTPose/easy_ViTPose/easy_ViTPose/configs/ViTPose_wholebody.py ADDED
@@ -0,0 +1,20 @@
1
+ from .ViTPose_common import *
2
+
3
+ # Channel configuration
4
+ channel_cfg = dict(
5
+ num_output_channels=133,
6
+ dataset_joints=133,
7
+ dataset_channel=[
8
+ list(range(133)),
9
+ ],
10
+ inference_channel=list(range(133)))
11
+
12
+ # Set models channels
13
+ data_cfg['num_output_channels'] = channel_cfg['num_output_channels']
14
+ data_cfg['num_joints']= channel_cfg['dataset_joints']
15
+ data_cfg['dataset_channel']= channel_cfg['dataset_channel']
16
+ data_cfg['inference_channel']= channel_cfg['inference_channel']
17
+
18
+ names = ['small', 'base', 'large', 'huge']
19
+ for name in names:
20
+ globals()[f'model_{name}']['keypoint_head']['out_channels'] = channel_cfg['num_output_channels']
ViTPose/easy_ViTPose/easy_ViTPose/configs/__init__.py ADDED
File without changes
ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/ViTPose_coco_25.cpython-39.pyc ADDED
Binary file (697 Bytes). View file
 
ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/ViTPose_common.cpython-39.pyc ADDED
Binary file (2.88 kB). View file
 
ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/ViTPose_small_coco_256x192.cpython-39.pyc ADDED
Binary file (3.69 kB). View file
 
ViTPose/easy_ViTPose/easy_ViTPose/configs/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (158 Bytes). View file
 
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/300w.py ADDED
@@ -0,0 +1,384 @@
1
+ dataset_info = dict(
2
+ dataset_name='300w',
3
+ paper_info=dict(
4
+ author='Sagonas, Christos and Antonakos, Epameinondas '
5
+ 'and Tzimiropoulos, Georgios and Zafeiriou, Stefanos '
6
+ 'and Pantic, Maja',
7
+ title='300 faces in-the-wild challenge: '
8
+ 'Database and results',
9
+ container='Image and vision computing',
10
+ year='2016',
11
+        homepage='https://ibug.doc.ic.ac.uk/resources/300-W/',
+    ),
+    keypoint_info={
+        0: dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-16'),
+        1: dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-15'),
+        2: dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-14'),
+        3: dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-13'),
+        4: dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-12'),
+        5: dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-11'),
+        6: dict(name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-10'),
+        7: dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-9'),
+        8: dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap=''),
+        9: dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-7'),
+        10: dict(name='kpt-10', id=10, color=[255, 255, 255], type='', swap='kpt-6'),
+        11: dict(name='kpt-11', id=11, color=[255, 255, 255], type='', swap='kpt-5'),
+        12: dict(name='kpt-12', id=12, color=[255, 255, 255], type='', swap='kpt-4'),
+        13: dict(name='kpt-13', id=13, color=[255, 255, 255], type='', swap='kpt-3'),
+        14: dict(name='kpt-14', id=14, color=[255, 255, 255], type='', swap='kpt-2'),
+        15: dict(name='kpt-15', id=15, color=[255, 255, 255], type='', swap='kpt-1'),
+        16: dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap='kpt-0'),
+        17: dict(name='kpt-17', id=17, color=[255, 255, 255], type='', swap='kpt-26'),
+        18: dict(name='kpt-18', id=18, color=[255, 255, 255], type='', swap='kpt-25'),
+        19: dict(name='kpt-19', id=19, color=[255, 255, 255], type='', swap='kpt-24'),
+        20: dict(name='kpt-20', id=20, color=[255, 255, 255], type='', swap='kpt-23'),
+        21: dict(name='kpt-21', id=21, color=[255, 255, 255], type='', swap='kpt-22'),
+        22: dict(name='kpt-22', id=22, color=[255, 255, 255], type='', swap='kpt-21'),
+        23: dict(name='kpt-23', id=23, color=[255, 255, 255], type='', swap='kpt-20'),
+        24: dict(name='kpt-24', id=24, color=[255, 255, 255], type='', swap='kpt-19'),
+        25: dict(name='kpt-25', id=25, color=[255, 255, 255], type='', swap='kpt-18'),
+        26: dict(name='kpt-26', id=26, color=[255, 255, 255], type='', swap='kpt-17'),
+        27: dict(name='kpt-27', id=27, color=[255, 255, 255], type='', swap=''),
+        28: dict(name='kpt-28', id=28, color=[255, 255, 255], type='', swap=''),
+        29: dict(name='kpt-29', id=29, color=[255, 255, 255], type='', swap=''),
+        30: dict(name='kpt-30', id=30, color=[255, 255, 255], type='', swap=''),
+        31: dict(name='kpt-31', id=31, color=[255, 255, 255], type='', swap='kpt-35'),
+        32: dict(name='kpt-32', id=32, color=[255, 255, 255], type='', swap='kpt-34'),
+        33: dict(name='kpt-33', id=33, color=[255, 255, 255], type='', swap=''),
+        34: dict(name='kpt-34', id=34, color=[255, 255, 255], type='', swap='kpt-32'),
+        35: dict(name='kpt-35', id=35, color=[255, 255, 255], type='', swap='kpt-31'),
+        36: dict(name='kpt-36', id=36, color=[255, 255, 255], type='', swap='kpt-45'),
+        37: dict(name='kpt-37', id=37, color=[255, 255, 255], type='', swap='kpt-44'),
+        38: dict(name='kpt-38', id=38, color=[255, 255, 255], type='', swap='kpt-43'),
+        39: dict(name='kpt-39', id=39, color=[255, 255, 255], type='', swap='kpt-42'),
+        40: dict(name='kpt-40', id=40, color=[255, 255, 255], type='', swap='kpt-47'),
+        41: dict(name='kpt-41', id=41, color=[255, 255, 255], type='', swap='kpt-46'),
+        42: dict(name='kpt-42', id=42, color=[255, 255, 255], type='', swap='kpt-39'),
+        43: dict(name='kpt-43', id=43, color=[255, 255, 255], type='', swap='kpt-38'),
+        44: dict(name='kpt-44', id=44, color=[255, 255, 255], type='', swap='kpt-37'),
+        45: dict(name='kpt-45', id=45, color=[255, 255, 255], type='', swap='kpt-36'),
+        46: dict(name='kpt-46', id=46, color=[255, 255, 255], type='', swap='kpt-41'),
+        47: dict(name='kpt-47', id=47, color=[255, 255, 255], type='', swap='kpt-40'),
+        48: dict(name='kpt-48', id=48, color=[255, 255, 255], type='', swap='kpt-54'),
+        49: dict(name='kpt-49', id=49, color=[255, 255, 255], type='', swap='kpt-53'),
+        50: dict(name='kpt-50', id=50, color=[255, 255, 255], type='', swap='kpt-52'),
+        51: dict(name='kpt-51', id=51, color=[255, 255, 255], type='', swap=''),
+        52: dict(name='kpt-52', id=52, color=[255, 255, 255], type='', swap='kpt-50'),
+        53: dict(name='kpt-53', id=53, color=[255, 255, 255], type='', swap='kpt-49'),
+        54: dict(name='kpt-54', id=54, color=[255, 255, 255], type='', swap='kpt-48'),
+        55: dict(name='kpt-55', id=55, color=[255, 255, 255], type='', swap='kpt-59'),
+        56: dict(name='kpt-56', id=56, color=[255, 255, 255], type='', swap='kpt-58'),
+        57: dict(name='kpt-57', id=57, color=[255, 255, 255], type='', swap=''),
+        58: dict(name='kpt-58', id=58, color=[255, 255, 255], type='', swap='kpt-56'),
+        59: dict(name='kpt-59', id=59, color=[255, 255, 255], type='', swap='kpt-55'),
+        60: dict(name='kpt-60', id=60, color=[255, 255, 255], type='', swap='kpt-64'),
+        61: dict(name='kpt-61', id=61, color=[255, 255, 255], type='', swap='kpt-63'),
+        62: dict(name='kpt-62', id=62, color=[255, 255, 255], type='', swap=''),
+        63: dict(name='kpt-63', id=63, color=[255, 255, 255], type='', swap='kpt-61'),
+        64: dict(name='kpt-64', id=64, color=[255, 255, 255], type='', swap='kpt-60'),
+        65: dict(name='kpt-65', id=65, color=[255, 255, 255], type='', swap='kpt-67'),
+        66: dict(name='kpt-66', id=66, color=[255, 255, 255], type='', swap=''),
+        67: dict(name='kpt-67', id=67, color=[255, 255, 255], type='', swap='kpt-65'),
+    },
+    skeleton_info={},
+    joint_weights=[1.] * 68,
+    sigmas=[])
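Note: in these mmpose-style `dataset_info` configs, the `swap` field of each keypoint names its horizontal mirror, which is what flip augmentation and flip-testing rely on. As a minimal, hypothetical sketch (not code from this repository), such a dict can be reduced to flip-index pairs like this:

```python
# Hypothetical helper, shown only to illustrate the meaning of `swap`
# in the keypoint_info dicts above; it is not part of easy_ViTPose.
def flip_pairs(keypoint_info):
    """Return sorted (id, mirrored_id) pairs implied by the `swap` fields."""
    name_to_id = {v['name']: k for k, v in keypoint_info.items()}
    pairs = set()
    for idx, kpt in keypoint_info.items():
        if kpt['swap']:  # empty string means the keypoint is its own mirror
            pairs.add(tuple(sorted((idx, name_to_id[kpt['swap']]))))
    return sorted(pairs)

# For the 300W layout above this yields (0, 16), (1, 15), ..., (65, 67).
```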
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/aflw.py ADDED
@@ -0,0 +1,83 @@
+dataset_info = dict(
+    dataset_name='aflw',
+    paper_info=dict(
+        author='Koestinger, Martin and Wohlhart, Paul and '
+        'Roth, Peter M and Bischof, Horst',
+        title='Annotated facial landmarks in the wild: '
+        'A large-scale, real-world database for facial '
+        'landmark localization',
+        container='2011 IEEE international conference on computer '
+        'vision workshops (ICCV workshops)',
+        year='2011',
+        homepage='https://www.tugraz.at/institute/icg/research/'
+        'team-bischof/lrs/downloads/aflw/',
+    ),
+    keypoint_info={
+        0: dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-5'),
+        1: dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-4'),
+        2: dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-3'),
+        3: dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-2'),
+        4: dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-1'),
+        5: dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-0'),
+        6: dict(name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-11'),
+        7: dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-10'),
+        8: dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-9'),
+        9: dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-8'),
+        10: dict(name='kpt-10', id=10, color=[255, 255, 255], type='', swap='kpt-7'),
+        11: dict(name='kpt-11', id=11, color=[255, 255, 255], type='', swap='kpt-6'),
+        12: dict(name='kpt-12', id=12, color=[255, 255, 255], type='', swap='kpt-14'),
+        13: dict(name='kpt-13', id=13, color=[255, 255, 255], type='', swap=''),
+        14: dict(name='kpt-14', id=14, color=[255, 255, 255], type='', swap='kpt-12'),
+        15: dict(name='kpt-15', id=15, color=[255, 255, 255], type='', swap='kpt-17'),
+        16: dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap=''),
+        17: dict(name='kpt-17', id=17, color=[255, 255, 255], type='', swap='kpt-15'),
+        18: dict(name='kpt-18', id=18, color=[255, 255, 255], type='', swap='')
+    },
+    skeleton_info={},
+    joint_weights=[1.] * 19,
+    sigmas=[])
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/aic.py ADDED
@@ -0,0 +1,140 @@
+dataset_info = dict(
+    dataset_name='aic',
+    paper_info=dict(
+        author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+        'Li, Yixin and Yan, Baoming and Liang, Rui and '
+        'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+        'Fu, Yanwei and others',
+        title='Ai challenger: A large-scale dataset for going '
+        'deeper in image understanding',
+        container='arXiv',
+        year='2017',
+        homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+    ),
+    keypoint_info={
+        0: dict(name='right_shoulder', id=0, color=[255, 128, 0], type='upper', swap='left_shoulder'),
+        1: dict(name='right_elbow', id=1, color=[255, 128, 0], type='upper', swap='left_elbow'),
+        2: dict(name='right_wrist', id=2, color=[255, 128, 0], type='upper', swap='left_wrist'),
+        3: dict(name='left_shoulder', id=3, color=[0, 255, 0], type='upper', swap='right_shoulder'),
+        4: dict(name='left_elbow', id=4, color=[0, 255, 0], type='upper', swap='right_elbow'),
+        5: dict(name='left_wrist', id=5, color=[0, 255, 0], type='upper', swap='right_wrist'),
+        6: dict(name='right_hip', id=6, color=[255, 128, 0], type='lower', swap='left_hip'),
+        7: dict(name='right_knee', id=7, color=[255, 128, 0], type='lower', swap='left_knee'),
+        8: dict(name='right_ankle', id=8, color=[255, 128, 0], type='lower', swap='left_ankle'),
+        9: dict(name='left_hip', id=9, color=[0, 255, 0], type='lower', swap='right_hip'),
+        10: dict(name='left_knee', id=10, color=[0, 255, 0], type='lower', swap='right_knee'),
+        11: dict(name='left_ankle', id=11, color=[0, 255, 0], type='lower', swap='right_ankle'),
+        12: dict(name='head_top', id=12, color=[51, 153, 255], type='upper', swap=''),
+        13: dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap='')
+    },
+    skeleton_info={
+        0: dict(link=('right_wrist', 'right_elbow'), id=0, color=[255, 128, 0]),
+        1: dict(link=('right_elbow', 'right_shoulder'), id=1, color=[255, 128, 0]),
+        2: dict(link=('right_shoulder', 'neck'), id=2, color=[51, 153, 255]),
+        3: dict(link=('neck', 'left_shoulder'), id=3, color=[51, 153, 255]),
+        4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+        5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+        6: dict(link=('right_ankle', 'right_knee'), id=6, color=[255, 128, 0]),
+        7: dict(link=('right_knee', 'right_hip'), id=7, color=[255, 128, 0]),
+        8: dict(link=('right_hip', 'left_hip'), id=8, color=[51, 153, 255]),
+        9: dict(link=('left_hip', 'left_knee'), id=9, color=[0, 255, 0]),
+        10: dict(link=('left_knee', 'left_ankle'), id=10, color=[0, 255, 0]),
+        11: dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
+        12: dict(link=('right_shoulder', 'right_hip'), id=12, color=[51, 153, 255]),
+        13: dict(link=('left_shoulder', 'left_hip'), id=13, color=[51, 153, 255])
+    },
+    joint_weights=[
+        1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.
+    ],
+    # 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
+    # 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
+    # delta = 2 x sigma
+    sigmas=[
+        0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
+        0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
+        0.01291456, 0.01236173
+    ])
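The `sigmas` list (together with the `delta = 2 x sigma` note) parameterises a COCO-style Object Keypoint Similarity (OKS) score at evaluation time: a larger sigma makes the metric more tolerant of localisation error for that joint. As a hedged illustration only, adapted from the standard pycocotools formula rather than taken from this repository:

```python
import numpy as np

# Sketch of COCO-style OKS using per-keypoint sigmas such as the list above.
# pred, gt: (K, 2) arrays of keypoint coordinates; visible: (K,) booleans;
# area: the ground-truth object area used as the scale term.
def object_keypoint_similarity(pred, gt, visible, area, sigmas):
    sigmas = np.asarray(sigmas)
    variances = (2 * sigmas) ** 2              # "delta = 2 x sigma"
    d2 = np.sum((np.asarray(pred) - np.asarray(gt)) ** 2, axis=1)
    e = d2 / variances / (area + np.spacing(1)) / 2
    vis = np.asarray(visible, dtype=bool)
    return float(np.mean(np.exp(-e[vis]))) if vis.any() else 0.0
```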
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/aic_info.py ADDED
@@ -0,0 +1,140 @@
+aic_info = dict(
+    dataset_name='aic',
+    paper_info=dict(
+        author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
+        'Li, Yixin and Yan, Baoming and Liang, Rui and '
+        'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
+        'Fu, Yanwei and others',
+        title='Ai challenger: A large-scale dataset for going '
+        'deeper in image understanding',
+        container='arXiv',
+        year='2017',
+        homepage='https://github.com/AIChallenger/AI_Challenger_2017',
+    ),
+    keypoint_info={
+        0: dict(name='right_shoulder', id=0, color=[255, 128, 0], type='upper', swap='left_shoulder'),
+        1: dict(name='right_elbow', id=1, color=[255, 128, 0], type='upper', swap='left_elbow'),
+        2: dict(name='right_wrist', id=2, color=[255, 128, 0], type='upper', swap='left_wrist'),
+        3: dict(name='left_shoulder', id=3, color=[0, 255, 0], type='upper', swap='right_shoulder'),
+        4: dict(name='left_elbow', id=4, color=[0, 255, 0], type='upper', swap='right_elbow'),
+        5: dict(name='left_wrist', id=5, color=[0, 255, 0], type='upper', swap='right_wrist'),
+        6: dict(name='right_hip', id=6, color=[255, 128, 0], type='lower', swap='left_hip'),
+        7: dict(name='right_knee', id=7, color=[255, 128, 0], type='lower', swap='left_knee'),
+        8: dict(name='right_ankle', id=8, color=[255, 128, 0], type='lower', swap='left_ankle'),
+        9: dict(name='left_hip', id=9, color=[0, 255, 0], type='lower', swap='right_hip'),
+        10: dict(name='left_knee', id=10, color=[0, 255, 0], type='lower', swap='right_knee'),
+        11: dict(name='left_ankle', id=11, color=[0, 255, 0], type='lower', swap='right_ankle'),
+        12: dict(name='head_top', id=12, color=[51, 153, 255], type='upper', swap=''),
+        13: dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap='')
+    },
+    skeleton_info={
+        0: dict(link=('right_wrist', 'right_elbow'), id=0, color=[255, 128, 0]),
+        1: dict(link=('right_elbow', 'right_shoulder'), id=1, color=[255, 128, 0]),
+        2: dict(link=('right_shoulder', 'neck'), id=2, color=[51, 153, 255]),
+        3: dict(link=('neck', 'left_shoulder'), id=3, color=[51, 153, 255]),
+        4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
+        5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
+        6: dict(link=('right_ankle', 'right_knee'), id=6, color=[255, 128, 0]),
+        7: dict(link=('right_knee', 'right_hip'), id=7, color=[255, 128, 0]),
+        8: dict(link=('right_hip', 'left_hip'), id=8, color=[51, 153, 255]),
+        9: dict(link=('left_hip', 'left_knee'), id=9, color=[0, 255, 0]),
+        10: dict(link=('left_knee', 'left_ankle'), id=10, color=[0, 255, 0]),
+        11: dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
+        12: dict(link=('right_shoulder', 'right_hip'), id=12, color=[51, 153, 255]),
+        13: dict(link=('left_shoulder', 'left_hip'), id=13, color=[51, 153, 255])
+    },
+    joint_weights=[
+        1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.
+    ],
+    # 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
+    # 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
+    # delta = 2 x sigma
+    sigmas=[
+        0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
+        0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
+        0.01291456, 0.01236173
+    ])
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/animalpose.py ADDED
@@ -0,0 +1,166 @@
+dataset_info = dict(
+    dataset_name='animalpose',
+    paper_info=dict(
+        author='Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and '
+        'Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing',
+        title='Cross-Domain Adaptation for Animal Pose Estimation',
+        container='The IEEE International Conference on '
+        'Computer Vision (ICCV)',
+        year='2019',
+        homepage='https://sites.google.com/view/animal-pose/',
+    ),
+    keypoint_info={
+        0: dict(name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+        1: dict(name='R_Eye', id=1, color=[255, 128, 0], type='upper', swap='L_Eye'),
+        2: dict(name='L_EarBase', id=2, color=[0, 255, 0], type='upper', swap='R_EarBase'),
+        3: dict(name='R_EarBase', id=3, color=[255, 128, 0], type='upper', swap='L_EarBase'),
+        4: dict(name='Nose', id=4, color=[51, 153, 255], type='upper', swap=''),
+        5: dict(name='Throat', id=5, color=[51, 153, 255], type='upper', swap=''),
+        6: dict(name='TailBase', id=6, color=[51, 153, 255], type='lower', swap=''),
+        7: dict(name='Withers', id=7, color=[51, 153, 255], type='upper', swap=''),
+        8: dict(name='L_F_Elbow', id=8, color=[0, 255, 0], type='upper', swap='R_F_Elbow'),
+        9: dict(name='R_F_Elbow', id=9, color=[255, 128, 0], type='upper', swap='L_F_Elbow'),
+        10: dict(name='L_B_Elbow', id=10, color=[0, 255, 0], type='lower', swap='R_B_Elbow'),
+        11: dict(name='R_B_Elbow', id=11, color=[255, 128, 0], type='lower', swap='L_B_Elbow'),
+        12: dict(name='L_F_Knee', id=12, color=[0, 255, 0], type='upper', swap='R_F_Knee'),
+        13: dict(name='R_F_Knee', id=13, color=[255, 128, 0], type='upper', swap='L_F_Knee'),
+        14: dict(name='L_B_Knee', id=14, color=[0, 255, 0], type='lower', swap='R_B_Knee'),
+        15: dict(name='R_B_Knee', id=15, color=[255, 128, 0], type='lower', swap='L_B_Knee'),
+        16: dict(name='L_F_Paw', id=16, color=[0, 255, 0], type='upper', swap='R_F_Paw'),
+        17: dict(name='R_F_Paw', id=17, color=[255, 128, 0], type='upper', swap='L_F_Paw'),
+        18: dict(name='L_B_Paw', id=18, color=[0, 255, 0], type='lower', swap='R_B_Paw'),
+        19: dict(name='R_B_Paw', id=19, color=[255, 128, 0], type='lower', swap='L_B_Paw')
+    },
+    skeleton_info={
+        0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[51, 153, 255]),
+        1: dict(link=('L_Eye', 'L_EarBase'), id=1, color=[0, 255, 0]),
+        2: dict(link=('R_Eye', 'R_EarBase'), id=2, color=[255, 128, 0]),
+        3: dict(link=('L_Eye', 'Nose'), id=3, color=[0, 255, 0]),
+        4: dict(link=('R_Eye', 'Nose'), id=4, color=[255, 128, 0]),
+        5: dict(link=('Nose', 'Throat'), id=5, color=[51, 153, 255]),
+        6: dict(link=('Throat', 'Withers'), id=6, color=[51, 153, 255]),
+        7: dict(link=('TailBase', 'Withers'), id=7, color=[51, 153, 255]),
+        8: dict(link=('Throat', 'L_F_Elbow'), id=8, color=[0, 255, 0]),
+        9: dict(link=('L_F_Elbow', 'L_F_Knee'), id=9, color=[0, 255, 0]),
+        10: dict(link=('L_F_Knee', 'L_F_Paw'), id=10, color=[0, 255, 0]),
+        11: dict(link=('Throat', 'R_F_Elbow'), id=11, color=[255, 128, 0]),
+        12: dict(link=('R_F_Elbow', 'R_F_Knee'), id=12, color=[255, 128, 0]),
+        13: dict(link=('R_F_Knee', 'R_F_Paw'), id=13, color=[255, 128, 0]),
+        14: dict(link=('TailBase', 'L_B_Elbow'), id=14, color=[0, 255, 0]),
+        15: dict(link=('L_B_Elbow', 'L_B_Knee'), id=15, color=[0, 255, 0]),
+        16: dict(link=('L_B_Knee', 'L_B_Paw'), id=16, color=[0, 255, 0]),
+        17: dict(link=('TailBase', 'R_B_Elbow'), id=17, color=[255, 128, 0]),
+        18: dict(link=('R_B_Elbow', 'R_B_Knee'), id=18, color=[255, 128, 0]),
+        19: dict(link=('R_B_Knee', 'R_B_Paw'), id=19, color=[255, 128, 0])
+    },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2,
+        1.5, 1.5, 1.5, 1.5
+    ],
+    # Note: The original paper did not provide enough information about
+    # the sigmas. We modified from 'https://github.com/cocodataset/'
+    # 'cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L523'
+    sigmas=[
+        0.025, 0.025, 0.026, 0.035, 0.035, 0.10, 0.10, 0.10, 0.107, 0.107,
+        0.107, 0.107, 0.087, 0.087, 0.087, 0.087, 0.089, 0.089, 0.089, 0.089
+    ])
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/ap10k.py ADDED
@@ -0,0 +1,142 @@
+dataset_info = dict(
+    dataset_name='ap10k',
+    paper_info=dict(
+        author='Yu, Hang and Xu, Yufei and Zhang, Jing and '
+        'Zhao, Wei and Guan, Ziyu and Tao, Dacheng',
+        title='AP-10K: A Benchmark for Animal Pose Estimation in the Wild',
+        container='35th Conference on Neural Information Processing Systems '
+        '(NeurIPS 2021) Track on Datasets and Bench-marks.',
+        year='2021',
+        homepage='https://github.com/AlexTheBad/AP-10K',
+    ),
+    keypoint_info={
+        0: dict(name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+        1: dict(name='R_Eye', id=1, color=[255, 128, 0], type='upper', swap='L_Eye'),
+        2: dict(name='Nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+        3: dict(name='Neck', id=3, color=[51, 153, 255], type='upper', swap=''),
+        4: dict(name='Root of tail', id=4, color=[51, 153, 255], type='lower', swap=''),
+        5: dict(name='L_Shoulder', id=5, color=[51, 153, 255], type='upper', swap='R_Shoulder'),
+        6: dict(name='L_Elbow', id=6, color=[51, 153, 255], type='upper', swap='R_Elbow'),
+        7: dict(name='L_F_Paw', id=7, color=[0, 255, 0], type='upper', swap='R_F_Paw'),
+        8: dict(name='R_Shoulder', id=8, color=[0, 255, 0], type='upper', swap='L_Shoulder'),
+        9: dict(name='R_Elbow', id=9, color=[255, 128, 0], type='upper', swap='L_Elbow'),
+        10: dict(name='R_F_Paw', id=10, color=[0, 255, 0], type='lower', swap='L_F_Paw'),
+        11: dict(name='L_Hip', id=11, color=[255, 128, 0], type='lower', swap='R_Hip'),
+        12: dict(name='L_Knee', id=12, color=[255, 128, 0], type='lower', swap='R_Knee'),
+        13: dict(name='L_B_Paw', id=13, color=[0, 255, 0], type='lower', swap='R_B_Paw'),
+        14: dict(name='R_Hip', id=14, color=[0, 255, 0], type='lower', swap='L_Hip'),
+        15: dict(name='R_Knee', id=15, color=[0, 255, 0], type='lower', swap='L_Knee'),
+        16: dict(name='R_B_Paw', id=16, color=[0, 255, 0], type='lower', swap='L_B_Paw'),
+    },
+    skeleton_info={
+        0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[0, 0, 255]),
+        1: dict(link=('L_Eye', 'Nose'), id=1, color=[0, 0, 255]),
+        2: dict(link=('R_Eye', 'Nose'), id=2, color=[0, 0, 255]),
+        3: dict(link=('Nose', 'Neck'), id=3, color=[0, 255, 0]),
+        4: dict(link=('Neck', 'Root of tail'), id=4, color=[0, 255, 0]),
+        5: dict(link=('Neck', 'L_Shoulder'), id=5, color=[0, 255, 255]),
+        6: dict(link=('L_Shoulder', 'L_Elbow'), id=6, color=[0, 255, 255]),
+        7: dict(link=('L_Elbow', 'L_F_Paw'), id=6, color=[0, 255, 255]),
+        8: dict(link=('Neck', 'R_Shoulder'), id=7, color=[6, 156, 250]),
+        9: dict(link=('R_Shoulder', 'R_Elbow'), id=8, color=[6, 156, 250]),
+        10: dict(link=('R_Elbow', 'R_F_Paw'), id=9, color=[6, 156, 250]),
+        11: dict(link=('Root of tail', 'L_Hip'), id=10, color=[0, 255, 255]),
+        12: dict(link=('L_Hip', 'L_Knee'), id=11, color=[0, 255, 255]),
+        13: dict(link=('L_Knee', 'L_B_Paw'), id=12, color=[0, 255, 255]),
+        14: dict(link=('Root of tail', 'R_Hip'), id=13, color=[6, 156, 250]),
+        15: dict(link=('R_Hip', 'R_Knee'), id=14, color=[6, 156, 250]),
+        16: dict(link=('R_Knee', 'R_B_Paw'), id=15, color=[6, 156, 250]),
+    },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+        1.5
+    ],
+    sigmas=[
+        0.025, 0.025, 0.026, 0.035, 0.035, 0.079, 0.072, 0.062, 0.079, 0.072,
+        0.062, 0.107, 0.087, 0.089, 0.107, 0.087, 0.089
+    ])
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/ap10k_info.py ADDED
@@ -0,0 +1,142 @@
+ap10k_info = dict(
+    dataset_name='ap10k',
+    paper_info=dict(
+        author='Yu, Hang and Xu, Yufei and Zhang, Jing and '
+        'Zhao, Wei and Guan, Ziyu and Tao, Dacheng',
+        title='AP-10K: A Benchmark for Animal Pose Estimation in the Wild',
+        container='35th Conference on Neural Information Processing Systems '
+        '(NeurIPS 2021) Track on Datasets and Bench-marks.',
+        year='2021',
+        homepage='https://github.com/AlexTheBad/AP-10K',
+    ),
+    keypoint_info={
+        0: dict(name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
+        1: dict(name='R_Eye', id=1, color=[255, 128, 0], type='upper', swap='L_Eye'),
+        2: dict(name='Nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+        3: dict(name='Neck', id=3, color=[51, 153, 255], type='upper', swap=''),
+        4: dict(name='Root of tail', id=4, color=[51, 153, 255], type='lower', swap=''),
+        5: dict(name='L_Shoulder', id=5, color=[51, 153, 255], type='upper', swap='R_Shoulder'),
+        6: dict(name='L_Elbow', id=6, color=[51, 153, 255], type='upper', swap='R_Elbow'),
+        7: dict(name='L_F_Paw', id=7, color=[0, 255, 0], type='upper', swap='R_F_Paw'),
+        8: dict(name='R_Shoulder', id=8, color=[0, 255, 0], type='upper', swap='L_Shoulder'),
+        9: dict(name='R_Elbow', id=9, color=[255, 128, 0], type='upper', swap='L_Elbow'),
+        10: dict(name='R_F_Paw', id=10, color=[0, 255, 0], type='lower', swap='L_F_Paw'),
+        11: dict(name='L_Hip', id=11, color=[255, 128, 0], type='lower', swap='R_Hip'),
+        12: dict(name='L_Knee', id=12, color=[255, 128, 0], type='lower', swap='R_Knee'),
+        13: dict(name='L_B_Paw', id=13, color=[0, 255, 0], type='lower', swap='R_B_Paw'),
+        14: dict(name='R_Hip', id=14, color=[0, 255, 0], type='lower', swap='L_Hip'),
+        15: dict(name='R_Knee', id=15, color=[0, 255, 0], type='lower', swap='L_Knee'),
+        16: dict(name='R_B_Paw', id=16, color=[0, 255, 0], type='lower', swap='L_B_Paw'),
+    },
+    skeleton_info={
+        0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[0, 0, 255]),
+        1: dict(link=('L_Eye', 'Nose'), id=1, color=[0, 0, 255]),
+        2: dict(link=('R_Eye', 'Nose'), id=2, color=[0, 0, 255]),
+        3: dict(link=('Nose', 'Neck'), id=3, color=[0, 255, 0]),
+        4: dict(link=('Neck', 'Root of tail'), id=4, color=[0, 255, 0]),
+        5: dict(link=('Neck', 'L_Shoulder'), id=5, color=[0, 255, 255]),
+        6: dict(link=('L_Shoulder', 'L_Elbow'), id=6, color=[0, 255, 255]),
+        7: dict(link=('L_Elbow', 'L_F_Paw'), id=6, color=[0, 255, 255]),
+        8: dict(link=('Neck', 'R_Shoulder'), id=7, color=[6, 156, 250]),
+        9: dict(link=('R_Shoulder', 'R_Elbow'), id=8, color=[6, 156, 250]),
+        10: dict(link=('R_Elbow', 'R_F_Paw'), id=9, color=[6, 156, 250]),
+        11: dict(link=('Root of tail', 'L_Hip'), id=10, color=[0, 255, 255]),
+        12: dict(link=('L_Hip', 'L_Knee'), id=11, color=[0, 255, 255]),
+        13: dict(link=('L_Knee', 'L_B_Paw'), id=12, color=[0, 255, 255]),
+        14: dict(link=('Root of tail', 'R_Hip'), id=13, color=[6, 156, 250]),
+        15: dict(link=('R_Hip', 'R_Knee'), id=14, color=[6, 156, 250]),
+        16: dict(link=('R_Knee', 'R_B_Paw'), id=15, color=[6, 156, 250]),
+    },
+    joint_weights=[
+        1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
+        1.5
+    ],
+    sigmas=[
+        0.025, 0.025, 0.026, 0.035, 0.035, 0.079, 0.072, 0.062, 0.079, 0.072,
+        0.062, 0.107, 0.087, 0.089, 0.107, 0.087, 0.089
+    ])
ViTPose/easy_ViTPose/easy_ViTPose/configs/_base_/datasets/atrw.py ADDED
@@ -0,0 +1,144 @@
+dataset_info = dict(
+    dataset_name='atrw',
+    paper_info=dict(
+        author='Li, Shuyuan and Li, Jianguo and Tang, Hanlin '
+        'and Qian, Rui and Lin, Weiyao',
+        title='ATRW: A Benchmark for Amur Tiger '
+        'Re-identification in the Wild',
+        container='Proceedings of the 28th ACM '
+        'International Conference on Multimedia',
+        year='2020',
+        homepage='https://cvwc2019.github.io/challenge.html',
+    ),
+    keypoint_info={
+        0: dict(name='left_ear', id=0, color=[51, 153, 255], type='upper', swap='right_ear'),
+        1: dict(name='right_ear', id=1, color=[51, 153, 255], type='upper', swap='left_ear'),
+        2: dict(name='nose', id=2, color=[51, 153, 255], type='upper', swap=''),
+        3: dict(name='right_shoulder', id=3, color=[255, 128, 0], type='upper', swap='left_shoulder'),
+        4: dict(name='right_front_paw', id=4, color=[255, 128, 0], type='upper', swap='left_front_paw'),
+        5: dict(name='left_shoulder', id=5, color=[0, 255, 0], type='upper', swap='right_shoulder'),
+        6: dict(name='left_front_paw', id=6, color=[0, 255, 0], type='upper', swap='right_front_paw'),
+        7: dict(name='right_hip', id=7, color=[255, 128, 0], type='lower', swap='left_hip'),
+        8: dict(name='right_knee', id=8, color=[255, 128, 0], type='lower', swap='left_knee'),
+        9: dict(name='right_back_paw', id=9, color=[255, 128, 0], type='lower', swap='left_back_paw'),
+        10: dict(name='left_hip', id=10, color=[0, 255, 0], type='lower', swap='right_hip'),
+        11: dict(name='left_knee', id=11, color=[0, 255, 0], type='lower', swap='right_knee'),
+        12: dict(name='left_back_paw', id=12, color=[0, 255, 0], type='lower', swap='right_back_paw'),
+        13: dict(name='tail', id=13, color=[51, 153, 255], type='lower', swap=''),
+        14: dict(name='center', id=14, color=[51, 153, 255], type='lower', swap=''),
+    },
+    skeleton_info={
+        0: dict(link=('left_ear', 'nose'), id=0, color=[51, 153, 255]),
+        1: dict(link=('right_ear', 'nose'), id=1, color=[51, 153, 255]),
+        2: dict(link=('nose', 'center'), id=2, color=[51, 153, 255]),
+        3: dict(link=('left_shoulder', 'left_front_paw'), id=3, color=[0, 255, 0]),
+        4: dict(link=('left_shoulder', 'center'), id=4, color=[0, 255, 0]),
+        5: dict(link=('right_shoulder', 'right_front_paw'), id=5, color=[255, 128, 0]),
+        6: dict(link=('right_shoulder', 'center'), id=6, color=[255, 128, 0]),
+        7: dict(link=('tail', 'center'), id=7, color=[51, 153, 255]),
+        8: dict(link=('right_back_paw', 'right_knee'), id=8, color=[255, 128, 0]),
+        9: dict(link=('right_knee', 'right_hip'), id=9, color=[255, 128, 0]),
+        10: dict(link=('right_hip', 'tail'), id=10, color=[255, 128, 0]),
+        11: dict(link=('left_back_paw', 'left_knee'), id=11, color=[0, 255, 0]),
+        12: dict(link=('left_knee', 'left_hip'), id=12, color=[0, 255, 0]),
+        13: dict(link=('left_hip', 'tail'), id=13, color=[0, 255, 0]),
+    },
+    joint_weights=[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
+    sigmas=[
+        0.0277, 0.0823, 0.0831, 0.0202, 0.0716, 0.0263, 0.0646, 0.0302, 0.0440,
+        0.0316, 0.0333, 0.0547, 0.0263, 0.0683, 0.0539
+    ])