File size: 5,041 Bytes
9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c 9c8f48a 9a02a1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
#############################################################################
#
# Source from:
# https://www.tensorflow.org/hub/tutorials/movenet
#
#
#############################################################################
import functools

import PIL.Image
import PIL.ImageOps
import numpy as np
import tensorflow as tf
from PIL import ImageDraw
from PIL import ImageFont
from huggingface_hub import snapshot_download
# Dictionary that maps from joint names to keypoint indices.
# This is the standard 17-point COCO keypoint ordering that MoveNet outputs.
KEYPOINT_DICT = {
'nose': 0,
'left_eye': 1,
'right_eye': 2,
'left_ear': 3,
'right_ear': 4,
'left_shoulder': 5,
'right_shoulder': 6,
'left_elbow': 7,
'right_elbow': 8,
'left_wrist': 9,
'right_wrist': 10,
'left_hip': 11,
'right_hip': 12,
'left_knee': 13,
'right_knee': 14,
'left_ankle': 15,
'right_ankle': 16
}
# Maps skeleton edges (pairs of keypoint indices from KEYPOINT_DICT) to the
# color used to draw them: edges touching a left-side joint are Magenta,
# right-side edges are Cyan, and the two cross-body edges (shoulder-shoulder
# and hip-hip) are Yellow.
KEYPOINT_EDGE_INDS_TO_COLOR = {
(0, 1): 'Magenta',
(0, 2): 'Cyan',
(1, 3): 'Magenta',
(2, 4): 'Cyan',
(0, 5): 'Magenta',
(0, 6): 'Cyan',
(5, 7): 'Magenta',
(7, 9): 'Magenta',
(6, 8): 'Cyan',
(8, 10): 'Cyan',
(5, 6): 'Yellow',
(5, 11): 'Magenta',
(6, 12): 'Cyan',
(11, 12): 'Yellow',
(11, 13): 'Magenta',
(13, 15): 'Magenta',
(12, 14): 'Cyan',
(14, 16): 'Cyan'
}
def process_keypoints(keypoints_with_scores, height, width, threshold=0.11):
    """Returns high confidence keypoints and edges for visualization.

    Args:
      keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing
        the keypoint (y, x, score) triplets returned from the MoveNet model.
      height: height of the image in pixels.
      width: width of the image in pixels.
      threshold: minimum confidence score for a keypoint to be visualized.

    Returns:
      A (joints, bones) tuple containing:
        * joints: string-dtype array of [name, x, y] rows, one per keypoint
          whose score exceeds ``threshold`` (pixel coordinates as strings —
          ``draw_bones`` converts them back with ``float``);
        * bones: list of (segment, color) pairs, where ``segment`` is a float
          (2, 2) array [[x_start, y_start], [x_end, y_end]] in pixels and
          ``color`` is the edge's display color name.
    """
    keypoint_names = np.array(list(KEYPOINT_DICT.keys()))
    keypoints_all = []
    keypoint_edges_all = []
    num_instances = keypoints_with_scores.shape[0]
    for idx in range(num_instances):
        # Model output order is (y, x, score); scale to absolute pixels.
        kpts_x = width * np.asarray(keypoints_with_scores[0, idx, :, 1], dtype=np.float64)
        kpts_y = height * np.asarray(keypoints_with_scores[0, idx, :, 0], dtype=np.float64)
        kpts_scores = keypoints_with_scores[0, idx, :, 2]
        # Attach the joint name to each coordinate pair; numpy upcasts the
        # mixed stack to a string dtype, which the caller converts back.
        labeled_xy = np.stack([keypoint_names, kpts_x, kpts_y], axis=-1)
        keypoints_all.append(labeled_xy[kpts_scores > threshold, :])
        for (start, end), color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
            # Only draw a bone when BOTH endpoints are confident.
            if kpts_scores[start] > threshold and kpts_scores[end] > threshold:
                segment = np.array([[kpts_x[start], kpts_y[start]],
                                    [kpts_x[end], kpts_y[end]]])
                keypoint_edges_all.append((segment, color))
    if keypoints_all:
        keypoints_xy = np.concatenate(keypoints_all, axis=0)
    else:
        keypoints_xy = np.zeros((0, 17, 2))
    # Edges are returned as a plain list of (segment, color) pairs: the
    # previous np.stack over ragged [array, str] items raises ValueError on
    # numpy >= 1.24, and the caller only ever iterates the pairs anyway.
    return keypoints_xy, keypoint_edges_all
def draw_bones(pixmap: PIL.Image.Image, keypoints):
    """Draws the detected skeleton and joint labels onto ``pixmap`` in place.

    Args:
      pixmap: PIL image to annotate; modified in place.
      keypoints: [1, 1, 17, 3] MoveNet output (see ``process_keypoints``).

    Returns:
      The high-confidence joints array from ``process_keypoints``.
    """
    draw = ImageDraw.Draw(pixmap)
    joints, bones = process_keypoints(keypoints, pixmap.height, pixmap.width)
    try:
        font = ImageFont.truetype("./Arial.ttf", 22)
    except OSError:
        # Arial.ttf may not ship next to the script; fall back to PIL's
        # built-in bitmap font instead of crashing the whole draw call.
        font = ImageFont.load_default()
    for bone, color in bones:
        bone = bone.astype(np.float32)
        draw.line((*bone[0], *bone[1]), fill=color, width=4)
    radius = 3
    for label, c_x, c_y in joints:
        # Joint coordinates arrive as strings (see process_keypoints).
        c_x = float(c_x)
        c_y = float(c_y)
        # Small red dot on the joint, with its name alongside.
        shape = [(c_x - radius, c_y - radius), (c_x + radius, c_y + radius)]
        draw.ellipse(shape, fill="red", outline="red")
        draw.text((c_x, c_y), label, font=font, align="left", fill="blue")
    return joints
@functools.lru_cache(maxsize=1)
def _load_movenet_signature():
    """Downloads (once) and loads the MoveNet SavedModel serving signature."""
    model_path = snapshot_download("leonelhs/movenet")
    module = tf.saved_model.load(model_path)
    return module.signatures['serving_default']

def movenet(image):
    """Runs detection on an input image.

    Args:
      image: A [1, height, width, 3] tensor represents the input image
        pixels. Note that the height/width should already be resized and match the
        expected input resolution of the model before passing into this function.

    Returns:
      A [1, 1, 17, 3] float numpy array representing the predicted keypoint
      coordinates and scores.
    """
    # The model is downloaded and loaded only on the first call; subsequent
    # calls reuse the cached signature instead of reloading the SavedModel.
    model = _load_movenet_signature()
    # SavedModel format expects tensor type of int32.
    image = tf.cast(image, dtype=tf.int32)
    # Run model inference.
    outputs = model(image)
    # Output is a [1, 1, 17, 3] tensor.
    return outputs['output_0'].numpy()
|