File size: 5,041 Bytes
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
9c8f48a
9a02a1c
 
 
 
 
9c8f48a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a02a1c
 
9c8f48a
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c8f48a
9a02a1c
 
 
9c8f48a
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
9c8f48a
9a02a1c
9c8f48a
 
 
 
 
 
9a02a1c
 
9c8f48a
 
 
 
 
 
9a02a1c
9c8f48a
9a02a1c
9c8f48a
9a02a1c
9c8f48a
9a02a1c
 
9c8f48a
9a02a1c
9c8f48a
 
9a02a1c
 
 
 
9c8f48a
 
 
 
9a02a1c
9c8f48a
 
9a02a1c
 
 
 
9c8f48a
 
 
9a02a1c
 
9c8f48a
 
 
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#############################################################################
#
#   Source from:
#   https://www.tensorflow.org/hub/tutorials/movenet
#
#
#############################################################################
import functools

import PIL.Image
import PIL.ImageOps
import numpy as np
import tensorflow as tf
from PIL import ImageDraw
from PIL import ImageFont
from huggingface_hub import snapshot_download

# Dictionary that maps from joint names to keypoint indices in the
# 17-point MoveNet output.
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Skeleton edges as (keypoint_index, keypoint_index) pairs, mapped to the
# color each bone is drawn with (left side magenta, right side cyan,
# torso cross-links yellow).
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'Magenta',
    (0, 2): 'Cyan',
    (1, 3): 'Magenta',
    (2, 4): 'Cyan',
    (0, 5): 'Magenta',
    (0, 6): 'Cyan',
    (5, 7): 'Magenta',
    (7, 9): 'Magenta',
    (6, 8): 'Cyan',
    (8, 10): 'Cyan',
    (5, 6): 'Yellow',
    (5, 11): 'Magenta',
    (6, 12): 'Cyan',
    (11, 12): 'Yellow',
    (11, 13): 'Magenta',
    (13, 15): 'Magenta',
    (12, 14): 'Cyan',
    (14, 16): 'Cyan'
}


def process_keypoints(keypoints_with_scores, height, width, threshold=0.11):
    """Return high-confidence keypoints and skeleton edges for visualization.

    Args:
      keypoints_with_scores: A numpy array with shape [1, num_instances, 17, 3]
        holding, per keypoint, normalized (y, x) coordinates in [0, 1] and a
        confidence score, as returned by the MoveNet model.
      height: height of the image in pixels.
      width: width of the image in pixels.
      threshold: minimum confidence score for a keypoint to be visualized.

    Returns:
      A (keypoints_xy, edges_xy) tuple:
        * keypoints_xy: array of shape [K, 3] whose rows are
          (joint_name, x_pixels, y_pixels) for every keypoint above the
          threshold. Note the dtype is string, because the joint names are
          stored alongside the coordinates; callers convert the numeric
          columns back with float().
        * edges_xy: list of (segment, color) pairs, where segment is a
          float array [[x_start, y_start], [x_end, y_end]] in pixels and
          color is the name of the color to draw the bone with.
    """
    keypoints_all = []
    keypoint_edges_all = []
    # Instances live on axis 1 of the [1, num_instances, 17, 3] output,
    # matching the [0, idx, ...] indexing below (the original code read
    # axis 0, which only worked because both axes happen to be 1).
    _, num_instances, _, _ = keypoints_with_scores.shape
    joint_names = list(KEYPOINT_DICT.keys())
    for idx in range(num_instances):
        # Model emits normalized (y, x); scale to pixel coordinates.
        kpts_x = width * keypoints_with_scores[0, idx, :, 1]
        kpts_y = height * keypoints_with_scores[0, idx, :, 0]
        kpts_scores = keypoints_with_scores[0, idx, :, 2]
        # Stacking strings with floats yields a string-dtype array on
        # purpose: the labels are drawn next to the joints downstream.
        kpts_absolute_xy = np.stack([joint_names, kpts_x, kpts_y], axis=-1)
        keypoints_all.append(kpts_absolute_xy[kpts_scores > threshold, :])

        for (start, end), color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
            if kpts_scores[start] > threshold and kpts_scores[end] > threshold:
                # Build the segment from the numeric coordinates directly so
                # it stays a float array (kpts_absolute_xy is string-typed).
                line_seg = np.array([[kpts_x[start], kpts_y[start]],
                                     [kpts_x[end], kpts_y[end]]])
                # Keep a plain list: np.stack over (array, str) pairs is an
                # inhomogeneous sequence and raises on NumPy >= 1.24.
                keypoint_edges_all.append((line_seg, color))

    if keypoints_all:
        keypoints_xy = np.concatenate(keypoints_all, axis=0)
    else:
        # Empty fallback matches the (K, 3) layout of the populated case.
        keypoints_xy = np.zeros((0, 3))
    return keypoints_xy, keypoint_edges_all


def draw_bones(pixmap: PIL.Image.Image, keypoints):
    """Draw the detected skeleton (bones and labeled joints) on *pixmap*.

    Args:
      pixmap: PIL image to draw on; modified in place.
      keypoints: keypoints-with-scores array from the MoveNet model, as
        accepted by process_keypoints.

    Returns:
      The (joint_name, x, y) rows that were drawn, as returned by
      process_keypoints.
    """
    draw = ImageDraw.Draw(pixmap)
    joints, bones = process_keypoints(keypoints, pixmap.height, pixmap.width)

    try:
        font = ImageFont.truetype("./Arial.ttf", 22)
    except OSError:
        # Fall back to PIL's built-in bitmap font instead of crashing when
        # Arial.ttf is not shipped next to the script.
        font = ImageFont.load_default()

    for bone, color in bones:
        start, end = bone.astype(np.float32)
        draw.line((*start, *end), fill=color, width=4)

    radius = 3
    for label, c_x, c_y in joints:
        # Coordinates arrive as strings (the joints array carries labels).
        c_x = float(c_x)
        c_y = float(c_y)
        box = [(c_x - radius, c_y - radius), (c_x + radius, c_y + radius)]
        draw.ellipse(box, fill="red", outline="red")
        draw.text((c_x, c_y), label, font=font, align="left", fill="blue")

    return joints


@functools.cache
def _load_movenet_model():
    """Download (first call only) and load MoveNet's serving signature.

    Cached so repeated movenet() calls reuse one loaded model instead of
    re-resolving the snapshot and re-loading the SavedModel every time.
    """
    model_path = snapshot_download("leonelhs/movenet")
    module = tf.saved_model.load(model_path)
    return module.signatures['serving_default']


def movenet(image):
    """Runs detection on an input image.

        Args:
          image: A [1, height, width, 3] tensor represents the input image
            pixels. Note that the height/width should already be resized and match the
            expected input resolution of the model before passing into this function.

        Returns:
          A [1, 1, 17, 3] float numpy array representing the predicted keypoint
          coordinates and scores.
    """
    model = _load_movenet_model()
    # SavedModel format expects tensor type of int32.
    image = tf.cast(image, dtype=tf.int32)
    # Run model inference.
    outputs = model(image)
    # Output is a [1, 1, 17, 3] tensor.
    return outputs['output_0'].numpy()