File size: 5,041 Bytes
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
9c8f48a
9a02a1c
 
 
 
 
9c8f48a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a02a1c
 
9c8f48a
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c8f48a
9a02a1c
 
 
9c8f48a
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
9c8f48a
9a02a1c
9c8f48a
 
 
 
 
 
9a02a1c
 
9c8f48a
 
 
 
 
 
9a02a1c
9c8f48a
9a02a1c
9c8f48a
9a02a1c
9c8f48a
9a02a1c
 
9c8f48a
9a02a1c
9c8f48a
 
9a02a1c
 
 
 
9c8f48a
 
 
 
9a02a1c
9c8f48a
 
9a02a1c
 
 
 
9c8f48a
 
 
9a02a1c
 
9c8f48a
 
 
9a02a1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#############################################################################
#
#   Source from:
#   https://www.tensorflow.org/hub/tutorials/movenet
#
#
#############################################################################
import functools

import PIL.Image
import PIL.ImageOps
import numpy as np
import tensorflow as tf
from PIL import ImageDraw
from PIL import ImageFont
from huggingface_hub import snapshot_download

# Dictionary that maps from joint names to keypoint indices in the
# 17-point MoveNet output.
KEYPOINT_DICT = {
    'nose': 0,
    'left_eye': 1,
    'right_eye': 2,
    'left_ear': 3,
    'right_ear': 4,
    'left_shoulder': 5,
    'right_shoulder': 6,
    'left_elbow': 7,
    'right_elbow': 8,
    'left_wrist': 9,
    'right_wrist': 10,
    'left_hip': 11,
    'right_hip': 12,
    'left_knee': 13,
    'right_knee': 14,
    'left_ankle': 15,
    'right_ankle': 16
}

# Skeleton edges as (keypoint_index, keypoint_index) pairs, mapped to the
# color each bone is drawn with (left side magenta, right side cyan,
# torso cross-links yellow).
KEYPOINT_EDGE_INDS_TO_COLOR = {
    (0, 1): 'Magenta',
    (0, 2): 'Cyan',
    (1, 3): 'Magenta',
    (2, 4): 'Cyan',
    (0, 5): 'Magenta',
    (0, 6): 'Cyan',
    (5, 7): 'Magenta',
    (7, 9): 'Magenta',
    (6, 8): 'Cyan',
    (8, 10): 'Cyan',
    (5, 6): 'Yellow',
    (5, 11): 'Magenta',
    (6, 12): 'Cyan',
    (11, 12): 'Yellow',
    (11, 13): 'Magenta',
    (13, 15): 'Magenta',
    (12, 14): 'Cyan',
    (14, 16): 'Cyan'
}


def process_keypoints(keypoints_with_scores, height, width, threshold=0.11):
    """Return high-confidence keypoints and skeleton edges for visualization.

    Args:
      keypoints_with_scores: A numpy array with shape [1, num_instances, 17, 3]
        holding, per keypoint, normalized (y, x) coordinates in [0, 1] and a
        confidence score, as returned by the MoveNet model.
      height: height of the image in pixels.
      width: width of the image in pixels.
      threshold: minimum confidence score for a keypoint to be visualized.

    Returns:
      A (keypoints_xy, edges_xy) tuple:
        * keypoints_xy: array of shape [K, 3] whose rows are
          (joint_name, x_pixels, y_pixels) for every keypoint above the
          threshold. Note the dtype is string, because the joint names are
          stored alongside the coordinates; callers convert the numeric
          columns back with float().
        * edges_xy: list of (segment, color) pairs, where segment is a
          float array [[x_start, y_start], [x_end, y_end]] in pixels and
          color is the name of the color to draw the bone with.
    """
    keypoints_all = []
    keypoint_edges_all = []
    # Instances live on axis 1 of the [1, num_instances, 17, 3] output,
    # matching the [0, idx, ...] indexing below (the original code read
    # axis 0, which only worked because both axes happen to be 1).
    _, num_instances, _, _ = keypoints_with_scores.shape
    joint_names = list(KEYPOINT_DICT.keys())
    for idx in range(num_instances):
        # Model emits normalized (y, x); scale to pixel coordinates.
        kpts_x = width * keypoints_with_scores[0, idx, :, 1]
        kpts_y = height * keypoints_with_scores[0, idx, :, 0]
        kpts_scores = keypoints_with_scores[0, idx, :, 2]
        # Stacking strings with floats yields a string-dtype array on
        # purpose: the labels are drawn next to the joints downstream.
        kpts_absolute_xy = np.stack([joint_names, kpts_x, kpts_y], axis=-1)
        keypoints_all.append(kpts_absolute_xy[kpts_scores > threshold, :])

        for (start, end), color in KEYPOINT_EDGE_INDS_TO_COLOR.items():
            if kpts_scores[start] > threshold and kpts_scores[end] > threshold:
                # Build the segment from the numeric coordinates directly so
                # it stays a float array (kpts_absolute_xy is string-typed).
                line_seg = np.array([[kpts_x[start], kpts_y[start]],
                                     [kpts_x[end], kpts_y[end]]])
                # Keep a plain list: np.stack over (array, str) pairs is an
                # inhomogeneous sequence and raises on NumPy >= 1.24.
                keypoint_edges_all.append((line_seg, color))

    if keypoints_all:
        keypoints_xy = np.concatenate(keypoints_all, axis=0)
    else:
        # Empty fallback matches the (K, 3) layout of the populated case.
        keypoints_xy = np.zeros((0, 3))
    return keypoints_xy, keypoint_edges_all


def draw_bones(pixmap: PIL.Image.Image, keypoints):
    """Draw the detected skeleton (bones and labeled joints) on *pixmap*.

    Args:
      pixmap: PIL image to draw on; modified in place.
      keypoints: keypoints-with-scores array from the MoveNet model, as
        accepted by process_keypoints.

    Returns:
      The (joint_name, x, y) rows that were drawn, as returned by
      process_keypoints.
    """
    draw = ImageDraw.Draw(pixmap)
    joints, bones = process_keypoints(keypoints, pixmap.height, pixmap.width)

    try:
        font = ImageFont.truetype("./Arial.ttf", 22)
    except OSError:
        # Fall back to PIL's built-in bitmap font instead of crashing when
        # Arial.ttf is not shipped next to the script.
        font = ImageFont.load_default()

    for bone, color in bones:
        start, end = bone.astype(np.float32)
        draw.line((*start, *end), fill=color, width=4)

    radius = 3
    for label, c_x, c_y in joints:
        # Coordinates arrive as strings (the joints array carries labels).
        c_x = float(c_x)
        c_y = float(c_y)
        box = [(c_x - radius, c_y - radius), (c_x + radius, c_y + radius)]
        draw.ellipse(box, fill="red", outline="red")
        draw.text((c_x, c_y), label, font=font, align="left", fill="blue")

    return joints


@functools.cache
def _load_movenet_model():
    """Download (first call only) and load MoveNet's serving signature.

    Cached so repeated movenet() calls reuse one loaded model instead of
    re-resolving the snapshot and re-loading the SavedModel every time.
    """
    model_path = snapshot_download("leonelhs/movenet")
    module = tf.saved_model.load(model_path)
    return module.signatures['serving_default']


def movenet(image):
    """Runs detection on an input image.

        Args:
          image: A [1, height, width, 3] tensor represents the input image
            pixels. Note that the height/width should already be resized and match the
            expected input resolution of the model before passing into this function.

        Returns:
          A [1, 1, 17, 3] float numpy array representing the predicted keypoint
          coordinates and scores.
    """
    model = _load_movenet_model()
    # SavedModel format expects tensor type of int32.
    image = tf.cast(image, dtype=tf.int32)
    # Run model inference.
    outputs = model(image)
    # Output is a [1, 1, 17, 3] tensor.
    return outputs['output_0'].numpy()