Spaces:

CVPR
/

SPOTER_Sign_Language_Recognition

Build error

File size: 9,828 Bytes

a001524


import math
import logging
import cv2
import random

import numpy as np

from normalization.body_normalization import BODY_IDENTIFIERS
from normalization.hand_normalization import HAND_IDENTIFIERS


# Every imported hand identifier is listed twice: first all names with the "_0" suffix, then all names with the "_1"
# suffix (presumably one copy per detected hand -- confirm against the data loader).
HAND_IDENTIFIERS = [name + suffix for suffix in ("_0", "_1") for name in HAND_IDENTIFIERS]
# Order in which the arm joints are walked; "$side$" is a placeholder substituted with "left" or "right" at use time.
ARM_IDENTIFIERS_ORDER = ["neck", "$side$Shoulder", "$side$Elbow", "$side$Wrist"]


def __random_pass(prob):
    """
    Supplementary method drawing a single random number from [0, 1) and reporting whether it fell below the given
    threshold.

    :param prob: Threshold the random draw is compared against (a probability from the range [0, 1])
    :return: True if the drawn number is strictly smaller than `prob`, False otherwise
    """

    drawn = random.random()

    return drawn < prob


def __numpy_to_dictionary(data_array: np.ndarray) -> dict:
    """
    Supplementary method turning a NumPy array of body landmark data into a dictionary keyed by landmark identifier.
    The second axis of the array must follow the order of the BODY_IDENTIFIERS list; for every identifier the
    corresponding slice `data_array[:, index]` is stored as a plain Python list.

    :param data_array: Array of body landmark data indexed by landmark along its second axis
    :return: Dictionary mapping each identifier from BODY_IDENTIFIERS to its landmark data as a list
    """

    return {
        identifier: data_array[:, position].tolist()
        for position, identifier in enumerate(BODY_IDENTIFIERS)
    }


def __dictionary_to_numpy(landmarks_dict: dict) -> np.ndarray:
    """
    Supplementary method packing a dictionary of body landmark data into a single NumPy array. The second axis of the
    resulting array follows the order of the BODY_IDENTIFIERS list.

    :param landmarks_dict: Dictionary mapping every identifier from BODY_IDENTIFIERS to a sequence of (X, Y) entries
    :return: Array of the shape (number of entries under "leftEar", number of body identifiers, 2)
    """

    # The length of the "leftEar" sequence determines the size of the first axis
    sequence_length = len(landmarks_dict["leftEar"])
    packed = np.empty(shape=(sequence_length, len(BODY_IDENTIFIERS), 2))

    for position, identifier in enumerate(BODY_IDENTIFIERS):
        coordinates = np.array(landmarks_dict[identifier])

        packed[:, position, 0] = coordinates[:, 0]
        packed[:, position, 1] = coordinates[:, 1]

    return packed


def __rotate(origin: tuple, point: tuple, angle: float):
    """
    Rotates a point around a given origin by a given angle. The rotation is counterclockwise in a coordinate system
    whose Y axis points upwards.

    :param origin: Landmark in the (X, Y) format serving as the center of rotation
    :param point: Landmark in the (X, Y) format to be rotated
    :param angle: Angle of rotation in radians
    :return: Tuple with the (X, Y) coordinates of the rotated landmark
    """

    (origin_x, origin_y), (point_x, point_y) = origin, point

    cosine, sine = math.cos(angle), math.sin(angle)
    # Offset of the point relative to the center of rotation
    delta_x, delta_y = point_x - origin_x, point_y - origin_y

    rotated_x = origin_x + cosine * delta_x - sine * delta_y
    rotated_y = origin_y + sine * delta_x + cosine * delta_y

    return rotated_x, rotated_y


def __preprocess_row_sign(sign: dict) -> (dict, dict):
    """
    Supplementary method splitting the single-dictionary skeletal data into two dictionaries of body and hand landmarks
    respectively.

    Two input layouts are handled. If the key "nose_X" is present, the coordinates are read from separate
    "<identifier>_X" and "<identifier>_Y" sequences and paired into (X, Y) tuples. Otherwise the values stored directly
    under each identifier are taken over as they are (without copying).

    :param sign: Dictionary with sequential skeletal data of the signing person
    :return: Tuple of two dictionaries (body landmarks, hand landmarks) keyed by BODY_IDENTIFIERS and HAND_IDENTIFIERS
    """

    has_split_axes = "nose_X" in sign

    def landmark_frames(identifier):
        # Pair up the per-axis sequences when the data comes with separate X and Y entries
        if has_split_axes:
            return list(zip(sign[identifier + "_X"], sign[identifier + "_Y"]))

        return sign[identifier]

    body_landmarks = {identifier: landmark_frames(identifier) for identifier in BODY_IDENTIFIERS}
    hand_landmarks = {identifier: landmark_frames(identifier) for identifier in HAND_IDENTIFIERS}

    return body_landmarks, hand_landmarks


def __wrap_sign_into_row(body_identifiers: dict, hand_identifiers: dict) -> dict:
    """
    Supplementary method for merging body and hand data into a single, newly created dictionary. Should a key be
    present in both inputs, the value from the hand data is kept.

    :param body_identifiers: Dictionary with the body landmark data
    :param hand_identifiers: Dictionary with the hand landmark data
    :return: New dictionary holding the entries of both inputs (body entries first)
    """

    merged = dict(body_identifiers)
    merged.update(hand_identifiers)

    return merged


def augment_rotate(sign: dict, angle_range: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE. All the joint coordinates (body and hands) in each frame are rotated by one common random
    angle, with the center of rotation lying in the center of the frame, which is equal to [0.5; 0.5]. The angle is
    drawn once per call from a uniform distribution over the given range.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the
                        angle by which the landmarks will be rotated from

    :return: Dictionary with augmented (by rotation) sequential skeletal data of the signing person
    """

    frame_center = (0.5, 0.5)

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)
    # The range is given in degrees, whereas the rotation itself works with radians
    rotation_angle = math.radians(random.uniform(*angle_range))

    def rotate_all(landmarks: dict) -> dict:
        return {
            identifier: [__rotate(frame_center, landmark, rotation_angle) for landmark in frames]
            for identifier, frames in landmarks.items()
        }

    return __wrap_sign_into_row(rotate_all(body_landmarks), rotate_all(hand_landmarks))


def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE. The body landmarks are mapped by a projective transformation which sends the corners of
    the unit frame onto a randomly constructed quadrilateral.

        - Squeeze. All the frames are squeezed from both horizontal sides. Two independent random proportions of the
        original frame's width, each drawn uniformly from `squeeze_ratio`, are cut from the left and the right side.

        - Perspective transformation. The joint coordinates are projected onto a new plane, which simulates recording
        the sign video with a slight tilt. Each time, the left or the right side is chosen randomly (with equal
        chance) and a single proportion is drawn uniformly from `squeeze_ratio`. The new plane is delineated by moving
        both corners of the chosen side inwards by this proportion, horizontally as well as vertically.

    Only the body landmarks are transformed; the hand landmarks are returned unchanged. Body landmarks equal to
    exactly (0, 0) before the transformation (presumably undetected joints -- confirm with the data source) are kept
    at (0, 0) as whole points.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param type: Type of shear augmentation to perform (either 'squeeze' or 'perspective'). The name shadows the
                 builtin, but is kept so that keyword callers continue to work
    :param squeeze_ratio: Tuple containing the relative range from what the proportion of the original width will be
                          randomly chosen. These proportions will either be cut from both sides or used to construct the
                          new projection

    :return: Dictionary with augmented (by squeezing or perspective transformation) sequential skeletal data of the
             signing person, or an empty dictionary (after logging an error) if the shear type is not supported
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    # Corners of the unit frame; both variants differ only in where these corners end up
    src = np.array(((0, 1), (1, 1), (0, 0), (1, 0)), dtype=np.float32)

    if type == "squeeze":
        move_left = random.uniform(*squeeze_ratio)
        move_right = random.uniform(*squeeze_ratio)

        dest = np.array(((0 + move_left, 1), (1 - move_right, 1), (0 + move_left, 0), (1 - move_right, 0)),
                        dtype=np.float32)

    elif type == "perspective":
        move_ratio = random.uniform(*squeeze_ratio)

        if __random_pass(0.5):
            # Pull in both corners lying on the X = 0 side
            dest = np.array(((0 + move_ratio, 1 - move_ratio), (1, 1), (0 + move_ratio, 0 + move_ratio), (1, 0)),
                            dtype=np.float32)
        else:
            # Pull in both corners lying on the X = 1 side
            dest = np.array(((0, 1), (1 - move_ratio, 1 - move_ratio), (0, 0), (1 - move_ratio, 0 + move_ratio)),
                            dtype=np.float32)

    else:
        logging.error("Unsupported shear type provided.")
        return {}

    mtx = cv2.getPerspectiveTransform(src, dest)

    landmarks_array = np.array(__dictionary_to_numpy(body_landmarks), dtype=np.float32)

    # Remember which landmarks were exactly (0, 0) on input. Masking whole points (instead of comparing the
    # transformed coordinates one by one) prevents zeroing a single coordinate of a valid landmark that merely shares
    # its X or Y value with the transformed origin.
    zero_landmarks = np.all(landmarks_array == 0, axis=-1)

    augmented_landmarks = cv2.perspectiveTransform(landmarks_array, mtx)
    augmented_landmarks[zero_landmarks] = 0

    body_landmarks = __numpy_to_dictionary(augmented_landmarks)

    return __wrap_sign_into_row(body_landmarks, hand_landmarks)


def augment_arm_joint_rotate(sign: dict, probability: float, angle_range: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE. The joint coordinates of both arms are passed successively (in the order given by
    ARM_IDENTIFIERS_ORDER), and all the landmarks following the current one are slightly rotated around it. Each joint
    is chosen to act as the center of rotation with the given probability and its angle is drawn uniformly from the
    given range. This simulates slight, negligible variances in each execution of a sign, which do not change its
    semantic meaning.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param probability: Probability of each joint to be rotated (float from the range [0, 1])
    :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the
                        angle by which the landmarks will be rotated from

    :return: Dictionary with augmented (by arm joint rotation) sequential skeletal data of the signing person
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    for side in ("left", "right"):
        # Resolve the joint names of the current arm
        arm_joints = [template.replace("$side$", side) for template in ARM_IDENTIFIERS_ORDER]

        for joint_position, pivot in enumerate(arm_joints):
            # A missing joint ends the processing of the current arm
            if pivot not in body_landmarks:
                break

            # Most of the time (depending on the probability) the joint is left untouched
            if not __random_pass(probability):
                continue

            angle = math.radians(random.uniform(*angle_range))

            # Rotate every joint further down the arm around the current one, frame by frame
            for target in arm_joints[joint_position + 1:]:
                if target not in body_landmarks:
                    continue

                pivot_frames = body_landmarks[pivot]
                body_landmarks[target] = [
                    __rotate(pivot_frames[frame_index], landmark, angle)
                    for frame_index, landmark in enumerate(body_landmarks[target])
                ]

    return __wrap_sign_into_row(body_landmarks, hand_landmarks)


if __name__ == "__main__":
    # Running this file directly is a no-op; it only defines the augmentation functions above.
    pass