Spaces:
Build error
Build error
File size: 9,828 Bytes
a001524 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 |
import math
import logging
import cv2
import random
import numpy as np
from normalization.body_normalization import BODY_IDENTIFIERS
from normalization.hand_normalization import HAND_IDENTIFIERS
# Every imported hand identifier is duplicated with a "_0" and a "_1" suffix (all "_0" entries first, followed by all
# "_1" entries) - presumably one suffix per tracked hand.
HAND_IDENTIFIERS = [identifier + suffix for suffix in ("_0", "_1") for identifier in HAND_IDENTIFIERS]

# Arm landmarks in the order they are traversed during arm joint rotation. "$side$" is a placeholder which gets
# substituted with "left" or "right".
ARM_IDENTIFIERS_ORDER = ["neck", "$side$Shoulder", "$side$Elbow", "$side$Wrist"]
def __random_pass(prob):
    """
    Supplementary method drawing a random number from the [0; 1) interval and reporting whether it fell below the
    given threshold.

    :param prob: Threshold the drawn number is compared against (i.e. the probability of passing)
    :return: True if the drawn number is strictly smaller than prob, False otherwise
    """

    drawn = random.random()

    return drawn < prob
def __numpy_to_dictionary(data_array: np.ndarray) -> dict:
    """
    Supplementary method turning a NumPy array of body landmark data back into a dictionary keyed by the landmark
    identifiers. The second axis of the array must follow the order of the BODY_IDENTIFIERS list.

    :param data_array: Array indexed as [frame, landmark, ...]
    :return: Dictionary mapping each body identifier to the (nested) list of its per-frame values
    """

    return {
        identifier: data_array[:, position].tolist()
        for position, identifier in enumerate(BODY_IDENTIFIERS)
    }
def __dictionary_to_numpy(landmarks_dict: dict) -> np.ndarray:
    """
    Supplementary method packing a dictionary of body landmark data into a single NumPy array. The landmark axis of
    the resulting array follows the order of the BODY_IDENTIFIERS list.

    :param landmarks_dict: Dictionary mapping each body identifier to a per-frame sequence of (X, Y) coordinates
    :return: Array of the shape (number of frames, number of body identifiers, 2)
    """

    # The number of frames is taken from the "leftEar" entry
    frame_count = len(landmarks_dict["leftEar"])
    output = np.empty(shape=(frame_count, len(BODY_IDENTIFIERS), 2))

    for position, identifier in enumerate(BODY_IDENTIFIERS):
        coordinates = np.array(landmarks_dict[identifier])
        output[:, position, 0] = coordinates[:, 0]
        output[:, position, 1] = coordinates[:, 1]

    return output
def __rotate(origin: tuple, point: tuple, angle: float):
    """
    Rotates a point around a given origin by a given angle, using the standard 2D rotation formula (counterclockwise
    for a positive angle in a coordinate system with the Y axis pointing upwards).

    :param origin: Landmark in the (X, Y) format serving as the center of rotation
    :param point: Landmark in the (X, Y) format to be rotated
    :param angle: Angle of rotation in radians
    :return: Tuple with the (X, Y) coordinates of the rotated point
    """

    origin_x, origin_y = origin
    point_x, point_y = point

    # Offset of the point relative to the center of rotation
    delta_x = point_x - origin_x
    delta_y = point_y - origin_y

    cosine = math.cos(angle)
    sine = math.sin(angle)

    rotated_x = origin_x + cosine * delta_x - sine * delta_y
    rotated_y = origin_y + sine * delta_x + cosine * delta_y

    return rotated_x, rotated_y
def __preprocess_row_sign(sign: dict) -> (dict, dict):
    """
    Supplementary method dividing the single-dictionary skeletal data into separate dictionaries of body and hand
    landmarks. Two input layouts are supported: if the "nose_X" key is present, the X and Y coordinates are read from
    the separate "<identifier>_X" and "<identifier>_Y" entries and zipped into per-frame (X, Y) tuples; otherwise the
    entries stored directly under the identifiers are used as they are.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :return: Tuple of two dictionaries (body landmarks, hand landmarks) keyed by BODY_IDENTIFIERS and
             HAND_IDENTIFIERS respectively
    """

    has_split_coordinates = "nose_X" in sign

    def landmarks_of(identifier):
        # Pair the separately stored coordinates frame by frame, or take the stored value over unchanged
        if has_split_coordinates:
            return list(zip(sign[identifier + "_X"], sign[identifier + "_Y"]))
        return sign[identifier]

    body_landmarks = {identifier: landmarks_of(identifier) for identifier in BODY_IDENTIFIERS}
    hand_landmarks = {identifier: landmarks_of(identifier) for identifier in HAND_IDENTIFIERS}

    return body_landmarks, hand_landmarks
def __wrap_sign_into_row(body_identifiers: dict, hand_identifiers: dict) -> dict:
    """
    Supplementary method combining the body and hand data into one new dictionary. Should a key be present in both
    inputs, the value from the hand data is kept.

    :param body_identifiers: Dictionary with the body landmark data
    :param hand_identifiers: Dictionary with the hand landmark data
    :return: New dictionary holding the body entries followed by the hand entries
    """

    merged = dict(body_identifiers)
    merged.update(hand_identifiers)

    return merged
def augment_rotate(sign: dict, angle_range: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE. All the joint coordinates (body and hands) in each frame are rotated by a single random
    angle drawn uniformly from the given range, with the center of rotation lying at [0.5; 0.5].

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the
                        angle by which the landmarks will be rotated from
    :return: Dictionary with augmented (by rotation) sequential skeletal data of the signing person
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    # One angle is shared by every landmark and every frame of the sign
    angle = math.radians(random.uniform(*angle_range))
    center = (0.5, 0.5)

    def rotate_all(landmarks):
        return {
            identifier: [__rotate(center, frame, angle) for frame in frames]
            for identifier, frames in landmarks.items()
        }

    rotated_body = rotate_all(body_landmarks)
    rotated_hands = rotate_all(hand_landmarks)

    return __wrap_sign_into_row(rotated_body, rotated_hands)
def augment_shear(sign: dict, type: str, squeeze_ratio: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE.

        - Squeeze. All the frames are squeezed from both horizontal sides. Two independent random proportions of the
        original frame's width, one for the left and one for the right side, are cut.

        - Perspective transformation. The joint coordinates are projected onto a new plane, which simulates recording
        the sign video with a slight tilt. Each time, the right or left side (chosen randomly with equal chance) is
        moved inwards by a random proportion, which reduces both the width and the height of the respective
        vertical edge at both of its adjacent corners.

    In both cases the proportions are drawn uniformly from the squeeze_ratio range and the transformation is applied
    to the body landmarks only.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param type: Type of shear augmentation to perform (either 'squeeze' or 'perspective')
    :param squeeze_ratio: Tuple containing the relative range from what the proportion of the original width will be
                          randomly chosen. These proportions will either be cut from both sides or used to construct the
                          new projection
    :return: Dictionary with augmented (by squeezing or perspective transformation) sequential skeletal data of the
             signing person. An empty dictionary is returned (and an error is logged) if the type is not supported
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    # Corners of the unit frame; both modes map them onto the corners of a distorted quadrilateral
    src = np.array(((0, 1), (1, 1), (0, 0), (1, 0)), dtype=np.float32)

    if type == "squeeze":
        move_left = random.uniform(*squeeze_ratio)
        move_right = random.uniform(*squeeze_ratio)

        dest = np.array(((0 + move_left, 1), (1 - move_right, 1), (0 + move_left, 0), (1 - move_right, 0)),
                        dtype=np.float32)

    elif type == "perspective":
        move_ratio = random.uniform(*squeeze_ratio)

        if __random_pass(0.5):
            # Shrink the left vertical edge
            dest = np.array(((0 + move_ratio, 1 - move_ratio), (1, 1), (0 + move_ratio, 0 + move_ratio), (1, 0)),
                            dtype=np.float32)
        else:
            # Shrink the right vertical edge
            dest = np.array(((0, 1), (1 - move_ratio, 1 - move_ratio), (0, 0), (1 - move_ratio, 0 + move_ratio)),
                            dtype=np.float32)

    else:
        logging.error("Unsupported shear type provided.")
        return {}

    mtx = cv2.getPerspectiveTransform(src, dest)

    landmarks_array = np.array(__dictionary_to_numpy(body_landmarks), dtype=np.float32)
    augmented_landmarks = cv2.perspectiveTransform(landmarks_array, mtx)

    # Landmarks located exactly at (0, 0) (presumably the marker of an undetected landmark - verify against the data
    # source) must stay at (0, 0). The mask is built per whole point from the untransformed data, so a landmark which
    # merely shares a single coordinate with the transformed origin keeps both of its transformed coordinates.
    zero_points = np.all(landmarks_array == 0, axis=-1, keepdims=True)
    augmented_landmarks = np.where(zero_points, np.float32(0), augmented_landmarks)

    body_landmarks = __numpy_to_dictionary(augmented_landmarks)

    # NOTE(review): the hand landmarks are passed through without being transformed - confirm this is intended
    return __wrap_sign_into_row(body_landmarks, hand_landmarks)
def augment_arm_joint_rotate(sign: dict, probability: float, angle_range: tuple) -> dict:
    """
    AUGMENTATION TECHNIQUE. The joint coordinates of both arms are passed successively (following the
    ARM_IDENTIFIERS_ORDER list), and with the given probability all the landmarks further along the arm are rotated
    around the current one by a random angle drawn uniformly from the given range. This simulates slight variances in
    each execution of a sign, which do not change its semantic meaning.

    :param sign: Dictionary with sequential skeletal data of the signing person
    :param probability: Probability of each joint to be rotated (float from the range [0, 1])
    :param angle_range: Tuple containing the angle range (minimal and maximal angle in degrees) to randomly choose the
                        angle by which the landmarks will be rotated from
    :return: Dictionary with augmented (by arm joint rotation) sequential skeletal data of the signing person
    """

    body_landmarks, hand_landmarks = __preprocess_row_sign(sign)

    # Process both arms one after the other
    for side in ("left", "right"):
        # Resolve the side placeholder once for the whole chain of arm landmarks
        arm_chain = [template.replace("$side$", side) for template in ARM_IDENTIFIERS_ORDER]

        for position, pivot_name in enumerate(arm_chain):
            # Stop processing this arm as soon as a landmark of the chain is missing
            if pivot_name not in body_landmarks:
                break

            # Leave this joint untouched unless the random draw passes
            if not __random_pass(probability):
                continue

            angle = math.radians(random.uniform(*angle_range))

            # Rotate every landmark further along the arm around the current joint, frame by frame
            for moved_name in arm_chain[position + 1:]:
                if moved_name not in body_landmarks:
                    continue

                pivot_frames = body_landmarks[pivot_name]
                body_landmarks[moved_name] = [
                    __rotate(pivot_frames[frame_index], frame, angle)
                    for frame_index, frame in enumerate(body_landmarks[moved_name])
                ]

    return __wrap_sign_into_row(body_landmarks, hand_landmarks)
if __name__ == "__main__":
    # Nothing is executed when the module is run directly as a script
    pass
|