# VideoToNPZ/tools/preprocess.py
# (repository header kept as a comment so the file parses as Python:
#  contributors Sam / Amanpreet, commit 1cdc47e, "added 2")
import json
import numpy as np
from tools.mpii_coco_h36m import coco_h36m
import os
h36m_coco_order = [9, 11, 14, 12, 15, 13, 16, 4, 1, 5, 2, 6, 3]
coco_order = [0, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
num_person = 2
num_joints = 17
img_3d = 100.
ratio_2d_3d = 500.
def load_json(file_path, num_person=2, num_joints=17):
    """Load one skeleton-annotation JSON file.

    The file is expected to hold a dict with 'label', 'label_index' and
    'data': a list of per-frame dicts, each carrying a 1-based 'frame_index'
    and a 'skeleton' list of {'pose', 'score', 'bbox'} detections.

    Args:
        file_path: path to the JSON annotation file.
        num_person: max people kept per frame (default mirrors the
            module-level ``num_person`` constant).
        num_joints: joints per skeleton (default mirrors the module-level
            ``num_joints`` constant).

    Returns:
        (keypoints, scores, label, label_index) where keypoints is float32 of
        shape (num_person, num_frames, num_joints, 2) and scores is float32 of
        shape (num_person, num_frames, num_joints); undetected slots stay 0.
    """
    with open(file_path, 'r') as fr:
        video_info = json.load(fr)

    label = video_info['label']
    label_index = video_info['label_index']

    frames_data = video_info['data']
    # Bug fix: the original read the frame count from the *last* entry of
    # 'data', which under-allocates (and then raises IndexError on the writes
    # below) whenever 'data' is not sorted by frame_index, and crashes on an
    # empty list.  Take the maximum instead; empty data yields 0 frames.
    num_frames = max((f['frame_index'] for f in frames_data), default=0)

    keypoints = np.zeros((num_person, num_frames, num_joints, 2), dtype=np.float32)
    scores = np.zeros((num_person, num_frames, num_joints), dtype=np.float32)

    for frame_info in frames_data:
        frame_index = frame_info['frame_index']
        for index, skeleton_info in enumerate(frame_info['skeleton']):
            # Skip empty detections and any people beyond num_person.
            if len(skeleton_info['bbox']) == 0 or index + 1 > num_person:
                continue
            pose = np.asarray(skeleton_info['pose'], dtype=np.float32)
            score = np.asarray(skeleton_info['score'], dtype=np.float32).reshape(-1)
            keypoints[index, frame_index - 1] = pose   # frame_index is 1-based
            scores[index, frame_index - 1] = score

    return keypoints, scores, label, label_index
def h36m_coco_format(keypoints, scores):
assert len(keypoints.shape) == 4 and len(scores.shape) == 3
h36m_kpts = []
h36m_scores = []
valid_frames = []
for i in range(keypoints.shape[0]):
kpts = keypoints[i]
score = scores[i]
new_score = np.zeros_like(score, dtype=np.float32)
if np.sum(kpts) != 0.:
kpts, valid_frame = coco_h36m(kpts)
h36m_kpts.append(kpts)
valid_frames.append(valid_frame)
new_score[:, h36m_coco_order] = score[:, coco_order]
new_score[:, 0] = np.mean(score[:, [11, 12]], axis=1, dtype=np.float32)
new_score[:, 8] = np.mean(score[:, [5, 6]], axis=1, dtype=np.float32)
new_score[:, 7] = np.mean(new_score[:, [0, 8]], axis=1, dtype=np.float32)
new_score[:, 10] = np.mean(score[:, [1, 2, 3, 4]], axis=1, dtype=np.float32)
h36m_scores.append(new_score)
h36m_kpts = np.asarray(h36m_kpts, dtype=np.float32)
h36m_scores = np.asarray(h36m_scores, dtype=np.float32)
return h36m_kpts, h36m_scores, valid_frames
def revise_kpts(h36m_kpts, h36m_scores, valid_frames):
new_h36m_kpts = np.zeros_like(h36m_kpts)
for index, frames in enumerate(valid_frames):
kpts = h36m_kpts[index, frames]
score = h36m_scores[index, frames]
# threshold_score = score > 0.3
# if threshold_score.all():
# continue
index_frame = np.where(np.sum(score < 0.3, axis=1) > 0)[0]
for frame in index_frame:
less_threshold_joints = np.where(score[frame] < 0.3)[0]
intersect = [i for i in [2, 3, 5, 6] if i in less_threshold_joints]
if [2, 3, 5, 6] == intersect:
kpts[frame, [2, 3, 5, 6]] = kpts[frame, [1, 1, 4, 4]]
elif [2, 3, 6] == intersect:
kpts[frame, [2, 3, 6]] = kpts[frame, [1, 1, 5]]
elif [3, 5, 6] == intersect:
kpts[frame, [3, 5, 6]] = kpts[frame, [2, 4, 4]]
elif [3, 6] == intersect:
kpts[frame, [3, 6]] = kpts[frame, [2, 5]]
elif [3] == intersect:
kpts[frame, 3] = kpts[frame, 2]
elif [6] == intersect:
kpts[frame, 6] = kpts[frame, 5]
else:
continue
new_h36m_kpts[index, frames] = kpts
return new_h36m_kpts
def load_kpts_json(kpts_json):
keypoints, scores, label, label_index = load_json(kpts_json)
h36m_kpts, h36m_scores, valid_frames = h36m_coco_format(keypoints, scores)
re_kpts = revise_kpts(h36m_kpts, h36m_scores, valid_frames)
return re_kpts, valid_frames, scores, label, label_index
def revise_skes(prediction, re_kpts, valid_frames):
new_prediction = np.zeros((*re_kpts.shape[:-1], 3), dtype=np.float32)
for i, frames in enumerate(valid_frames):
new_prediction[i, frames] = prediction[i]
# The origin of (x, y) is in the upper right corner,
# while the (x,y) coordinates in the image are in the upper left corner.
distance = re_kpts[i, frames[1:], :, :2] - re_kpts[i, frames[:1], :, :2]
distance = np.mean(distance[:, [1, 4, 11, 14]], axis=-2, keepdims=True)
new_prediction[i, frames[1:], :, 0] -= distance[..., 0] / ratio_2d_3d
new_prediction[i, frames[1:], :, 1] += distance[..., 1] / ratio_2d_3d
# The origin of (x, y) is in the upper right corner,
# while the (x,y) coordinates in the image are in the upper left corner.
# Calculate the relative distance between two people
if len(valid_frames) == 2:
intersec_frames = [frame for frame in valid_frames[0] if frame in valid_frames[1]]
absolute_distance = re_kpts[0, intersec_frames[:1], :, :2] - re_kpts[1, intersec_frames[:1], :, :2]
absolute_distance = np.mean(absolute_distance[:, [1, 4, 11, 14]], axis=-2, keepdims=True) / 2.
new_prediction[0, valid_frames[0], :, 0] -= absolute_distance[..., 0] / ratio_2d_3d
new_prediction[0, valid_frames[0], :, 1] += absolute_distance[..., 1] / ratio_2d_3d
new_prediction[1, valid_frames[1], :, 0] += absolute_distance[..., 0] / ratio_2d_3d
new_prediction[1, valid_frames[1], :, 1] -= absolute_distance[..., 1] / ratio_2d_3d
# Pre-processing the case where the movement of Z axis is relatively large, such as 'sitting down'
# Remove the absolute distance
# new_prediction[:, :, 1:] -= new_prediction[:, :, :1]
# new_prediction[:, :, 0] = 0
new_prediction[:, :, :, 2] -= np.amin(new_prediction[:, :, :, 2])
return new_prediction
def revise_skes_real_time(prediction, re_kpts, width):
ratio_2d_3d_width = ratio_2d_3d * (width / 1920)
# prediction: (M, N, 3)
new_prediction = np.zeros((len(prediction), 17, 3), dtype=np.float32)
for i in range(len(prediction)):
new_prediction[i] = prediction[i]
initial_distance = re_kpts[i]
initial_distance = np.mean(initial_distance[[1, 4, 11, 14], :], axis=0)
new_prediction[i, :, 0] -= (initial_distance[0] - 3*width/5) / ratio_2d_3d_width
new_prediction[i, :, 1] += (initial_distance[1] - width/5) / ratio_2d_3d_width
new_prediction[:, :, 2] -= np.amin(new_prediction[:, :, 2])
return new_prediction