# NOTE: removed non-code artifact lines ("Spaces:" / "Sleeping") left over from a
# notebook/export conversion; they were not valid Python.
from os import path as osp | |
import cv2 | |
import numpy as np | |
from torch.utils.data import Dataset | |
from dataset.img_utils import masks2bbox, resize, crop | |
class BaseDataset(Dataset):
    """Base dataset for human+object RGB frames with segmentation masks.

    Given a list of ``*.color.jpg`` image paths, it locates the matching
    person/object mask files, crops the image around the union of the two
    masks, and computes the corresponding ROI camera intrinsics in NDC space
    (as used by PyTorch3D).
    """

    def __init__(self, data_paths, input_size=(224, 224)):
        """
        :param data_paths: list of RGB image file paths (``*.color.jpg``)
        :param input_size: (width, height) of the square network-input crop
        """
        self.data_paths = data_paths  # RGB image files
        self.input_size = input_size
        # OpenCV camera convention -> PyTorch3D convention: flip x and y axes.
        opencv2py3d = np.eye(4)
        opencv2py3d[0, 0] = opencv2py3d[1, 1] = -1
        self.opencv2py3d = opencv2py3d

    def __len__(self):
        return len(self.data_paths)

    def load_masks(self, rgb_file):
        """Load the person and object masks matching an RGB file.

        :param rgb_file: path to a ``*.color.jpg`` image
        :return: (person_mask, obj_mask) as grayscale uint8 arrays
        :raises FileNotFoundError: if either mask file is missing/unreadable
        """
        person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.png")
        if not osp.isfile(person_mask_file):
            person_mask_file = rgb_file.replace('.color.jpg', ".person_mask.jpg")
        obj_mask_file = None
        # Object-mask naming differs across datasets; try known patterns in order.
        for pat in [".obj_rend_mask.png", ".obj_rend_mask.jpg", ".obj_mask.png", ".obj_mask.jpg", ".object_rend.png"]:
            obj_mask_file = rgb_file.replace('.color.jpg', pat)
            if osp.isfile(obj_mask_file):
                break
        person_mask = cv2.imread(person_mask_file, cv2.IMREAD_GRAYSCALE)
        obj_mask = cv2.imread(obj_mask_file, cv2.IMREAD_GRAYSCALE)
        # cv2.imread silently returns None on a missing/unreadable file, which
        # would only fail later with a confusing error; fail fast instead.
        if person_mask is None:
            raise FileNotFoundError(f"cannot read person mask for {rgb_file}")
        if obj_mask is None:
            raise FileNotFoundError(f"cannot read object mask for {rgb_file}")
        return person_mask, obj_mask

    def get_crop_params(self, mask_hum, mask_obj, bbox_exp=1.0):
        """Compute a square crop window around the union of the two masks.

        :param mask_hum: person mask
        :param mask_obj: object mask
        :param bbox_exp: expansion factor applied to the tight bbox size
        :return: (bmax, bmin, crop_center, crop_size)
        """
        bmin, bmax = masks2bbox([mask_hum, mask_obj])
        crop_center = (bmin + bmax) // 2
        # crop_size = np.max(bmax - bmin)
        crop_size = int(np.max(bmax - bmin) * bbox_exp)
        if crop_size % 2 == 1:
            crop_size += 1  # make sure it is an even number
        return bmax, bmin, crop_center, crop_size

    def is_behave_dataset(self, image_width):
        """Decide which dataset a frame comes from based on its width.

        BEHAVE images are 4:3 (2048 or 1024 wide); InterCap images are 16:9
        (1920 or 960 wide).

        :param image_width: full image width in pixels
        :return: True for BEHAVE, False for InterCap
        """
        assert image_width in [2048, 1920, 1024, 960], f'unknown image width {image_width}!'
        return image_width in [2048, 1024]

    def compute_K_roi(self, bbox_square,
                      image_width=2048,
                      image_height=1536,
                      fx=979.7844, fy=979.840,
                      cx=1018.952, cy=779.486):
        """Compute the 4x4 ROI projection matrix in NDC coordinates.

        :param bbox_square: (x, y, b, w) square crop window in pixels; b == w
        :param image_width: full image width, used to pick the camera model
        :param image_height: full image height (aspect-ratio sanity check)
        :param fx, fy, cx, cy: unused defaults kept for interface
            compatibility — both branches below overwrite them with the
            dataset-specific intrinsics scaled by the render resolution.
        :return: 4x4 K matrix in NDC convention ("this is correct!!!")
        """
        x, y, b, w = bbox_square
        assert b == w
        is_behave = self.is_behave_dataset(image_width)

        if is_behave:
            assert image_height / image_width == 0.75, f"invalid image aspect ratio: width={image_width}, height={image_height}"
            # the image might be rendered at different size
            ratio = image_width / 2048.
            fx, fy = 979.7844 * ratio, 979.840 * ratio
            cx, cy = 1018.952 * ratio, 779.486 * ratio
        else:
            assert image_height / image_width == 9 / 16, f"invalid image aspect ratio: width={image_width}, height={image_height}"
            # intercap camera
            ratio = image_width / 1920
            fx, fy = 918.457763671875 * ratio, 918.4373779296875 * ratio
            cx, cy = 956.9661865234375 * ratio, 555.944580078125 * ratio

        # Shift the principal point into the crop's local pixel frame.
        cx, cy = cx - x, cy - y
        scale = b / 2.
        # in ndc
        cx_ = (scale - cx) / scale
        cy_ = (scale - cy) / scale
        fx_ = fx / scale
        fy_ = fy / scale

        K_roi = np.array([
            [fx_, 0, cx_, 0],
            [0., fy_, cy_, 0, ],
            [0, 0, 0, 1.],
            [0, 0, 1, 0]
        ])
        return K_roi

    def crop_full_image(self, mask_hum, mask_obj, rgb_full, crop_masks, bbox_exp=1.0):
        """
        crop the image based on the given masks
        :param mask_hum: person mask (full resolution)
        :param mask_obj: object mask (full resolution)
        :param rgb_full: full-resolution RGB image
        :param crop_masks: a list of masks used to do the crop
        :param bbox_exp: expansion factor for the crop bounding box
        :return: Kroi, cropped human, object mask and RGB images (background masked out).
        """
        bmax, bmin, crop_center, crop_size = self.get_crop_params(*crop_masks, bbox_exp)
        # Crop to the square window, resize to network input, normalize to [0, 1].
        rgb = resize(crop(rgb_full, crop_center, crop_size), self.input_size) / 255.
        person_mask = resize(crop(mask_hum, crop_center, crop_size), self.input_size) / 255.
        obj_mask = resize(crop(mask_obj, crop_center, crop_size), self.input_size) / 255.
        xywh = np.concatenate([crop_center - crop_size // 2, np.array([crop_size, crop_size])])
        Kroi = self.compute_K_roi(xywh, rgb_full.shape[1], rgb_full.shape[0])
        # mask bkg out
        mask_comb = (person_mask > 0.5) | (obj_mask > 0.5)
        rgb = rgb * np.expand_dims(mask_comb, -1)
        return Kroi, obj_mask, person_mask, rgb