Spaces:

gensim2
/

Gen1

Runtime error

App Files Files Community

Gen1 / cliport /utils /utils.py

gensim2

init

ff66cf3 almost 2 years ago

raw

history blame contribute delete

44.4 kB

	"""Miscellaneous utilities."""

	import cv2
	import random
	import matplotlib
	import matplotlib.pyplot as plt
	import meshcat
	import meshcat.geometry as g
	import meshcat.transformations as mtf

	import PIL
	import yaml
	import numpy as np
	from transforms3d import euler

	import pybullet as p
	import kornia
	from omegaconf import OmegaConf

	import os
	import torch
	import torchvision


	# -----------------------------------------------------------------------------
	# HEIGHTMAP UTILS
	# -----------------------------------------------------------------------------

	def get_heightmap(points, colors, bounds, pixel_size):
	    """Render a top-down (z-axis) orthographic heightmap from a 3D pointcloud.

	    Args:
	      points: HxWx3 float array of 3D points in world coordinates.
	      colors: HxWx3 uint8 array of values in range 0-255 aligned with points.
	      bounds: 3x2 float array (rows: X,Y,Z; columns: min,max) delimiting the
	        region of world space covered by the heightmap.
	      pixel_size: float defining size of each pixel in meters.

	    Returns:
	      heightmap: float32 array of height above the lower z-bound, in meters.
	      colormap: uint8 array of backprojected color aligned with heightmap.
	    """
	    n_cols = int(np.round((bounds[0, 1] - bounds[0, 0]) / pixel_size))
	    n_rows = int(np.round((bounds[1, 1] - bounds[1, 0]) / pixel_size))
	    n_channels = colors.shape[-1]
	    heightmap = np.zeros((n_rows, n_cols), dtype=np.float32)
	    colormap = np.zeros((n_rows, n_cols, n_channels), dtype=np.uint8)

	    # Keep only the points lying inside the half-open box [min, max) on
	    # every axis.
	    in_x = (points[Ellipsis, 0] >= bounds[0, 0]) & (points[Ellipsis, 0] < bounds[0, 1])
	    in_y = (points[Ellipsis, 1] >= bounds[1, 0]) & (points[Ellipsis, 1] < bounds[1, 1])
	    in_z = (points[Ellipsis, 2] >= bounds[2, 0]) & (points[Ellipsis, 2] < bounds[2, 1])
	    inside = in_x & in_y & in_z
	    points = points[inside]
	    colors = colors[inside]

	    # Ascending z order: with repeated pixel indices the later (higher)
	    # assignment wins, which emulates z-buffering.
	    order = np.argsort(points[:, -1])
	    points = points[order]
	    colors = colors[order]

	    cols = np.int32(np.floor((points[:, 0] - bounds[0, 0]) / pixel_size))
	    rows = np.int32(np.floor((points[:, 1] - bounds[1, 0]) / pixel_size))
	    cols = np.clip(cols, 0, n_cols - 1)
	    rows = np.clip(rows, 0, n_rows - 1)

	    heightmap[rows, cols] = points[:, 2] - bounds[2, 0]
	    for channel in range(n_channels):
	        colormap[rows, cols, channel] = colors[:, channel]
	    return heightmap, colormap


	def get_pointcloud(depth, intrinsics):
	    """Back-project a perspective depth image into a 3D pointcloud.

	    Args:
	      depth: HxW float array of perspective depth in meters.
	      intrinsics: 3x3 float array of camera intrinsics matrix.

	    Returns:
	      points: HxWx3 float32 array of 3D points in camera coordinates.
	    """
	    n_rows, n_cols = depth.shape
	    col_grid, row_grid = np.meshgrid(
	        np.linspace(0, n_cols - 1, n_cols),
	        np.linspace(0, n_rows - 1, n_rows))

	    # Pinhole model: offset from the principal point, scaled by depth/focal.
	    cam_x = (col_grid - intrinsics[0, 2]) * (depth / intrinsics[0, 0])
	    cam_y = (row_grid - intrinsics[1, 2]) * (depth / intrinsics[1, 1])
	    return np.stack((cam_x, cam_y, depth), axis=-1).astype(np.float32)


	def transform_pointcloud(points, transform):
	    """Apply a rigid transformation to a 3D pointcloud, in place.

	    Args:
	      points: HxWx3 float array of 3D points in camera coordinates. This
	        array is overwritten with the transformed coordinates.
	      transform: 4x4 float array representing a rigid transformation matrix.

	    Returns:
	      points: the same HxWx3 array object, now holding the transformed points.
	    """
	    # Append a constant 1 to every point to obtain homogeneous coordinates.
	    homogeneous = np.pad(
	        points, ((0, 0), (0, 0), (0, 1)), 'constant', constant_values=1)
	    for row in range(3):
	        weighted = transform[row, :] * homogeneous
	        points[Ellipsis, row] = np.sum(weighted, axis=-1)
	    return points


	def reconstruct_heightmaps(color, depth, configs, bounds, pixel_size):
	    """Build one top-down heightmap/colormap pair per camera view.

	    Args:
	      color: sequence of color images, one per camera.
	      depth: sequence of depth images, aligned with `color`.
	      configs: sequence of camera configs; each provides 'intrinsics',
	        'position' and 'rotation' (quaternion passed to pybullet).
	      bounds: 3x2 array of workspace bounds forwarded to get_heightmap.
	      pixel_size: size of a heightmap pixel, forwarded to get_heightmap.

	    Returns:
	      heightmaps: list of heightmaps, one per view.
	      colormaps: list of colormaps, one per view.
	    """
	    heightmaps = []
	    colormaps = []
	    for view_color, view_depth, cam in zip(color, depth, configs):
	        intrinsics = np.array(cam['intrinsics']).reshape(3, 3)
	        cam_points = get_pointcloud(view_depth, intrinsics)

	        # Assemble the 4x4 pose of the camera from its position/rotation.
	        translation = np.array(cam['position']).reshape(3, 1)
	        rot_flat = p.getMatrixFromQuaternion(cam['rotation'])
	        rot_matrix = np.array(rot_flat).reshape(3, 3)
	        pose = np.eye(4)
	        pose[:3, :] = np.hstack((rot_matrix, translation))

	        moved_points = transform_pointcloud(cam_points, pose)
	        view_hmap, view_cmap = get_heightmap(
	            moved_points, view_color, bounds, pixel_size)
	        heightmaps.append(view_hmap)
	        colormaps.append(view_cmap)
	    return heightmaps, colormaps


	def pix_to_xyz(pixel, height, bounds, pixel_size, skip_height=False):
	    """Convert from pixel location on heightmap to 3D position.

	    Args:
	      pixel: (u, v) index pair; u indexes rows (y), v indexes columns (x).
	      height: heightmap indexed as height[u, v]; unused if skip_height.
	      bounds: 3x2 array of workspace bounds (rows: X,Y,Z; columns: min,max).
	      pixel_size: size of one pixel in world units.
	      skip_height: when True the heightmap is not read and z is 0.0.

	    Returns:
	      Tuple (x, y, z).
	    """
	    row, col = pixel
	    x = bounds[0, 0] + col * pixel_size
	    y = bounds[1, 0] + row * pixel_size
	    z = 0.0 if skip_height else bounds[2, 0] + height[row, col]
	    return (x, y, z)


	def xyz_to_pix(position, bounds, pixel_size):
	    """Convert from 3D position to pixel location on heightmap.

	    Args:
	      position: indexable (x, y, ...) position; only x and y are read.
	      bounds: 3x2 array of workspace bounds (rows: X,Y,Z; columns: min,max).
	      pixel_size: size of one pixel in world units.

	    Returns:
	      Tuple (u, v) of ints: u is derived from y, v from x.
	    """
	    y_offset = position[1] - bounds[1, 0]
	    x_offset = position[0] - bounds[0, 0]
	    row = int(np.round(y_offset / pixel_size))
	    col = int(np.round(x_offset / pixel_size))
	    return (row, col)


	def unproject_vectorized(uv_coordinates, depth_values,
	                         intrinsic,
	                         distortion):
	    """Vectorized version of unproject(), for N points.

	    Args:
	      uv_coordinates: pixel coordinates to unproject of shape (n, 2).
	      depth_values: depth values corresponding index-wise to the
	        uv_coordinates of shape (n).
	      intrinsic: array of shape (3, 3). This is typically the return value of
	        intrinsics_to_matrix.
	      distortion: camera distortion parameters of shape (5,).

	    Returns:
	      xyz coordinates in camera frame of shape (n, 3).
	    """
	    cam_mtx = intrinsic  # shape [3, 3]
	    cam_dist = np.array(distortion)  # shape [5]

	    # Reshape to [N, 2] explicitly. The previous squeeze() also removed the
	    # point axis when N == 1, which broke the column indexing below.
	    points_undistorted = cv2.undistortPoints(
	        uv_coordinates.reshape((-1, 1, 2)), cam_mtx, cam_dist).reshape(-1, 2)

	    # Scale the undistorted x/y by depth to obtain camera-frame coordinates.
	    x = points_undistorted[:, 0] * depth_values
	    y = points_undistorted[:, 1] * depth_values

	    xyz = np.vstack((x, y, depth_values)).T
	    return xyz


	def unproject_depth_vectorized(im_depth, depth_dist,
	                               camera_mtx,
	                               camera_dist):
	    """Unproject depth image into 3D point cloud, using calibration.

	    Args:
	      im_depth: raw depth image, pre-calibration of shape (height, width).
	      depth_dist: depth distortion parameters; only the first two entries are
	        read here, as an offset and a scale applied to im_depth.
	      camera_mtx: intrinsics matrix of shape (3, 3). This is typically the
	        return value of intrinsics_to_matrix.
	      camera_dist: camera distortion parameters shape (5,).

	    Returns:
	      Whatever unproject_vectorized returns for all H*W pixels: an array of
	      shape (H*W, 3) whose rows are xyz coordinates.
	    """
	    n_rows, n_cols = im_depth.shape

	    # Pixel coordinate grids, each of shape [H, W].
	    cols = np.linspace(0, n_cols - 1, n_cols)
	    rows = np.linspace(0, n_rows - 1, n_rows)
	    u_grid, v_grid = np.meshgrid(cols, rows)

	    # Affine depth correction: offset + scale * raw depth.
	    corrected = depth_dist[0] + im_depth * depth_dist[1]

	    # One (u, v) row per pixel -> shape [H * W, 2].
	    flat_u = u_grid.reshape(-1)
	    flat_v = v_grid.reshape(-1)
	    uv = np.stack((flat_u, flat_v), axis=-1)

	    return unproject_vectorized(uv, corrected.reshape(-1),
	                                camera_mtx, camera_dist)


	# -----------------------------------------------------------------------------
	# MATH UTILS
	# -----------------------------------------------------------------------------


	def sample_distribution(prob, n_samples=1):
	    """Sample index coordinates from an unnormalized probability array.

	    Args:
	      prob: array of non-negative weights; normalized by its sum here.
	      n_samples: number of distinct cells to draw (without replacement).

	    Returns:
	      int32 array of the sampled coordinates in `prob`, with singleton
	      dimensions squeezed out.
	    """
	    weights = prob.flatten() / np.sum(prob)
	    candidates = np.arange(len(weights))
	    picked = np.random.choice(
	        candidates, n_samples, p=weights, replace=False)
	    coords = np.array(np.unravel_index(picked, prob.shape)).T
	    return np.int32(coords.squeeze())


	# -------------------------------------------------------------------------
	# Transformation Helper Functions
	# -------------------------------------------------------------------------


	def invert(pose):
	    """Invert a pose via pybullet's invertTransform.

	    Args:
	      pose: indexable pair; pose[0] and pose[1] are passed to pybullet as the
	        position and orientation arguments.

	    Returns:
	      The result of p.invertTransform.
	    """
	    position, orientation = pose[0], pose[1]
	    return p.invertTransform(position, orientation)


	def multiply(pose0, pose1):
	    """Compose two poses via pybullet's multiplyTransforms.

	    Args:
	      pose0: indexable pair (position, orientation) of the first pose.
	      pose1: indexable pair (position, orientation) of the second pose.

	    Returns:
	      The result of p.multiplyTransforms.
	    """
	    position0, orientation0 = pose0[0], pose0[1]
	    position1, orientation1 = pose1[0], pose1[1]
	    return p.multiplyTransforms(
	        position0, orientation0, position1, orientation1)


	def apply(pose, position):
	    """Rotate and translate `position` by `pose`.

	    Args:
	      pose: indexable pair; pose[0] is the translation and pose[1] the
	        quaternion handed to pybullet's getMatrixFromQuaternion.
	      position: array-like that can be reshaped to (3, -1).

	    Returns:
	      Tuple built from the transformed float32 array, which has the same
	      shape as the input.
	    """
	    coords = np.float32(position)
	    original_shape = coords.shape
	    columns = coords.reshape(3, -1)

	    rot_flat = p.getMatrixFromQuaternion(pose[1])
	    rot_matrix = np.float32(rot_flat).reshape(3, 3)
	    offset = np.float32(pose[0]).reshape(3, 1)

	    moved = rot_matrix @ columns + offset
	    return tuple(moved.reshape(original_shape))


	def eulerXYZ_to_quatXYZW(rotation):  # pylint: disable=invalid-name
	    """Convert a 3-parameter rotation into a quaternion.

	    Keeping this behind one function makes it easy to switch the rotation
	    parameterization. Quaternion should be in xyzw order for pybullet.

	    Args:
	      rotation: a 3-parameter rotation, in xyz order tuple of 3 floats

	    Returns:
	      quaternion, in xyzw order, tuple of 4 floats
	    """
	    angle_x, angle_y, angle_z = rotation[0], rotation[1], rotation[2]
	    # transforms3d is called with the angles reordered to z, x, y and
	    # returns the quaternion in wxyz order.
	    quat_wxyz = euler.euler2quat(angle_z, angle_x, angle_y, axes='szxy')
	    return (quat_wxyz[1], quat_wxyz[2], quat_wxyz[3], quat_wxyz[0])


	def quatXYZW_to_eulerXYZ(quaternion_xyzw):  # pylint: disable=invalid-name
	    """Convert a quaternion into a 3-parameter rotation.

	    Keeping this behind one function makes it easy to switch the rotation
	    parameterization. Quaternion should be in xyzw order for pybullet.

	    Args:
	      quaternion_xyzw: in xyzw order, tuple of 4 floats

	    Returns:
	      rotation: a 3-parameter rotation, in xyz order, tuple of 3 floats
	    """
	    # transforms3d expects wxyz order, so move w to the front.
	    quat_wxyz = np.array([
	        quaternion_xyzw[3],
	        quaternion_xyzw[0],
	        quaternion_xyzw[1],
	        quaternion_xyzw[2],
	    ])
	    angles_zxy = euler.quat2euler(quat_wxyz, axes='szxy')
	    return (angles_zxy[1], angles_zxy[2], angles_zxy[0])


	def apply_transform(transform_to_from, points_from):
	    r"""Transforms points (3D) into new frame.

	    Using transform_to_from notation.

	    Args:
	      transform_to_from: numpy.ndarray of shape [4,4] or [B,4,4], SE3
	      points_from: numpy.ndarray of shape [3,N] or [B,3,N] respectively

	    Returns:
	      points_to: numpy.ndarray of shape [3,N] or [B,3,N]
	    """
	    n_points = points_from.shape[-1]
	    is_batched = len(transform_to_from.shape) != 2

	    if not is_batched:
	        # Append a row of ones -> homogeneous coordinates of shape [4,N].
	        unit_row = np.ones((1, n_points))
	        homogeneous = np.vstack((points_from, unit_row))
	        mapped = transform_to_from @ homogeneous
	        return mapped[0:3, :]

	    assert len(transform_to_from.shape) == 3
	    n_batch = transform_to_from.shape[0]
	    unit_rows = np.ones((n_batch, 1, n_points))
	    homogeneous = np.concatenate((points_from, unit_rows), axis=1)
	    assert homogeneous.shape[1] == 4
	    mapped = transform_to_from @ homogeneous
	    return mapped[:, 0:3, :]


	# -----------------------------------------------------------------------------
	# IMAGE UTILS
	# -----------------------------------------------------------------------------


	def preprocess(img, dist='transporter'):
	    """Pre-process input (subtract mean, divide by std).

	    The first three channels are treated as color (scaled by 1/255 before
	    normalization); any remaining channels are treated as depth.

	    Args:
	      img: either a torch.Tensor (or subclass) indexed as [B, C, H, W], or an
	        array indexed as [H, W, C].
	      dist: name of the statistics to use: 'clip', 'mdetr', 'franka', or
	        anything else for the 'transporter' defaults. Depth statistics are
	        'franka' for dist == 'franka' and 'transporter' otherwise.

	    Returns:
	      The normalized image. Tensors are cloned first, so the input tensor is
	      left untouched; non-tensor inputs are normalized IN PLACE and the same
	      object is returned.
	    """
	    transporter_color_mean = [0.18877631, 0.18877631, 0.18877631]
	    transporter_color_std = [0.07276466, 0.07276466, 0.07276466]
	    transporter_depth_mean = 0.00509261
	    transporter_depth_std = 0.00903967

	    franka_color_mean = [0.622291933, 0.628313992, 0.623031488]
	    franka_color_std = [0.168154213, 0.17626014, 0.184527364]
	    franka_depth_mean = 0.872146842
	    franka_depth_std = 0.195743116

	    clip_color_mean = [0.48145466, 0.4578275, 0.40821073]
	    clip_color_std = [0.26862954, 0.26130258, 0.27577711]

	    # Choose the color statistics.
	    if dist == 'clip':
	        color_mean = clip_color_mean
	        color_std = clip_color_std
	    elif dist == 'mdetr':
	        color_mean = [0.485, 0.456, 0.406]
	        color_std = [0.229, 0.224, 0.225]
	    elif dist == 'franka':
	        color_mean = franka_color_mean
	        color_std = franka_color_std
	    else:
	        color_mean = transporter_color_mean
	        color_std = transporter_color_std

	    # Choose the depth statistics.
	    if dist == 'franka':
	        depth_mean = franka_depth_mean
	        depth_std = franka_depth_std
	    else:
	        depth_mean = transporter_depth_mean
	        depth_std = transporter_depth_std

	    # isinstance (not an exact type comparison) so that Tensor subclasses
	    # such as nn.Parameter take the tensor path as well.
	    if isinstance(img, torch.Tensor):
	        def as_channel_stat(stat):
	            # Shape (1, C, 1, 1): broadcasts over batch and spatial axes.
	            stat_tensor = torch.as_tensor(
	                stat, dtype=img.dtype, device=img.device)
	            return stat_tensor.reshape(1, -1, 1, 1)

	        color_mean = as_channel_stat(color_mean)
	        color_std = as_channel_stat(color_std)
	        depth_mean = as_channel_stat(depth_mean)
	        depth_std = as_channel_stat(depth_std)

	        # Normalize a copy so the caller's tensor is not modified.
	        img = img.clone()
	        img[:, :3, :, :] = (img[:, :3, :, :] / 255 - color_mean) / color_std
	        img[:, 3:, :, :] = (img[:, 3:, :, :] - depth_mean) / depth_std
	    else:
	        # Channel-last input: normalized in place. The result is written
	        # back into img, so it is cast to img's dtype.
	        img[:, :, :3] = (img[:, :, :3] / 255 - color_mean) / color_std
	        img[:, :, 3:] = (img[:, :, 3:] - depth_mean) / depth_std

	    return img

	def map_kit_scale(scale):
	    """Return the first three entries of `scale`, each divided by 10."""
	    scale_x, scale_y, scale_z = scale[0], scale[1], scale[2]
	    return (scale_x / 10, scale_y / 10, scale_z / 10)

	def deprocess(img):
	    """Undo the 'transporter' normalization on an [H, W, C] array, in place.

	    Channels 0-2 are un-normalized with the color statistics and channels
	    3+ with the depth statistics; both are scaled by 255 and passed through
	    np.uint8 before being written back into `img`.

	    Args:
	      img: array indexed as [H, W, C]; modified in place.

	    Returns:
	      The same `img` object.
	    """
	    color_mean, color_std = 0.18877631, 0.07276466
	    depth_mean, depth_std = 0.00509261, 0.00903967

	    restored_color = ((img[:, :, :3] * color_std) + color_mean) * 255
	    img[:, :, :3] = np.uint8(restored_color)
	    restored_depth = ((img[:, :, 3:] * depth_std) + depth_mean) * 255
	    img[:, :, 3:] = np.uint8(restored_depth)
	    return img


	def get_fused_heightmap(obs, configs, bounds, pix_size):
	    """Fuse the per-camera orthographic maps into one colormap and heightmap.

	    Args:
	      obs: dict providing 'color' and 'depth' image sequences.
	      configs: camera configs, forwarded to reconstruct_heightmaps.
	      bounds: workspace bounds, forwarded to reconstruct_heightmaps.
	      pix_size: pixel size, forwarded to reconstruct_heightmaps.

	    Returns:
	      cmap: uint8 colormap, averaged over the views that saw each pixel.
	      hmap: float32 heightmap, the per-pixel maximum over views.
	    """
	    per_view_hmaps, per_view_cmaps = reconstruct_heightmaps(
	        obs['color'], obs['depth'], configs, bounds, pix_size)
	    per_view_cmaps = np.float32(per_view_cmaps)
	    per_view_hmaps = np.float32(per_view_hmaps)

	    # A view contributes to a pixel only where its color is non-zero.
	    seen = np.sum(per_view_cmaps, axis=3) > 0
	    n_views = np.sum(seen, axis=0)
	    n_views[n_views == 0] = 1  # avoid dividing by zero for unseen pixels
	    color_sum = np.sum(per_view_cmaps, axis=0)
	    cmap = np.uint8(np.round(color_sum / n_views[Ellipsis, None]))

	    # Max to handle occlusions.
	    hmap = np.max(per_view_hmaps, axis=0)
	    return cmap, hmap


	def get_image_transform(theta, trans, pivot=(0, 0)):
	    """Compute composite 2D rigid transformation matrix.

	    The result rotates by theta (radians) around pivot (pixels) and
	    translates by trans (pixels).

	    Args:
	      theta: rotation angle in radians.
	      trans: indexable (tx, ty) translation in pixels.
	      pivot: indexable (px, py) rotation center in pixels.

	    Returns:
	      3x3 homogeneous transformation matrix.
	    """
	    cos_t, sin_t = np.cos(theta), np.sin(theta)
	    pivot_x, pivot_y = pivot[0], pivot[1]

	    to_pivot = np.array([[1., 0., -pivot_x],
	                         [0., 1., -pivot_y],
	                         [0., 0., 1.]])
	    rigid = np.array([[cos_t, -sin_t, trans[0]],
	                      [sin_t, cos_t, trans[1]],
	                      [0., 0., 1.]])
	    from_pivot = np.array([[1., 0., pivot_x],
	                           [0., 1., pivot_y],
	                           [0., 0., 1.]])

	    # Shift the pivot to the origin, rotate + translate, then shift back.
	    return np.dot(from_pivot, np.dot(rigid, to_pivot))


	def check_transform(image, pixel, transform):
	    """Valid transform only if pixel locations are still in FoV after transform.

	    Args:
	      image: array whose first two dimensions give the valid index range.
	      pixel: indexable (row, col) location.
	      transform: 3x3 matrix acting on homogeneous (col, row, 1) vectors.

	    Returns:
	      valid: truthy if the transformed pixel lies inside the image.
	      new_pixel: int32 array (row, col) of the rounded transformed location.
	    """
	    homogeneous = np.float32([pixel[1], pixel[0], 1.]).reshape(3, 1)
	    mapped = np.int32(np.round(np.dot(transform, homogeneous)))
	    # Drop the homogeneous row and swap back to (row, col) order.
	    new_pixel = np.flip(mapped[:2].squeeze())

	    non_negative = np.all(new_pixel >= 0)
	    valid = (non_negative
	             and new_pixel[0] < image.shape[0]
	             and new_pixel[1] < image.shape[1])
	    return valid, new_pixel


	def get_se3_from_image_transform(theta, trans, pivot, heightmap, bounds,
	                                 pixel_size):
	    """Calculate SE3 from image transform.

	    Args:
	      theta: image rotation angle in radians.
	      trans: image translation in pixels.
	      pivot: image rotation center in pixels.
	      heightmap: heightmap used to look up z at the pivot.
	      bounds: workspace bounds forwarded to pix_to_xyz.
	      pixel_size: pixel size forwarded to pix_to_xyz.

	    Returns:
	      t_world_center: 4x4 matrix with the pivot's 3D position as translation.
	      t_world_centernew: 4x4 matrix for the transformed center.
	    """
	    pivot_pixel = np.flip(np.int32(np.round(pivot)))
	    center_xyz = pix_to_xyz(
	        pivot_pixel, heightmap, bounds, pixel_size, skip_height=False)

	    moved_pixel = np.flip(np.int32(np.round(pivot + trans)))
	    moved_xyz = pix_to_xyz(
	        moved_pixel, heightmap, bounds, pixel_size, skip_height=True)
	    # Don't look up the z height, it might get augmented out of frame;
	    # reuse the z of the original center instead.
	    moved_xyz = (moved_xyz[0], moved_xyz[1], center_xyz[2])

	    offset = np.array(moved_xyz) - np.array(center_xyz)

	    t_world_center = np.eye(4)
	    t_world_center[0:3, 3] = np.array(center_xyz)

	    # Rotation by -theta about z (first angle of the 'szxy' convention).
	    rotation_part = np.eye(4)
	    rotation_part[0:3, 0:3] = euler.euler2mat(
	        -theta, 0, 0, axes='szxy')[0:3, 0:3]

	    translation_part = np.eye(4)
	    translation_part[0:3, 3] = -offset

	    t_centernew_center = rotation_part @ translation_part
	    t_world_centernew = t_world_center @ np.linalg.inv(t_centernew_center)
	    return t_world_center, t_world_centernew


	def get_random_image_transform_params(image_size, theta_sigma=60):
	    """Draw random rotation/translation parameters for image augmentation.

	    Args:
	      image_size: indexable (height, width) of the image.
	      theta_sigma: standard deviation of the rotation angle, in degrees.

	    Returns:
	      theta: rotation angle in radians, drawn from N(0, theta_sigma).
	      trans: length-2 translation [x, y], drawn with sigma = min(size) / 6.
	      pivot: (width / 2, height / 2).
	    """
	    angle_sigma = np.deg2rad(theta_sigma)
	    theta = np.random.normal(0, angle_sigma)

	    shift_sigma = np.min(image_size) / 6
	    trans = np.random.normal(0, shift_sigma, size=2)  # [x, y]

	    half_width = image_size[1] / 2
	    half_height = image_size[0] / 2
	    return theta, trans, (half_width, half_height)


	def q_mult(q1, q2):
	    """Return the Hamilton product q1 * q2 of two wxyz quaternions.

	    Args:
	      q1: 4-element iterable (w, x, y, z).
	      q2: 4-element iterable (w, x, y, z).

	    Returns:
	      Tuple (w, x, y, z) of the product.
	    """
	    a_w, a_x, a_y, a_z = q1
	    b_w, b_x, b_y, b_z = q2
	    return (
	        a_w * b_w - a_x * b_x - a_y * b_y - a_z * b_z,
	        a_w * b_x + a_x * b_w + a_y * b_z - a_z * b_y,
	        a_w * b_y + a_y * b_w + a_z * b_x - a_x * b_z,
	        a_w * b_z + a_z * b_w + a_x * b_y - a_y * b_x,
	    )

	def perturb(input_image, pixels, theta_sigma=60, add_noise=False):
	    """Data augmentation on images.

	    Repeatedly samples a random rigid image transform until every pixel in
	    `pixels` stays inside the image, then warps the image with it.

	    Args:
	      input_image: array indexed as [H, W, C]; channels 0-2 are treated as
	        color and the remaining channels as depth.
	      pixels: iterable of (row, col) locations that must remain in view.
	      theta_sigma: standard deviation of the random rotation, in degrees.
	      add_noise: if True, add Gaussian noise to color (sigma 3, clipped to
	        0-255) and depth (sigma 0.003).

	    Returns:
	      input_image: warped image (color and depth concatenated on axis 2).
	      new_pixels: list of transformed (row, col) locations, unrounded.
	      new_rounded_pixels: list of transformed (row, col) int32 locations.
	      transform_params: array [theta, trans_x, trans_y, pivot_x, pivot_y].
	    """
	    image_size = input_image.shape[:2]

	    # Compute random rigid transform.
	    # NOTE: loops until a valid transform is drawn; there is no retry limit.
	    while True:
	        theta, trans, pivot = get_random_image_transform_params(
	            image_size, theta_sigma=theta_sigma)
	        transform = get_image_transform(theta, trans, pivot)

	        # Ensure pixels remain in the image after transform.
	        is_valid = True
	        new_pixels = []
	        new_rounded_pixels = []
	        for pixel in pixels:
	            # Homogeneous (col, row, 1) column vector.
	            pixel = np.float32([pixel[1], pixel[0], 1.]).reshape(3, 1)

	            rounded_pixel = np.int32(np.round(transform @ pixel))[:2].squeeze()
	            rounded_pixel = np.flip(rounded_pixel)

	            pixel = (transform @ pixel)[:2].squeeze()
	            pixel = np.flip(pixel)

	            in_fov_rounded = (rounded_pixel[0] < image_size[0]
	                              and rounded_pixel[1] < image_size[1])
	            in_fov = pixel[0] < image_size[0] and pixel[1] < image_size[1]

	            is_valid = (is_valid and np.all(rounded_pixel >= 0)
	                        and np.all(pixel >= 0) and in_fov_rounded and in_fov)

	            new_pixels.append(pixel)
	            new_rounded_pixels.append(rounded_pixel)
	        if is_valid:
	            break

	    # Apply rigid transform to image and pixel labels.
	    input_image = cv2.warpAffine(
	        input_image,
	        transform[:2, :], (image_size[1], image_size[0]),
	        flags=cv2.INTER_LINEAR)

	    # Apply noise
	    color = np.int32(input_image[:, :, :3])
	    depth = np.float32(input_image[:, :, 3:])

	    if add_noise:
	        # Draw noise with the actual array shapes. The depth noise used to
	        # be hard-coded to 3 channels, which failed for any other count.
	        color += np.int32(np.random.normal(0, 3, color.shape))
	        color = np.uint8(np.clip(color, 0, 255))

	        depth += np.float32(np.random.normal(0, 0.003, depth.shape))

	    input_image = np.concatenate((color, depth), axis=2)

	    # length of 5
	    transform_params = np.array([theta, trans[0], trans[1], pivot[0], pivot[1]])
	    return input_image, new_pixels, new_rounded_pixels, transform_params


	def apply_perturbation(input_image, transform_params):
	    """Apply data augmentation with specific transform params.

	    Args:
	      input_image: image array; its first two dimensions are (height, width).
	      transform_params: sequence laid out as
	        [theta, trans_x, trans_y, pivot_x, pivot_y].

	    Returns:
	      The image warped by the corresponding rigid transform.
	    """
	    n_rows, n_cols = input_image.shape[:2]

	    # Unpack the 5-element parameter vector and rebuild the 3x3 matrix.
	    theta = transform_params[0]
	    trans = transform_params[1:3]
	    pivot = transform_params[3:5]
	    matrix = get_image_transform(theta, trans, pivot)

	    return cv2.warpAffine(
	        input_image,
	        matrix[:2, :],
	        (n_cols, n_rows),
	        flags=cv2.INTER_LINEAR)


	class ImageRotator:
	    """Rotate a list of image batches, one evenly spaced angle per entry."""
	    # Reference: https://kornia.readthedocs.io/en/latest/tutorials/warp_affine.html?highlight=rotate

	    def __init__(self, n_rotations):
	        """Precompute n_rotations angles (degrees) covering a full turn."""
	        self.angles = [
	            step * 2 * 180 / n_rotations for step in range(n_rotations)
	        ]

	    def __call__(self, x_list, pivot, reverse=False):
	        """Rotate x_list[i] by the i-th angle around `pivot`.

	        Args:
	          x_list: indexable of 4D tensors; must provide one entry per angle.
	          pivot: rotation center; entries are swapped ([1, 0]) before use.
	          reverse: if True, rotate by the negated angles.

	        Returns:
	          List of warped tensors, one per angle.
	        """
	        rotated = []
	        for index, degrees in enumerate(self.angles):
	            batch = x_list[index]
	            batch_size = len(batch)

	            # Per-sample rotation angle, in degrees.
	            signed_degrees = (-1.0 * degrees) if reverse else degrees
	            angle = torch.ones(batch_size) * signed_degrees

	            # Rotation center, with the two pivot entries swapped.
	            if type(pivot) is not torch.Tensor:
	                center = torch.FloatTensor(pivot)[..., [1, 0]]
	                center = center.view(1, -1).repeat((batch_size, 1))
	            else:
	                center = pivot[..., [1, 0]].view(1, -1).clone().to(angle.device)

	            # Unit scale factor.
	            scale = torch.ones(batch_size, 2)

	            # Compute the transformation matrix and apply it to the batch.
	            matrix = kornia.geometry.get_rotation_matrix2d(center, angle, scale)
	            _, _, height, width = batch.shape
	            warped = kornia.geometry.transform.warp_affine(
	                batch.float(), matrix.to(batch.device), dsize=(height, width))
	            rotated.append(warped)

	        return rotated

	# KD Tree Utils
	# Construct K-D Tree to roughly estimate how many objects can fit inside the box.
	class TreeNode:
	    """Node of the K-D tree built by KDTree."""

	    def __init__(self, parent, children, bbox):
	        """Store the node's parent, child list and bounding box.

	        Args:
	          parent: parent node (stored as given).
	          children: list of child nodes (stored as given).
	          bbox: min x, min y, min z, max x, max y, max z.
	        """
	        self.parent, self.children, self.bbox = parent, children, bbox

	def KDTree(node, min_object_dim, margin, bboxes):
	    """Recursively split `node` and collect the leaf bounding boxes.

	    Args:
	      node: node whose bbox array is (min x, min y, min z, max x, max y,
	        max z); its `children` attribute is set when it is split.
	      min_object_dim: an axis is only split if it is longer than twice this.
	      margin: gap left between the two children along the split axis.
	      bboxes: output list; the bbox of every leaf is appended to it.
	    """
	    extent = node.bbox[3:] - node.bbox[:3]

	    # Axes that are long enough to be split.
	    splittable = extent > 2 * min_object_dim
	    if np.sum(splittable) == 0:
	        # Leaf: nothing left to split.
	        bboxes.append(node.bbox)
	        return

	    # Pick one splittable axis uniformly at random.
	    axis_probs = np.float32(splittable) / np.sum(splittable)
	    axis = np.random.choice(range(len(axis_probs)), 1, p=axis_probs)[0]

	    # Random cut that leaves at least min_object_dim on both sides.
	    cut = (np.random.rand() * (extent[axis] - 2 * min_object_dim)
	           + node.bbox[axis] + min_object_dim)

	    lower_bbox = node.bbox.copy()
	    lower_bbox[3 + axis] = cut - margin / 2.
	    upper_bbox = node.bbox.copy()
	    upper_bbox[axis] = cut + margin / 2.

	    lower_child = TreeNode(node, [], bbox=lower_bbox)
	    upper_child = TreeNode(node, [], bbox=upper_bbox)
	    node.children = [lower_child, upper_child]
	    for child in (node.children[0], node.children[1]):
	        KDTree(child, min_object_dim, margin, bboxes)

	# -----------------------------------------------------------------------------
	# Shape Name UTILS
	# -----------------------------------------------------------------------------
	# Object names for the "seen" setting, keyed by dataset split
	# ('train' / 'val' / 'test'). As written, 'val' and 'test' list the same
	# names, and those names do not overlap with the 'train' list.
	google_seen_obj_shapes = {
	'train': [
	'alarm clock',
	'android toy',
	'black boot with leopard print',
	'black fedora',
	'black razer mouse',
	'black sandal',
	'black shoe with orange stripes',
	'bull figure',
	'butterfinger chocolate',
	'c clamp',
	'can opener',
	'crayon box',
	'dog statue',
	'frypan',
	'green and white striped towel',
	'grey soccer shoe with cleats',
	'hard drive',
	'honey dipper',
	'magnifying glass',
	'mario figure',
	'nintendo 3ds',
	'nintendo cartridge',
	'office depot box',
	'orca plush toy',
	'pepsi gold caffeine free box',
	'pepsi wild cherry box',
	'porcelain cup',
	'purple tape',
	'red and white flashlight',
	'rhino figure',
	'rocket racoon figure',
	'scissors',
	'silver tape',
	'spatula with purple head',
	'spiderman figure',
	'tablet',
	'toy school bus',
	],
	'val': [
	'ball puzzle',
	'black and blue sneakers',
	'black shoe with green stripes',
	'brown fedora',
	'dinosaur figure',
	'hammer',
	'light brown boot with golden laces',
	'lion figure',
	'pepsi max box',
	'pepsi next box',
	'porcelain salad plate',
	'porcelain spoon',
	'red and white striped towel',
	'red cup',
	'screwdriver',
	'toy train',
	'unicorn toy',
	'white razer mouse',
	'yoshi figure'
	],
	'test': [
	'ball puzzle',
	'black and blue sneakers',
	'black shoe with green stripes',
	'brown fedora',
	'dinosaur figure',
	'hammer',
	'light brown boot with golden laces',
	'lion figure',
	'pepsi max box',
	'pepsi next box',
	'porcelain salad plate',
	'porcelain spoon',
	'red and white striped towel',
	'red cup',
	'screwdriver',
	'toy train',
	'unicorn toy',
	'white razer mouse',
	'yoshi figure'
	],
	}

	# Object names for the "unseen" setting, keyed by dataset split
	# ('train' / 'val' / 'test').
	# NOTE(review): the contents appear identical to google_seen_obj_shapes in
	# this file - confirm that this is intended.
	google_unseen_obj_shapes = {
	'train': [
	'alarm clock',
	'android toy',
	'black boot with leopard print',
	'black fedora',
	'black razer mouse',
	'black sandal',
	'black shoe with orange stripes',
	'bull figure',
	'butterfinger chocolate',
	'c clamp',
	'can opener',
	'crayon box',
	'dog statue',
	'frypan',
	'green and white striped towel',
	'grey soccer shoe with cleats',
	'hard drive',
	'honey dipper',
	'magnifying glass',
	'mario figure',
	'nintendo 3ds',
	'nintendo cartridge',
	'office depot box',
	'orca plush toy',
	'pepsi gold caffeine free box',
	'pepsi wild cherry box',
	'porcelain cup',
	'purple tape',
	'red and white flashlight',
	'rhino figure',
	'rocket racoon figure',
	'scissors',
	'silver tape',
	'spatula with purple head',
	'spiderman figure',
	'tablet',
	'toy school bus',
	],
	'val': [
	'ball puzzle',
	'black and blue sneakers',
	'black shoe with green stripes',
	'brown fedora',
	'dinosaur figure',
	'hammer',
	'light brown boot with golden laces',
	'lion figure',
	'pepsi max box',
	'pepsi next box',
	'porcelain salad plate',
	'porcelain spoon',
	'red and white striped towel',
	'red cup',
	'screwdriver',
	'toy train',
	'unicorn toy',
	'white razer mouse',
	'yoshi figure'
	],
	'test': [
	'ball puzzle',
	'black and blue sneakers',
	'black shoe with green stripes',
	'brown fedora',
	'dinosaur figure',
	'hammer',
	'light brown boot with golden laces',
	'lion figure',
	'pepsi max box',
	'pepsi next box',
	'porcelain salad plate',
	'porcelain spoon',
	'red and white striped towel',
	'red cup',
	'screwdriver',
	'toy train',
	'unicorn toy',
	'white razer mouse',
	'yoshi figure'
	],
	}

	# Full list of object names, keyed by dataset split
	# ('train' / 'val' / 'test'). As written, every split appears to list the
	# same names, in alphabetical order.
	google_all_shapes = {
	'train': [
	'alarm clock',
	'android toy',
	'ball puzzle',
	'black and blue sneakers',
	'black boot with leopard print',
	'black fedora',
	'black razer mouse',
	'black sandal',
	'black shoe with green stripes',
	'black shoe with orange stripes',
	'brown fedora',
	'bull figure',
	'butterfinger chocolate',
	'c clamp',
	'can opener',
	'crayon box',
	'dinosaur figure',
	'dog statue',
	'frypan',
	'green and white striped towel',
	'grey soccer shoe with cleats',
	'hammer',
	'hard drive',
	'honey dipper',
	'light brown boot with golden laces',
	'lion figure',
	'magnifying glass',
	'mario figure',
	'nintendo 3ds',
	'nintendo cartridge',
	'office depot box',
	'orca plush toy',
	'pepsi gold caffeine free box',
	'pepsi max box',
	'pepsi next box',
	'pepsi wild cherry box',
	'porcelain cup',
	'porcelain salad plate',
	'porcelain spoon',
	'purple tape',
	'red and white flashlight',
	'red and white striped towel',
	'red cup',
	'rhino figure',
	'rocket racoon figure',
	'scissors',
	'screwdriver',
	'silver tape',
	'spatula with purple head',
	'spiderman figure',
	'tablet',
	'toy school bus',
	'toy train',
	'unicorn toy',
	'white razer mouse',
	'yoshi figure',
	],
	'val': [
	'alarm clock',
	'android toy',
	'ball puzzle',
	'black and blue sneakers',
	'black boot with leopard print',
	'black fedora',
	'black razer mouse',
	'black sandal',
	'black shoe with green stripes',
	'black shoe with orange stripes',
	'brown fedora',
	'bull figure',
	'butterfinger chocolate',
	'c clamp',
	'can opener',
	'crayon box',
	'dinosaur figure',
	'dog statue',
	'frypan',
	'green and white striped towel',
	'grey soccer shoe with cleats',
	'hammer',
	'hard drive',
	'honey dipper',
	'light brown boot with golden laces',
	'lion figure',
	'magnifying glass',
	'mario figure',
	'nintendo 3ds',
	'nintendo cartridge',
	'office depot box',
	'orca plush toy',
	'pepsi gold caffeine free box',
	'pepsi max box',
	'pepsi next box',
	'pepsi wild cherry box',
	'porcelain cup',
	'porcelain salad plate',
	'porcelain spoon',
	'purple tape',
	'red and white flashlight',
	'red and white striped towel',
	'red cup',
	'rhino figure',
	'rocket racoon figure',
	'scissors',
	'screwdriver',
	'silver tape',
	'spatula with purple head',
	'spiderman figure',
	'tablet',
	'toy school bus',
	'toy train',
	'unicorn toy',
	'white razer mouse',
	'yoshi figure',
	],
	'test': [
	'alarm clock',
	'android toy',
	'ball puzzle',
	'black and blue sneakers',
	'black boot with leopard print',
	'black fedora',
	'black razer mouse',
	'black sandal',
	'black shoe with green stripes',
	'black shoe with orange stripes',
	'brown fedora',
	'bull figure',
	'butterfinger chocolate',
	'c clamp',
	'can opener',
	'crayon box',
	'dinosaur figure',
	'dog statue',
	'frypan',
	'green and white striped towel',
	'grey soccer shoe with cleats',
	'hammer',
	'hard drive',
	'honey dipper',
	'light brown boot with golden laces',
	'lion figure',
	'magnifying glass',
	'mario figure',
	'nintendo 3ds',
	'nintendo cartridge',
	'office depot box',
	'orca plush toy',
	'pepsi gold caffeine free box',
	'pepsi max box',
	'pepsi next box',
	'pepsi wild cherry box',
	'porcelain cup',
	'porcelain salad plate',
	'porcelain spoon',
	'purple tape',
	'red and white flashlight',
	'red and white striped towel',
	'red cup',
	'rhino figure',
	'rocket racoon figure',
	'scissors',
	'screwdriver',
	'silver tape',
	'spatula with purple head',
	'spiderman figure',
	'tablet',
	'toy school bus',
	'toy train',
	'unicorn toy',
	'white razer mouse',
	'yoshi figure',
	],
	}
	# Human-readable name for each assembling-kit shape, keyed by integer id
	# (0-19).
	assembling_kit_shapes = {
	0: "letter R shape",
	1: "letter A shape",
	2: "triangle",
	3: "square",
	4: "plus",
	5: "letter T shape",
	6: "diamond",
	7: "pentagon",
	8: "rectangle",
	9: "flower",
	10: "star",
	11: "circle",
	12: "letter G shape",
	13: "letter V shape",
	14: "letter E shape",
	15: "letter L shape",
	16: "ring",
	17: "hexagon",
	18: "heart",
	19: "letter M shape",
	}

	# -----------------------------------------------------------------------------
	# COLOR AND PLOT UTILS
	# -----------------------------------------------------------------------------


	# Colors (Tableau palette).
	# Maps a color name to its [r, g, b] components, each expressed as an
	# 8-bit channel value divided by 255 (i.e. floats in the range 0-1).
	COLORS = {
	'blue': [78.0 / 255.0, 121.0 / 255.0, 167.0 / 255.0],
	'red': [255.0 / 255.0, 087.0 / 255.0, 089.0 / 255.0],
	'green': [089.0 / 255.0, 169.0 / 255.0, 078.0 / 255.0],
	'orange': [242.0 / 255.0, 142.0 / 255.0, 043.0 / 255.0],
	'yellow': [237.0 / 255.0, 201.0 / 255.0, 072.0 / 255.0],
	'purple': [176.0 / 255.0, 122.0 / 255.0, 161.0 / 255.0],
	'pink': [255.0 / 255.0, 157.0 / 255.0, 167.0 / 255.0],
	'cyan': [118.0 / 255.0, 183.0 / 255.0, 178.0 / 255.0],
	'brown': [156.0 / 255.0, 117.0 / 255.0, 095.0 / 255.0],
	'white': [255.0 / 255.0, 255.0 / 255.0, 255.0 / 255.0],
	'gray': [186.0 / 255.0, 176.0 / 255.0, 172.0 / 255.0],
	'indigo': [75.0 / 255.0, 0.0 / 255.0, 130.0 / 255.0],
	'violet': [143.0 / 255.0, 0.0 / 255.0, 255.0 / 255.0],
	'black': [0.0 / 255.0, 0.0 / 255.0, 0.0 / 255.0],
	'silver': [192.0 / 255.0, 192.0 / 255.0, 192.0 / 255.0],
	'gold': [255.0 / 255.0, 215.0 / 255.0, 0.0 / 255.0],

	}

	# All color names, in the insertion order of COLORS.
	COLORS_NAMES = list(COLORS.keys())
	# Color-name subsets, presumably for the training and evaluation splits.
	# NOTE(review): get_colors_names in this file only ever returns
	# TRAIN_COLORS - confirm whether EVAL_COLORS is used elsewhere.
	TRAIN_COLORS = ['blue', 'red', 'green', 'yellow', 'brown', 'gray', 'cyan']
	EVAL_COLORS = ['blue', 'red', 'green', 'orange', 'purple', 'pink', 'white']


	def get_colors(mode, n_colors=-1, **kwargs):
	    """Return RGB values and names of the colors available for `mode`.

	    Args:
	      mode: forwarded to get_colors_names to select the pool of names.
	      n_colors: number of names to sample without replacement via
	        random.sample; -1 keeps the whole pool in its original order.
	      **kwargs: accepted but unused.

	    Returns:
	      Tuple (colors, names): `colors` holds the COLORS entry of every
	      selected name; `names` is the list of selected names.
	    """
	    names = get_colors_names(mode)
	    if n_colors != -1:
	        names = random.sample(names, n_colors)

	    rgb_values = [COLORS[name] for name in names]
	    return rgb_values, names

	def get_colors_names(mode):
	    """Return the list of color names to use for `mode`.

	    NOTE(review): every mode ('train', 'full' and anything else) currently
	    resolves to TRAIN_COLORS - confirm that this is intended.
	    """
	    if mode in ('train', 'full'):
	        return TRAIN_COLORS
	    return TRAIN_COLORS

	def get_random_color():
	    """Sample a single color from the 'train' pool.

	    Returns:
	      The (colors, names) tuple produced by get_colors, each of length 1.
	    """
	    sampled = get_colors(mode='train', n_colors=1)
	    return sampled

	def solve_hanoi_all(n_disks):
	    """Return the move sequence solving Towers of Hanoi for n_disks disks.

	    Disks are indexed 0 .. n_disks - 1 and are moved from rod 0 to rod 2,
	    using rod 1 as the spare. The sequence is generated recursively.

	    Args:
	      n_disks: number of disks.

	    Returns:
	      List of moves [[object index, from rod, to rod], ...].
	    """

	    def moves(disk, source, target, spare):
	        # Move disks 0..disk from `source` to `target`.
	        this_move = [disk, source, target]
	        if disk == 0:
	            return [this_move]
	        before = moves(disk - 1, source, spare, target)
	        after = moves(disk - 1, spare, target, source)
	        return before + [this_move] + after

	    return moves(n_disks - 1, 0, 2, 1)

	def plot(fname,  # pylint: disable=dangerous-default-value
	         title,
	         ylabel,
	         xlabel,
	         data,
	         xlim=[-np.inf, 0],
	         xticks=None,
	         ylim=[np.inf, -np.inf],
	         show_std=True):
	    """Plot frame data and save the figure to `fname`.

	    Args:
	      fname: output path handed to plt.savefig.
	      title: figure title.
	      ylabel: y-axis label.
	      xlabel: x-axis label.
	      data: dictionary that maps experiment names to tuples with 3 elements:
	        x (size N array), y (size N array) and y_std (size N array).
	      xlim: initial [lower, upper] x limits, widened from the data.
	      xticks: optional list of tick labels placed at 0..len(xticks)-1.
	      ylim: initial [lower, upper] y limits, widened from the data.
	      show_std: if True, shade the band y +/- y_std around each curve.
	    """
	    # Work on copies. The limits are updated below, and mutating the default
	    # lists (or a caller's list) would leak state into later calls.
	    xlim = list(xlim)
	    ylim = list(ylim)

	    # Get data limits.
	    # NOTE(review): the lower x limit is combined with max(), i.e. it ends
	    # up as the largest per-series minimum - confirm that this is intended.
	    for x, y, _ in data.values():
	        y = np.array(y)
	        xlim[0] = max(xlim[0], np.min(x))
	        xlim[1] = max(xlim[1], np.max(x))
	        ylim[0] = min(ylim[0], np.min(y))
	        ylim[1] = max(ylim[1], np.max(y))

	    # Draw background.
	    plt.title(title, fontsize=14)
	    plt.ylim(ylim)
	    plt.ylabel(ylabel, fontsize=14)
	    plt.yticks(fontsize=14)
	    plt.xlim(xlim)
	    plt.xlabel(xlabel, fontsize=14)
	    plt.grid(True, linestyle='-', color=[0.8, 0.8, 0.8])
	    ax = plt.gca()
	    for axis in ['top', 'bottom', 'left', 'right']:
	        ax.spines[axis].set_color('#000000')
	    plt.rcParams.update({'font.size': 14})
	    plt.rcParams['mathtext.default'] = 'regular'
	    matplotlib.rcParams['pdf.fonttype'] = 42
	    matplotlib.rcParams['ps.fonttype'] = 42

	    # Draw data. Colors follow the order of COLORS and wrap around when
	    # there are more series than colors.
	    color_names = list(COLORS.keys())
	    for series_index, (x, y, std) in enumerate(data.values()):
	        x, y, std = np.float32(x), np.float32(y), np.float32(std)
	        upper = np.clip(y + std, ylim[0], ylim[1])
	        lower = np.clip(y - std, ylim[0], ylim[1])
	        color = COLORS[color_names[series_index % len(color_names)]]
	        if show_std:
	            plt.fill_between(x, upper, lower, color=color, linewidth=0, alpha=0.3)
	        plt.plot(x, y, color=color, linewidth=2, marker='o', alpha=1.)

	    if xticks:
	        plt.xticks(ticks=range(len(xticks)), labels=xticks, fontsize=14)
	    else:
	        plt.xticks(fontsize=14)
	    plt.legend([name for name, _ in data.items()],
	               loc='lower right', fontsize=14)
	    plt.tight_layout()
	    plt.savefig(fname)
	    plt.clf()


	# -----------------------------------------------------------------------------
	# MESHCAT UTILS
	# -----------------------------------------------------------------------------

	def create_visualizer(clear=True):
	    """Connect to the meshcat server at tcp://127.0.0.1:6000.

	    Args:
	      clear: if True, call delete() on the visualizer after connecting.

	    Returns:
	      The meshcat.Visualizer instance.
	    """
	    print('Waiting for meshcat server... have you started a server?')
	    visualizer = meshcat.Visualizer(zmq_url='tcp://127.0.0.1:6000')
	    if not clear:
	        return visualizer
	    visualizer.delete()
	    return visualizer


	def make_frame(vis, name, h, radius, o=1.0):
	    """Add a red-green-blue triad to the Meshcat visualizer.

	    Args:
	      vis (MeshCat Visualizer): the visualizer
	      name (string): name for this frame (should be unique)
	      h (float): height of frame visualization
	      radius (float): radius of frame visualization
	      o (float): opacity
	    """
	    # (sub-name, color, rotation axis, index of the translated coordinate)
	    triad = (
	        ('x', 0xff0000, [0, 0, 1], 0),
	        ('y', 0x00ff00, [0, 1, 0], 1),
	        ('z', 0x0000ff, [1, 0, 0], 2),
	    )
	    for label, color, rotation_axis, coord in triad:
	        vis[name][label].set_object(
	            g.Cylinder(height=h, radius=radius),
	            g.MeshLambertMaterial(color=color, reflectivity=0.8, opacity=o))
	        # Quarter turn about rotation_axis, then shift by half the height
	        # along this cylinder's own coordinate.
	        placement = mtf.rotation_matrix(np.pi / 2.0, rotation_axis)
	        placement[coord, 3] = h / 2
	        vis[name][label].set_transform(placement)


	def meshcat_visualize(vis, obs, act, info):
	    """Visualize data using meshcat.

	    Draws one coordinate frame per entry of `info` and one colored
	    pointcloud per camera.

	    Args:
	      vis: meshcat visualizer.
	      obs: dict providing per-camera 'depth' and 'color' images.
	      act: dict whose 'camera_config' lists per-camera 'intrinsics',
	        'position' and 'rotation' (xyzw quaternion).
	      info: dict mapping keys to poses (position, xyzw quaternion).
	    """
	    for key in sorted(info.keys()):
	        position, quat_xyzw = info[key][0], info[key][1]

	        # meshcat expects wxyz order, so move w to the front.
	        quat_wxyz = np.asarray(
	            [quat_xyzw[3], quat_xyzw[0], quat_xyzw[1], quat_xyzw[2]])
	        frame_pose = np.eye(4)
	        frame_pose[0:3, 3] = position
	        frame_pose[0:3, 0:3] = mtf.quaternion_matrix(quat_wxyz)[0:3, 0:3]

	        label = 'obj_' + str(key)
	        make_frame(vis, label, h=0.05, radius=0.0012, o=1.0)
	        vis[label].set_transform(frame_pose)

	    for cam_index in range(len(act['camera_config'])):
	        cam = act['camera_config'][cam_index]

	        # Identity depth correction (offset 0, scale 1), no lens distortion.
	        verts = unproject_depth_vectorized(
	            obs['depth'][cam_index], np.array([0, 1]),
	            np.array(cam['intrinsics']).reshape(3, 3),
	            np.zeros(5))

	        # switch from [N,3] to [3,N]
	        verts = verts.T

	        cam_quat_xyzw = cam['rotation']
	        cam_quat_wxyz = np.asarray([
	            cam_quat_xyzw[3], cam_quat_xyzw[0], cam_quat_xyzw[1],
	            cam_quat_xyzw[2]
	        ])
	        cam_pose = np.eye(4)
	        cam_pose[0:3, 3] = cam['position']
	        cam_pose[0:3, 0:3] = mtf.quaternion_matrix(cam_quat_wxyz)[0:3, 0:3]
	        verts = apply_transform(cam_pose, verts)

	        # Colors as a [3,N] array of floats in the range 0-1.
	        colors = obs['color'][cam_index].reshape(-1, 3).T / 255.0

	        vis['pointclouds/' + str(cam_index)].set_object(
	            g.PointCloud(position=verts, color=colors))


	# -----------------------------------------------------------------------------
	# CONFIG UTILS
	# -----------------------------------------------------------------------------

	def set_seed(seed, torch=False):
	    """Seed the random number generators used by this project.

	    Seeds Python's `random`, NumPy's global RNG and sets PYTHONHASHSEED in
	    os.environ; PyTorch is seeded only on request.

	    Args:
	      seed: the seed value.
	      torch: if truthy, also call torch.manual_seed(seed).
	    """
	    os.environ['PYTHONHASHSEED'] = str(seed)
	    random.seed(seed)
	    np.random.seed(seed)

	    if not torch:
	        return
	    # The flag shadows the module name, so import under a different alias.
	    import torch as torch_module
	    torch_module.manual_seed(seed)


	def load_cfg(yaml_path):
	    """Parse the YAML file at `yaml_path` with yaml.safe_load.

	    Args:
	      yaml_path: path of the YAML file to read.

	    Returns:
	      The parsed document.
	    """
	    with open(yaml_path, 'r') as stream:
	        return yaml.safe_load(stream)


	def load_hydra_config(config_path):
	    """Load the configuration file at `config_path` with OmegaConf.

	    Args:
	      config_path: path handed to OmegaConf.load.

	    Returns:
	      The object returned by OmegaConf.load.
	    """
	    config = OmegaConf.load(config_path)
	    return config