|
"""Miscellaneous utilities.""" |
|
|
|
import cv2 |
|
import random |
|
import matplotlib |
|
import matplotlib.pyplot as plt |
|
import meshcat |
|
import meshcat.geometry as g |
|
import meshcat.transformations as mtf |
|
|
|
import PIL |
|
import yaml |
|
import numpy as np |
|
from transforms3d import euler |
|
|
|
import pybullet as p |
|
import kornia |
|
from omegaconf import OmegaConf |
|
|
|
import os |
|
import torch |
|
import torchvision |
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_heightmap(points, colors, bounds, pixel_size): |
|
"""Get top-down (z-axis) orthographic heightmap image from 3D pointcloud. |
|
|
|
Args: |
|
points: HxWx3 float array of 3D points in world coordinates. |
|
colors: HxWx3 uint8 array of values in range 0-255 aligned with points. |
|
bounds: 3x2 float array of values (rows: X,Y,Z; columns: min,max) defining |
|
region in 3D space to generate heightmap in world coordinates. |
|
pixel_size: float defining size of each pixel in meters. |
|
|
|
Returns: |
|
heightmap: HxW float array of height (from lower z-bound) in meters. |
|
colormap: HxWx3 uint8 array of backprojected color aligned with heightmap. |
|
""" |
|
width = int(np.round((bounds[0, 1] - bounds[0, 0]) / pixel_size)) |
|
height = int(np.round((bounds[1, 1] - bounds[1, 0]) / pixel_size)) |
|
heightmap = np.zeros((height, width), dtype=np.float32) |
|
colormap = np.zeros((height, width, colors.shape[-1]), dtype=np.uint8) |
|
|
|
|
|
ix = (points[Ellipsis, 0] >= bounds[0, 0]) & (points[Ellipsis, 0] < bounds[0, 1]) |
|
iy = (points[Ellipsis, 1] >= bounds[1, 0]) & (points[Ellipsis, 1] < bounds[1, 1]) |
|
iz = (points[Ellipsis, 2] >= bounds[2, 0]) & (points[Ellipsis, 2] < bounds[2, 1]) |
|
valid = ix & iy & iz |
|
points = points[valid] |
|
colors = colors[valid] |
|
|
|
|
|
|
|
iz = np.argsort(points[:, -1]) |
|
points, colors = points[iz], colors[iz] |
|
px = np.int32(np.floor((points[:, 0] - bounds[0, 0]) / pixel_size)) |
|
py = np.int32(np.floor((points[:, 1] - bounds[1, 0]) / pixel_size)) |
|
px = np.clip(px, 0, width - 1) |
|
py = np.clip(py, 0, height - 1) |
|
heightmap[py, px] = points[:, 2] - bounds[2, 0] |
|
for c in range(colors.shape[-1]): |
|
colormap[py, px, c] = colors[:, c] |
|
return heightmap, colormap |
|
|
|
|
|
def get_pointcloud(depth, intrinsics): |
|
"""Get 3D pointcloud from perspective depth image. |
|
|
|
Args: |
|
depth: HxW float array of perspective depth in meters. |
|
intrinsics: 3x3 float array of camera intrinsics matrix. |
|
|
|
Returns: |
|
points: HxWx3 float array of 3D points in camera coordinates. |
|
""" |
|
height, width = depth.shape |
|
xlin = np.linspace(0, width - 1, width) |
|
ylin = np.linspace(0, height - 1, height) |
|
px, py = np.meshgrid(xlin, ylin) |
|
px = (px - intrinsics[0, 2]) * (depth / intrinsics[0, 0]) |
|
py = (py - intrinsics[1, 2]) * (depth / intrinsics[1, 1]) |
|
points = np.float32([px, py, depth]).transpose(1, 2, 0) |
|
return points |
|
|
|
|
|
def transform_pointcloud(points, transform): |
|
"""Apply rigid transformation to 3D pointcloud. |
|
|
|
Args: |
|
points: HxWx3 float array of 3D points in camera coordinates. |
|
transform: 4x4 float array representing a rigid transformation matrix. |
|
|
|
Returns: |
|
points: HxWx3 float array of transformed 3D points. |
|
""" |
|
padding = ((0, 0), (0, 0), (0, 1)) |
|
homogen_points = np.pad(points.copy(), padding, |
|
'constant', constant_values=1) |
|
for i in range(3): |
|
points[Ellipsis, i] = np.sum(transform[i, :] * homogen_points, axis=-1) |
|
return points |
|
|
|
|
|
def reconstruct_heightmaps(color, depth, configs, bounds, pixel_size): |
|
"""Reconstruct top-down heightmap views from multiple 3D pointclouds.""" |
|
heightmaps, colormaps = [], [] |
|
for color, depth, config in zip(color, depth, configs): |
|
intrinsics = np.array(config['intrinsics']).reshape(3, 3) |
|
xyz = get_pointcloud(depth, intrinsics) |
|
position = np.array(config['position']).reshape(3, 1) |
|
rotation = p.getMatrixFromQuaternion(config['rotation']) |
|
rotation = np.array(rotation).reshape(3, 3) |
|
transform = np.eye(4) |
|
transform[:3, :] = np.hstack((rotation, position)) |
|
xyz = transform_pointcloud(xyz, transform) |
|
heightmap, colormap = get_heightmap(xyz, color, bounds, pixel_size) |
|
heightmaps.append(heightmap) |
|
colormaps.append(colormap) |
|
return heightmaps, colormaps |
|
|
|
|
|
def pix_to_xyz(pixel, height, bounds, pixel_size, skip_height=False): |
|
"""Convert from pixel location on heightmap to 3D position.""" |
|
u, v = pixel |
|
x = bounds[0, 0] + v * pixel_size |
|
y = bounds[1, 0] + u * pixel_size |
|
if not skip_height: |
|
z = bounds[2, 0] + height[u, v] |
|
else: |
|
z = 0.0 |
|
return (x, y, z) |
|
|
|
|
|
def xyz_to_pix(position, bounds, pixel_size): |
|
"""Convert from 3D position to pixel location on heightmap.""" |
|
u = int(np.round((position[1] - bounds[1, 0]) / pixel_size)) |
|
v = int(np.round((position[0] - bounds[0, 0]) / pixel_size)) |
|
return (u, v) |
|
|
|
|
|
def unproject_vectorized(uv_coordinates, depth_values, |
|
intrinsic, |
|
distortion): |
|
"""Vectorized version of unproject(), for N points. |
|
|
|
Args: |
|
uv_coordinates: pixel coordinates to unproject of shape (n, 2). |
|
depth_values: depth values corresponding index-wise to the uv_coordinates of |
|
shape (n). |
|
intrinsic: array of shape (3, 3). This is typically the return value of |
|
intrinsics_to_matrix. |
|
distortion: camera distortion parameters of shape (5,). |
|
|
|
Returns: |
|
xyz coordinates in camera frame of shape (n, 3). |
|
""" |
|
cam_mtx = intrinsic |
|
cam_dist = np.array(distortion) |
|
|
|
|
|
points_undistorted = cv2.undistortPoints( |
|
uv_coordinates.reshape((-1, 1, 2)), cam_mtx, cam_dist).squeeze() |
|
|
|
x = points_undistorted[:, 0] * depth_values |
|
y = points_undistorted[:, 1] * depth_values |
|
|
|
xyz = np.vstack((x, y, depth_values)).T |
|
return xyz |
|
|
|
|
|
def unproject_depth_vectorized(im_depth, depth_dist, |
|
camera_mtx, |
|
camera_dist): |
|
"""Unproject depth image into 3D point cloud, using calibration. |
|
|
|
Args: |
|
im_depth: raw depth image, pre-calibration of shape (height, width). |
|
depth_dist: depth distortion parameters of shape (8,) |
|
camera_mtx: intrinsics matrix of shape (3, 3). This is typically the return |
|
value of intrinsics_to_matrix. |
|
camera_dist: camera distortion parameters shape (5,). |
|
|
|
Returns: |
|
numpy array of shape [3, H*W]. each column is xyz coordinates |
|
""" |
|
h, w = im_depth.shape |
|
|
|
|
|
u_map, v_map = np.meshgrid(np.linspace( |
|
0, w - 1, w), np.linspace(0, h - 1, h)) |
|
|
|
adjusted_depth = depth_dist[0] + im_depth * depth_dist[1] |
|
|
|
|
|
uv_coordinates = np.stack((u_map.reshape(-1), v_map.reshape(-1)), axis=-1) |
|
|
|
return unproject_vectorized(uv_coordinates, adjusted_depth.reshape(-1), |
|
camera_mtx, camera_dist) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def sample_distribution(prob, n_samples=1): |
|
"""Sample data point from a custom distribution.""" |
|
flat_prob = prob.flatten() / np.sum(prob) |
|
rand_ind = np.random.choice( |
|
np.arange(len(flat_prob)), n_samples, p=flat_prob, replace=False) |
|
rand_ind_coords = np.array(np.unravel_index(rand_ind, prob.shape)).T |
|
return np.int32(rand_ind_coords.squeeze()) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def invert(pose): |
|
return p.invertTransform(pose[0], pose[1]) |
|
|
|
|
|
def multiply(pose0, pose1): |
|
return p.multiplyTransforms(pose0[0], pose0[1], pose1[0], pose1[1]) |
|
|
|
|
|
def apply(pose, position): |
|
position = np.float32(position) |
|
position_shape = position.shape |
|
position = np.float32(position).reshape(3, -1) |
|
rotation = np.float32(p.getMatrixFromQuaternion(pose[1])).reshape(3, 3) |
|
translation = np.float32(pose[0]).reshape(3, 1) |
|
position = rotation @ position + translation |
|
return tuple(position.reshape(position_shape)) |
|
|
|
|
|
def eulerXYZ_to_quatXYZW(rotation): |
|
"""Abstraction for converting from a 3-parameter rotation to quaterion. |
|
|
|
This will help us easily switch which rotation parameterization we use. |
|
Quaternion should be in xyzw order for pybullet. |
|
|
|
Args: |
|
rotation: a 3-parameter rotation, in xyz order tuple of 3 floats |
|
|
|
Returns: |
|
quaternion, in xyzw order, tuple of 4 floats |
|
""" |
|
euler_zxy = (rotation[2], rotation[0], rotation[1]) |
|
quaternion_wxyz = euler.euler2quat(*euler_zxy, axes='szxy') |
|
q = quaternion_wxyz |
|
quaternion_xyzw = (q[1], q[2], q[3], q[0]) |
|
return quaternion_xyzw |
|
|
|
|
|
def quatXYZW_to_eulerXYZ(quaternion_xyzw): |
|
"""Abstraction for converting from quaternion to a 3-parameter toation. |
|
|
|
This will help us easily switch which rotation parameterization we use. |
|
Quaternion should be in xyzw order for pybullet. |
|
|
|
Args: |
|
quaternion_xyzw: in xyzw order, tuple of 4 floats |
|
|
|
Returns: |
|
rotation: a 3-parameter rotation, in xyz order, tuple of 3 floats |
|
""" |
|
q = quaternion_xyzw |
|
quaternion_wxyz = np.array([q[3], q[0], q[1], q[2]]) |
|
euler_zxy = euler.quat2euler(quaternion_wxyz, axes='szxy') |
|
euler_xyz = (euler_zxy[1], euler_zxy[2], euler_zxy[0]) |
|
return euler_xyz |
|
|
|
|
|
def apply_transform(transform_to_from, points_from): |
|
r"""Transforms points (3D) into new frame. |
|
|
|
Using transform_to_from notation. |
|
|
|
Args: |
|
transform_to_from: numpy.ndarray of shape [B,4,4], SE3 |
|
points_from: numpy.ndarray of shape [B,3,N] |
|
|
|
Returns: |
|
points_to: numpy.ndarray of shape [B,3,N] |
|
""" |
|
num_points = points_from.shape[-1] |
|
|
|
|
|
if len(transform_to_from.shape) == 2: |
|
ones = np.ones((1, num_points)) |
|
|
|
|
|
points_from = np.vstack((points_from, ones)) |
|
points_to = transform_to_from @ points_from |
|
return points_to[0:3, :] |
|
|
|
|
|
else: |
|
assert len(transform_to_from.shape) == 3 |
|
batch_size = transform_to_from.shape[0] |
|
zeros = np.ones((batch_size, 1, num_points)) |
|
points_from = np.concatenate((points_from, zeros), axis=1) |
|
assert points_from.shape[1] == 4 |
|
points_to = transform_to_from @ points_from |
|
return points_to[:, 0:3, :] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def preprocess(img, dist='transporter'): |
|
"""Pre-process input (subtract mean, divide by std).""" |
|
|
|
transporter_color_mean = [0.18877631, 0.18877631, 0.18877631] |
|
transporter_color_std = [0.07276466, 0.07276466, 0.07276466] |
|
transporter_depth_mean = 0.00509261 |
|
transporter_depth_std = 0.00903967 |
|
|
|
franka_color_mean = [0.622291933, 0.628313992, 0.623031488] |
|
franka_color_std = [0.168154213, 0.17626014, 0.184527364] |
|
franka_depth_mean = 0.872146842 |
|
franka_depth_std = 0.195743116 |
|
|
|
clip_color_mean = [0.48145466, 0.4578275, 0.40821073] |
|
clip_color_std = [0.26862954, 0.26130258, 0.27577711] |
|
|
|
|
|
if dist == 'clip': |
|
color_mean = clip_color_mean |
|
color_std = clip_color_std |
|
elif dist == 'mdetr': |
|
color_mean = [0.485, 0.456, 0.406] |
|
color_std = [0.229, 0.224, 0.225] |
|
elif dist == 'franka': |
|
color_mean = franka_color_mean |
|
color_std = franka_color_std |
|
else: |
|
color_mean = transporter_color_mean |
|
color_std = transporter_color_std |
|
|
|
if dist == 'franka': |
|
depth_mean = franka_depth_mean |
|
depth_std = franka_depth_std |
|
else: |
|
depth_mean = transporter_depth_mean |
|
depth_std = transporter_depth_std |
|
|
|
|
|
if type(img) == torch.Tensor: |
|
def cast_shape(stat, img): |
|
tensor = torch.from_numpy(np.array(stat)).to(device=img.device, dtype=img.dtype) |
|
tensor = tensor.unsqueeze(0).unsqueeze(-1).unsqueeze(-1) |
|
tensor = tensor.repeat(img.shape[0], 1, img.shape[-2], img.shape[-1]) |
|
return tensor |
|
|
|
color_mean = cast_shape(color_mean, img) |
|
color_std = cast_shape(color_std, img) |
|
depth_mean = cast_shape(depth_mean, img) |
|
depth_std = cast_shape(depth_std, img) |
|
|
|
|
|
img = img.clone() |
|
img[:, :3, :, :] = ((img[:, :3, :, :] / 255 - color_mean) / color_std) |
|
img[:, 3:, :, :] = ((img[:, 3:, :, :] - depth_mean) / depth_std) |
|
else: |
|
|
|
img[:, :, :3] = (img[:, :, :3] / 255 - color_mean) / color_std |
|
img[:, :, 3:] = (img[:, :, 3:] - depth_mean) / depth_std |
|
|
|
|
|
|
|
|
|
|
|
return img |
|
|
|
def map_kit_scale(scale): |
|
return (scale[0] / 10, scale[1] / 10, scale[2] / 10) |
|
|
|
def deprocess(img): |
|
color_mean = 0.18877631 |
|
depth_mean = 0.00509261 |
|
color_std = 0.07276466 |
|
depth_std = 0.00903967 |
|
|
|
img[:, :, :3] = np.uint8(((img[:, :, :3] * color_std) + color_mean) * 255) |
|
img[:, :, 3:] = np.uint8(((img[:, :, 3:] * depth_std) + depth_mean) * 255) |
|
return img |
|
|
|
|
|
def get_fused_heightmap(obs, configs, bounds, pix_size): |
|
"""Reconstruct orthographic heightmaps with segmentation masks.""" |
|
heightmaps, colormaps = reconstruct_heightmaps( |
|
obs['color'], obs['depth'], configs, bounds, pix_size) |
|
colormaps = np.float32(colormaps) |
|
heightmaps = np.float32(heightmaps) |
|
|
|
|
|
valid = np.sum(colormaps, axis=3) > 0 |
|
repeat = np.sum(valid, axis=0) |
|
repeat[repeat == 0] = 1 |
|
cmap = np.sum(colormaps, axis=0) / repeat[Ellipsis, None] |
|
cmap = np.uint8(np.round(cmap)) |
|
hmap = np.max(heightmaps, axis=0) |
|
return cmap, hmap |
|
|
|
|
|
def get_image_transform(theta, trans, pivot=(0, 0)): |
|
"""Compute composite 2D rigid transformation matrix.""" |
|
|
|
|
|
|
|
pivot_t_image = np.array([[1., 0., -pivot[0]], [0., 1., -pivot[1]], |
|
[0., 0., 1.]]) |
|
image_t_pivot = np.array([[1., 0., pivot[0]], [0., 1., pivot[1]], |
|
[0., 0., 1.]]) |
|
transform = np.array([[np.cos(theta), -np.sin(theta), trans[0]], |
|
[np.sin(theta), np.cos(theta), trans[1]], [0., 0., 1.]]) |
|
return np.dot(image_t_pivot, np.dot(transform, pivot_t_image)) |
|
|
|
|
|
def check_transform(image, pixel, transform): |
|
"""Valid transform only if pixel locations are still in FoV after transform.""" |
|
new_pixel = np.flip( |
|
np.int32( |
|
np.round( |
|
np.dot(transform, |
|
np.float32([pixel[1], pixel[0], |
|
1.]).reshape(3, 1))))[:2].squeeze()) |
|
valid = np.all( |
|
new_pixel >= 0 |
|
) and new_pixel[0] < image.shape[0] and new_pixel[1] < image.shape[1] |
|
return valid, new_pixel |
|
|
|
|
|
def get_se3_from_image_transform(theta, trans, pivot, heightmap, bounds, |
|
pixel_size): |
|
"""Calculate SE3 from image transform.""" |
|
position_center = pix_to_xyz( |
|
np.flip(np.int32(np.round(pivot))), |
|
heightmap, |
|
bounds, |
|
pixel_size, |
|
skip_height=False) |
|
new_position_center = pix_to_xyz( |
|
np.flip(np.int32(np.round(pivot + trans))), |
|
heightmap, |
|
bounds, |
|
pixel_size, |
|
skip_height=True) |
|
|
|
new_position_center = (new_position_center[0], new_position_center[1], |
|
position_center[2]) |
|
|
|
delta_position = np.array(new_position_center) - np.array(position_center) |
|
|
|
t_world_center = np.eye(4) |
|
t_world_center[0:3, 3] = np.array(position_center) |
|
|
|
t_centernew_center = np.eye(4) |
|
euler_zxy = (-theta, 0, 0) |
|
t_centernew_center[0:3, 0:3] = euler.euler2mat( |
|
*euler_zxy, axes='szxy')[0:3, 0:3] |
|
|
|
t_centernew_center_tonly = np.eye(4) |
|
t_centernew_center_tonly[0:3, 3] = -delta_position |
|
t_centernew_center = t_centernew_center @ t_centernew_center_tonly |
|
|
|
t_world_centernew = t_world_center @ np.linalg.inv(t_centernew_center) |
|
return t_world_center, t_world_centernew |
|
|
|
|
|
def get_random_image_transform_params(image_size, theta_sigma=60): |
|
theta = np.random.normal(0, np.deg2rad(theta_sigma)) |
|
|
|
trans_sigma = np.min(image_size) / 6 |
|
trans = np.random.normal(0, trans_sigma, size=2) |
|
pivot = (image_size[1] / 2, image_size[0] / 2) |
|
return theta, trans, pivot |
|
|
|
|
|
def q_mult(q1, q2): |
|
w1, x1, y1, z1 = q1 |
|
w2, x2, y2, z2 = q2 |
|
w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 |
|
x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 |
|
y = w1 * y2 + y1 * w2 + z1 * x2 - x1 * z2 |
|
z = w1 * z2 + z1 * w2 + x1 * y2 - y1 * x2 |
|
return (w, x, y, z) |
|
|
|
def perturb(input_image, pixels, theta_sigma=60, add_noise=False): |
|
"""Data augmentation on images.""" |
|
image_size = input_image.shape[:2] |
|
|
|
|
|
while True: |
|
theta, trans, pivot = get_random_image_transform_params(image_size, theta_sigma=theta_sigma) |
|
transform = get_image_transform(theta, trans, pivot) |
|
transform_params = theta, trans, pivot |
|
|
|
|
|
is_valid = True |
|
new_pixels = [] |
|
new_rounded_pixels = [] |
|
for pixel in pixels: |
|
pixel = np.float32([pixel[1], pixel[0], 1.]).reshape(3, 1) |
|
|
|
rounded_pixel = np.int32(np.round(transform @ pixel))[:2].squeeze() |
|
rounded_pixel = np.flip(rounded_pixel) |
|
|
|
pixel = (transform @ pixel)[:2].squeeze() |
|
pixel = np.flip(pixel) |
|
|
|
in_fov_rounded = rounded_pixel[0] < image_size[0] and rounded_pixel[ |
|
1] < image_size[1] |
|
in_fov = pixel[0] < image_size[0] and pixel[1] < image_size[1] |
|
|
|
is_valid = is_valid and np.all(rounded_pixel >= 0) and np.all( |
|
pixel >= 0) and in_fov_rounded and in_fov |
|
|
|
new_pixels.append(pixel) |
|
new_rounded_pixels.append(rounded_pixel) |
|
if is_valid: |
|
break |
|
|
|
|
|
input_image = cv2.warpAffine( |
|
input_image, |
|
transform[:2, :], (image_size[1], image_size[0]), |
|
flags=cv2.INTER_LINEAR) |
|
|
|
|
|
color = np.int32(input_image[:,:,:3]) |
|
depth = np.float32(input_image[:,:,3:]) |
|
|
|
if add_noise: |
|
color += np.int32(np.random.normal(0, 3, image_size + (3,))) |
|
color = np.uint8(np.clip(color, 0, 255)) |
|
|
|
depth += np.float32(np.random.normal(0, 0.003, image_size + (3,))) |
|
|
|
input_image = np.concatenate((color, depth), axis=2) |
|
|
|
|
|
transform_params = np.array([theta, trans[0], trans[1], pivot[0], pivot[1]]) |
|
return input_image, new_pixels, new_rounded_pixels, transform_params |
|
|
|
|
|
def apply_perturbation(input_image, transform_params): |
|
'''Apply data augmentation with specific transform params''' |
|
image_size = input_image.shape[:2] |
|
|
|
|
|
theta, trans, pivot = transform_params[0], transform_params[1:3], transform_params[3:5] |
|
transform = get_image_transform(theta, trans, pivot) |
|
|
|
input_image = cv2.warpAffine( |
|
input_image, |
|
transform[:2, :], (image_size[1], image_size[0]), |
|
flags=cv2.INTER_LINEAR) |
|
return input_image |
|
|
|
|
|
class ImageRotator: |
|
"""Rotate for n rotations.""" |
|
|
|
|
|
def __init__(self, n_rotations): |
|
self.angles = [] |
|
for i in range(n_rotations): |
|
theta = i * 2 * 180 / n_rotations |
|
self.angles.append(theta) |
|
|
|
def __call__(self, x_list, pivot, reverse=False): |
|
rot_x_list = [] |
|
for i, angle in enumerate(self.angles): |
|
x = x_list[i] |
|
|
|
size = len(x) |
|
alpha = angle if not reverse else (-1.0 * angle) |
|
angle = torch.ones(size) * alpha |
|
|
|
|
|
if type(pivot) is not torch.Tensor: |
|
center = torch.FloatTensor(pivot)[...,[1,0]] |
|
center = center.view(1,-1).repeat((size,1)) |
|
else: |
|
center = pivot[...,[1,0]].view(1,-1).clone().to(angle.device) |
|
|
|
|
|
|
|
|
|
|
|
scale = torch.ones(size, 2) |
|
|
|
|
|
M = kornia.geometry.get_rotation_matrix2d(center, angle, scale) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_, _, h, w = x.shape |
|
x_warped = kornia.geometry.transform.warp_affine(x.float(), M.to(x.device), dsize=(h, w)) |
|
x_warped = x_warped |
|
rot_x_list.append(x_warped) |
|
|
|
return rot_x_list |
|
|
|
|
|
|
|
class TreeNode: |
|
|
|
def __init__(self, parent, children, bbox): |
|
self.parent = parent |
|
self.children = children |
|
self.bbox = bbox |
|
|
|
def KDTree(node, min_object_dim, margin, bboxes): |
|
size = node.bbox[3:] - node.bbox[:3] |
|
|
|
|
|
split = size > 2 * min_object_dim |
|
if np.sum(split) == 0: |
|
bboxes.append(node.bbox) |
|
return |
|
split = np.float32(split) / np.sum(split) |
|
split_axis = np.random.choice(range(len(split)), 1, p=split)[0] |
|
|
|
|
|
cut_ind = np.random.rand() * \ |
|
(size[split_axis] - 2 * min_object_dim) + \ |
|
node.bbox[split_axis] + min_object_dim |
|
child1_bbox = node.bbox.copy() |
|
child1_bbox[3 + split_axis] = cut_ind - margin / 2. |
|
child2_bbox = node.bbox.copy() |
|
child2_bbox[split_axis] = cut_ind + margin / 2. |
|
node.children = [ |
|
TreeNode(node, [], bbox=child1_bbox), |
|
TreeNode(node, [], bbox=child2_bbox) |
|
] |
|
KDTree(node.children[0], min_object_dim, margin, bboxes) |
|
KDTree(node.children[1], min_object_dim, margin, bboxes) |
|
|
|
|
|
|
|
|
|
google_seen_obj_shapes = { |
|
'train': [ |
|
'alarm clock', |
|
'android toy', |
|
'black boot with leopard print', |
|
'black fedora', |
|
'black razer mouse', |
|
'black sandal', |
|
'black shoe with orange stripes', |
|
'bull figure', |
|
'butterfinger chocolate', |
|
'c clamp', |
|
'can opener', |
|
'crayon box', |
|
'dog statue', |
|
'frypan', |
|
'green and white striped towel', |
|
'grey soccer shoe with cleats', |
|
'hard drive', |
|
'honey dipper', |
|
'magnifying glass', |
|
'mario figure', |
|
'nintendo 3ds', |
|
'nintendo cartridge', |
|
'office depot box', |
|
'orca plush toy', |
|
'pepsi gold caffeine free box', |
|
'pepsi wild cherry box', |
|
'porcelain cup', |
|
'purple tape', |
|
'red and white flashlight', |
|
'rhino figure', |
|
'rocket racoon figure', |
|
'scissors', |
|
'silver tape', |
|
'spatula with purple head', |
|
'spiderman figure', |
|
'tablet', |
|
'toy school bus', |
|
], |
|
'val': [ |
|
'ball puzzle', |
|
'black and blue sneakers', |
|
'black shoe with green stripes', |
|
'brown fedora', |
|
'dinosaur figure', |
|
'hammer', |
|
'light brown boot with golden laces', |
|
'lion figure', |
|
'pepsi max box', |
|
'pepsi next box', |
|
'porcelain salad plate', |
|
'porcelain spoon', |
|
'red and white striped towel', |
|
'red cup', |
|
'screwdriver', |
|
'toy train', |
|
'unicorn toy', |
|
'white razer mouse', |
|
'yoshi figure' |
|
], |
|
'test': [ |
|
'ball puzzle', |
|
'black and blue sneakers', |
|
'black shoe with green stripes', |
|
'brown fedora', |
|
'dinosaur figure', |
|
'hammer', |
|
'light brown boot with golden laces', |
|
'lion figure', |
|
'pepsi max box', |
|
'pepsi next box', |
|
'porcelain salad plate', |
|
'porcelain spoon', |
|
'red and white striped towel', |
|
'red cup', |
|
'screwdriver', |
|
'toy train', |
|
'unicorn toy', |
|
'white razer mouse', |
|
'yoshi figure' |
|
], |
|
} |
|
|
|
google_unseen_obj_shapes = { |
|
'train': [ |
|
'alarm clock', |
|
'android toy', |
|
'black boot with leopard print', |
|
'black fedora', |
|
'black razer mouse', |
|
'black sandal', |
|
'black shoe with orange stripes', |
|
'bull figure', |
|
'butterfinger chocolate', |
|
'c clamp', |
|
'can opener', |
|
'crayon box', |
|
'dog statue', |
|
'frypan', |
|
'green and white striped towel', |
|
'grey soccer shoe with cleats', |
|
'hard drive', |
|
'honey dipper', |
|
'magnifying glass', |
|
'mario figure', |
|
'nintendo 3ds', |
|
'nintendo cartridge', |
|
'office depot box', |
|
'orca plush toy', |
|
'pepsi gold caffeine free box', |
|
'pepsi wild cherry box', |
|
'porcelain cup', |
|
'purple tape', |
|
'red and white flashlight', |
|
'rhino figure', |
|
'rocket racoon figure', |
|
'scissors', |
|
'silver tape', |
|
'spatula with purple head', |
|
'spiderman figure', |
|
'tablet', |
|
'toy school bus', |
|
], |
|
'val': [ |
|
'ball puzzle', |
|
'black and blue sneakers', |
|
'black shoe with green stripes', |
|
'brown fedora', |
|
'dinosaur figure', |
|
'hammer', |
|
'light brown boot with golden laces', |
|
'lion figure', |
|
'pepsi max box', |
|
'pepsi next box', |
|
'porcelain salad plate', |
|
'porcelain spoon', |
|
'red and white striped towel', |
|
'red cup', |
|
'screwdriver', |
|
'toy train', |
|
'unicorn toy', |
|
'white razer mouse', |
|
'yoshi figure' |
|
], |
|
'test': [ |
|
'ball puzzle', |
|
'black and blue sneakers', |
|
'black shoe with green stripes', |
|
'brown fedora', |
|
'dinosaur figure', |
|
'hammer', |
|
'light brown boot with golden laces', |
|
'lion figure', |
|
'pepsi max box', |
|
'pepsi next box', |
|
'porcelain salad plate', |
|
'porcelain spoon', |
|
'red and white striped towel', |
|
'red cup', |
|
'screwdriver', |
|
'toy train', |
|
'unicorn toy', |
|
'white razer mouse', |
|
'yoshi figure' |
|
], |
|
} |
|
|
|
google_all_shapes = { |
|
'train': [ |
|
'alarm clock', |
|
'android toy', |
|
'ball puzzle', |
|
'black and blue sneakers', |
|
'black boot with leopard print', |
|
'black fedora', |
|
'black razer mouse', |
|
'black sandal', |
|
'black shoe with green stripes', |
|
'black shoe with orange stripes', |
|
'brown fedora', |
|
'bull figure', |
|
'butterfinger chocolate', |
|
'c clamp', |
|
'can opener', |
|
'crayon box', |
|
'dinosaur figure', |
|
'dog statue', |
|
'frypan', |
|
'green and white striped towel', |
|
'grey soccer shoe with cleats', |
|
'hammer', |
|
'hard drive', |
|
'honey dipper', |
|
'light brown boot with golden laces', |
|
'lion figure', |
|
'magnifying glass', |
|
'mario figure', |
|
'nintendo 3ds', |
|
'nintendo cartridge', |
|
'office depot box', |
|
'orca plush toy', |
|
'pepsi gold caffeine free box', |
|
'pepsi max box', |
|
'pepsi next box', |
|
'pepsi wild cherry box', |
|
'porcelain cup', |
|
'porcelain salad plate', |
|
'porcelain spoon', |
|
'purple tape', |
|
'red and white flashlight', |
|
'red and white striped towel', |
|
'red cup', |
|
'rhino figure', |
|
'rocket racoon figure', |
|
'scissors', |
|
'screwdriver', |
|
'silver tape', |
|
'spatula with purple head', |
|
'spiderman figure', |
|
'tablet', |
|
'toy school bus', |
|
'toy train', |
|
'unicorn toy', |
|
'white razer mouse', |
|
'yoshi figure', |
|
], |
|
'val': [ |
|
'alarm clock', |
|
'android toy', |
|
'ball puzzle', |
|
'black and blue sneakers', |
|
'black boot with leopard print', |
|
'black fedora', |
|
'black razer mouse', |
|
'black sandal', |
|
'black shoe with green stripes', |
|
'black shoe with orange stripes', |
|
'brown fedora', |
|
'bull figure', |
|
'butterfinger chocolate', |
|
'c clamp', |
|
'can opener', |
|
'crayon box', |
|
'dinosaur figure', |
|
'dog statue', |
|
'frypan', |
|
'green and white striped towel', |
|
'grey soccer shoe with cleats', |
|
'hammer', |
|
'hard drive', |
|
'honey dipper', |
|
'light brown boot with golden laces', |
|
'lion figure', |
|
'magnifying glass', |
|
'mario figure', |
|
'nintendo 3ds', |
|
'nintendo cartridge', |
|
'office depot box', |
|
'orca plush toy', |
|
'pepsi gold caffeine free box', |
|
'pepsi max box', |
|
'pepsi next box', |
|
'pepsi wild cherry box', |
|
'porcelain cup', |
|
'porcelain salad plate', |
|
'porcelain spoon', |
|
'purple tape', |
|
'red and white flashlight', |
|
'red and white striped towel', |
|
'red cup', |
|
'rhino figure', |
|
'rocket racoon figure', |
|
'scissors', |
|
'screwdriver', |
|
'silver tape', |
|
'spatula with purple head', |
|
'spiderman figure', |
|
'tablet', |
|
'toy school bus', |
|
'toy train', |
|
'unicorn toy', |
|
'white razer mouse', |
|
'yoshi figure', |
|
], |
|
'test': [ |
|
'alarm clock', |
|
'android toy', |
|
'ball puzzle', |
|
'black and blue sneakers', |
|
'black boot with leopard print', |
|
'black fedora', |
|
'black razer mouse', |
|
'black sandal', |
|
'black shoe with green stripes', |
|
'black shoe with orange stripes', |
|
'brown fedora', |
|
'bull figure', |
|
'butterfinger chocolate', |
|
'c clamp', |
|
'can opener', |
|
'crayon box', |
|
'dinosaur figure', |
|
'dog statue', |
|
'frypan', |
|
'green and white striped towel', |
|
'grey soccer shoe with cleats', |
|
'hammer', |
|
'hard drive', |
|
'honey dipper', |
|
'light brown boot with golden laces', |
|
'lion figure', |
|
'magnifying glass', |
|
'mario figure', |
|
'nintendo 3ds', |
|
'nintendo cartridge', |
|
'office depot box', |
|
'orca plush toy', |
|
'pepsi gold caffeine free box', |
|
'pepsi max box', |
|
'pepsi next box', |
|
'pepsi wild cherry box', |
|
'porcelain cup', |
|
'porcelain salad plate', |
|
'porcelain spoon', |
|
'purple tape', |
|
'red and white flashlight', |
|
'red and white striped towel', |
|
'red cup', |
|
'rhino figure', |
|
'rocket racoon figure', |
|
'scissors', |
|
'screwdriver', |
|
'silver tape', |
|
'spatula with purple head', |
|
'spiderman figure', |
|
'tablet', |
|
'toy school bus', |
|
'toy train', |
|
'unicorn toy', |
|
'white razer mouse', |
|
'yoshi figure', |
|
], |
|
} |
|
assembling_kit_shapes = { |
|
0: "letter R shape", |
|
1: "letter A shape", |
|
2: "triangle", |
|
3: "square", |
|
4: "plus", |
|
5: "letter T shape", |
|
6: "diamond", |
|
7: "pentagon", |
|
8: "rectangle", |
|
9: "flower", |
|
10: "star", |
|
11: "circle", |
|
12: "letter G shape", |
|
13: "letter V shape", |
|
14: "letter E shape", |
|
15: "letter L shape", |
|
16: "ring", |
|
17: "hexagon", |
|
18: "heart", |
|
19: "letter M shape", |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
COLORS = { |
|
'blue': [78.0 / 255.0, 121.0 / 255.0, 167.0 / 255.0], |
|
'red': [255.0 / 255.0, 087.0 / 255.0, 089.0 / 255.0], |
|
'green': [089.0 / 255.0, 169.0 / 255.0, 078.0 / 255.0], |
|
'orange': [242.0 / 255.0, 142.0 / 255.0, 043.0 / 255.0], |
|
'yellow': [237.0 / 255.0, 201.0 / 255.0, 072.0 / 255.0], |
|
'purple': [176.0 / 255.0, 122.0 / 255.0, 161.0 / 255.0], |
|
'pink': [255.0 / 255.0, 157.0 / 255.0, 167.0 / 255.0], |
|
'cyan': [118.0 / 255.0, 183.0 / 255.0, 178.0 / 255.0], |
|
'brown': [156.0 / 255.0, 117.0 / 255.0, 095.0 / 255.0], |
|
'white': [255.0 / 255.0, 255.0 / 255.0, 255.0 / 255.0], |
|
'gray': [186.0 / 255.0, 176.0 / 255.0, 172.0 / 255.0], |
|
'indigo': [75.0 / 255.0, 0.0 / 255.0, 130.0 / 255.0], |
|
'violet': [143.0 / 255.0, 0.0 / 255.0, 255.0 / 255.0], |
|
'black': [0.0 / 255.0, 0.0 / 255.0, 0.0 / 255.0], |
|
'silver': [192.0 / 255.0, 192.0 / 255.0, 192.0 / 255.0], |
|
'gold': [255.0 / 255.0, 215.0 / 255.0, 0.0 / 255.0], |
|
|
|
} |
|
|
|
COLORS_NAMES = list(COLORS.keys()) |
|
TRAIN_COLORS = ['blue', 'red', 'green', 'yellow', 'brown', 'gray', 'cyan'] |
|
EVAL_COLORS = ['blue', 'red', 'green', 'orange', 'purple', 'pink', 'white'] |
|
|
|
|
|
def get_colors(mode, n_colors=-1, **kwargs): |
|
all_color_names = get_colors_names(mode) |
|
|
|
if n_colors == -1: |
|
all_color_names = all_color_names |
|
else: |
|
all_color_names = random.sample(all_color_names, n_colors) |
|
return [COLORS[cn] for cn in all_color_names], all_color_names |
|
|
|
def get_colors_names(mode): |
|
if mode == 'train': |
|
return TRAIN_COLORS |
|
elif mode == 'full': |
|
return TRAIN_COLORS |
|
else: |
|
return TRAIN_COLORS |
|
|
|
def get_random_color(): |
|
return get_colors(mode='train', n_colors=1) |
|
|
|
def solve_hanoi_all(n_disks): |
|
|
|
hanoi_steps = [] |
|
|
|
def solve_hanoi(n, t0, t1, t2): |
|
if n == 0: |
|
hanoi_steps.append([n, t0, t1]) |
|
return |
|
solve_hanoi(n - 1, t0, t2, t1) |
|
hanoi_steps.append([n, t0, t1]) |
|
solve_hanoi(n - 1, t2, t1, t0) |
|
|
|
solve_hanoi(n_disks - 1, 0, 2, 1) |
|
return hanoi_steps |
|
|
|
def plot(fname, |
|
title, |
|
ylabel, |
|
xlabel, |
|
data, |
|
xlim=[-np.inf, 0], |
|
xticks=None, |
|
ylim=[np.inf, -np.inf], |
|
show_std=True): |
|
"""Plot frame data.""" |
|
|
|
|
|
|
|
|
|
for name, (x, y, _) in data.items(): |
|
del name |
|
y = np.array(y) |
|
xlim[0] = max(xlim[0], np.min(x)) |
|
xlim[1] = max(xlim[1], np.max(x)) |
|
ylim[0] = min(ylim[0], np.min(y)) |
|
ylim[1] = max(ylim[1], np.max(y)) |
|
|
|
|
|
plt.title(title, fontsize=14) |
|
plt.ylim(ylim) |
|
plt.ylabel(ylabel, fontsize=14) |
|
plt.yticks(fontsize=14) |
|
plt.xlim(xlim) |
|
plt.xlabel(xlabel, fontsize=14) |
|
plt.grid(True, linestyle='-', color=[0.8, 0.8, 0.8]) |
|
ax = plt.gca() |
|
for axis in ['top', 'bottom', 'left', 'right']: |
|
ax.spines[axis].set_color('#000000') |
|
plt.rcParams.update({'font.size': 14}) |
|
plt.rcParams['mathtext.default'] = 'regular' |
|
matplotlib.rcParams['pdf.fonttype'] = 42 |
|
matplotlib.rcParams['ps.fonttype'] = 42 |
|
|
|
|
|
color_iter = 0 |
|
for name, (x, y, std) in data.items(): |
|
del name |
|
x, y, std = np.float32(x), np.float32(y), np.float32(std) |
|
upper = np.clip(y + std, ylim[0], ylim[1]) |
|
lower = np.clip(y - std, ylim[0], ylim[1]) |
|
color = COLORS[list(COLORS.keys())[color_iter]] |
|
if show_std: |
|
plt.fill_between(x, upper, lower, color=color, linewidth=0, alpha=0.3) |
|
plt.plot(x, y, color=color, linewidth=2, marker='o', alpha=1.) |
|
color_iter += 1 |
|
|
|
if xticks: |
|
plt.xticks(ticks=range(len(xticks)), labels=xticks, fontsize=14) |
|
else: |
|
plt.xticks(fontsize=14) |
|
plt.legend([name for name, _ in data.items()], |
|
loc='lower right', fontsize=14) |
|
plt.tight_layout() |
|
plt.savefig(fname) |
|
plt.clf() |
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_visualizer(clear=True): |
|
print('Waiting for meshcat server... have you started a server?') |
|
vis = meshcat.Visualizer(zmq_url='tcp://127.0.0.1:6000') |
|
if clear: |
|
vis.delete() |
|
return vis |
|
|
|
|
|
def make_frame(vis, name, h, radius, o=1.0): |
|
"""Add a red-green-blue triad to the Meschat visualizer. |
|
|
|
Args: |
|
vis (MeshCat Visualizer): the visualizer |
|
name (string): name for this frame (should be unique) |
|
h (float): height of frame visualization |
|
radius (float): radius of frame visualization |
|
o (float): opacity |
|
""" |
|
vis[name]['x'].set_object( |
|
g.Cylinder(height=h, radius=radius), |
|
g.MeshLambertMaterial(color=0xff0000, reflectivity=0.8, opacity=o)) |
|
rotate_x = mtf.rotation_matrix(np.pi / 2.0, [0, 0, 1]) |
|
rotate_x[0, 3] = h / 2 |
|
vis[name]['x'].set_transform(rotate_x) |
|
|
|
vis[name]['y'].set_object( |
|
g.Cylinder(height=h, radius=radius), |
|
g.MeshLambertMaterial(color=0x00ff00, reflectivity=0.8, opacity=o)) |
|
rotate_y = mtf.rotation_matrix(np.pi / 2.0, [0, 1, 0]) |
|
rotate_y[1, 3] = h / 2 |
|
vis[name]['y'].set_transform(rotate_y) |
|
|
|
vis[name]['z'].set_object( |
|
g.Cylinder(height=h, radius=radius), |
|
g.MeshLambertMaterial(color=0x0000ff, reflectivity=0.8, opacity=o)) |
|
rotate_z = mtf.rotation_matrix(np.pi / 2.0, [1, 0, 0]) |
|
rotate_z[2, 3] = h / 2 |
|
vis[name]['z'].set_transform(rotate_z) |
|
|
|
|
|
def meshcat_visualize(vis, obs, act, info): |
|
"""Visualize data using meshcat.""" |
|
|
|
for key in sorted(info.keys()): |
|
pose = info[key] |
|
pick_transform = np.eye(4) |
|
pick_transform[0:3, 3] = pose[0] |
|
quaternion_wxyz = np.asarray( |
|
[pose[1][3], pose[1][0], pose[1][1], pose[1][2]]) |
|
pick_transform[0:3, 0:3] = mtf.quaternion_matrix(quaternion_wxyz)[0:3, 0:3] |
|
label = 'obj_' + str(key) |
|
make_frame(vis, label, h=0.05, radius=0.0012, o=1.0) |
|
vis[label].set_transform(pick_transform) |
|
|
|
for cam_index in range(len(act['camera_config'])): |
|
verts = unproject_depth_vectorized( |
|
obs['depth'][cam_index], np.array([0, 1]), |
|
np.array(act['camera_config'][cam_index]['intrinsics']).reshape(3, 3), |
|
np.zeros(5)) |
|
|
|
|
|
verts = verts.T |
|
|
|
cam_transform = np.eye(4) |
|
cam_transform[0:3, 3] = act['camera_config'][cam_index]['position'] |
|
quaternion_xyzw = act['camera_config'][cam_index]['rotation'] |
|
quaternion_wxyz = np.asarray([ |
|
quaternion_xyzw[3], quaternion_xyzw[0], quaternion_xyzw[1], |
|
quaternion_xyzw[2] |
|
]) |
|
cam_transform[0:3, 0:3] = mtf.quaternion_matrix(quaternion_wxyz)[0:3, 0:3] |
|
verts = apply_transform(cam_transform, verts) |
|
|
|
colors = obs['color'][cam_index].reshape(-1, 3).T / 255.0 |
|
|
|
vis['pointclouds/' + str(cam_index)].set_object( |
|
g.PointCloud(position=verts, color=colors)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
def set_seed(seed, torch=False): |
|
random.seed(seed) |
|
os.environ['PYTHONHASHSEED'] = str(seed) |
|
np.random.seed(seed) |
|
|
|
if torch: |
|
import torch |
|
torch.manual_seed(seed) |
|
|
|
|
|
def load_cfg(yaml_path): |
|
with open(yaml_path, 'r') as f: |
|
data = yaml.safe_load(f) |
|
return data |
|
|
|
|
|
def load_hydra_config(config_path): |
|
return OmegaConf.load(config_path) |
|
|