Spaces:

wliu88
/

StructDiffusionDemo

Paused

StructDiffusionDemo / src /StructDiffusion /utils /rearrangement.py

Weiyu Liu

add natural language model and app

f392320 almost 2 years ago

46.2 kB

	import copy
	import os
	import torch
	import trimesh
	import numpy as np
	import open3d
	from PIL import Image, ImageDraw, ImageFont
	from sklearn.metrics import classification_report
	from collections import defaultdict
	import matplotlib.pyplot as plt
	import itertools
	import matplotlib
	import h5py
	import json

	import StructDiffusion.utils.transformations as tra
	from StructDiffusion.utils.rotation_continuity import compute_geodesic_distance_from_two_matrices

	# from pointnet_utils import farthest_point_sample, index_points


	def flatten1d(img):
	    """Collapse ``img`` into a single dimension using ``reshape(-1)``."""
	    flat = img.reshape(-1)
	    return flat


	def flatten3d(img):
	    """Merge the first two axes of ``img`` into one; all remaining axes are folded into the second."""
	    height, width = img.shape[0], img.shape[1]
	    return img.reshape(height * width, -1)


	def array_to_tensor(array):
	    """Convert a channels-last numpy array into a float torch tensor.

	    4-D input (NHWC) is permuted to NCHW, 3-D input (HWC) is permuted to CHW,
	    and any other rank keeps its axis order.

	    :param array: numpy array
	    :return: float tensor built from ``array``
	    """
	    tensor = torch.from_numpy(array)
	    if array.ndim == 4:
	        # NHWC -> NCHW
	        tensor = tensor.permute(0, 3, 1, 2)
	    elif array.ndim == 3:
	        # HWC -> CHW
	        tensor = tensor.permute(2, 0, 1)
	    return tensor.float()


	def get_pts(xyz_in, rgb_in, mask, bg_mask=None, num_pts=1024, center=None,
	            radius=0.5, filename=None, to_tensor=True):
	    """Select the masked points of an image-shaped point cloud and resample them.

	    :param xyz_in: per-pixel coordinates; flattened with ``flatten3d``
	    :param rgb_in: per-pixel colors flattened the same way, or None to skip colors
	    :param mask: per-pixel mask; entries > 0 are kept
	    :param bg_mask: not used by this function
	    :param num_pts: number of points drawn (with replacement) from the kept points;
	        None disables resampling
	    :param center: optional point; only points closer than ``radius`` to it are kept.
	        ``.numpy()`` is called on it, so presumably a torch tensor -- confirm with callers
	    :param radius: distance threshold used together with ``center``
	    :param filename: only used in the message printed when no point is left
	    :param to_tensor: convert the outputs with ``array_to_tensor`` when True
	    :return: ``(ok, xyz, rgb, center)``; ``(None, None, None, None)`` when resampling is
	        requested but no point is left after pruning
	    :raises RuntimeError: if the mask selects no point
	    """
	    has_rgb = rgb_in is not None

	    # Keep only the pixels selected by the mask
	    mask = flatten1d(mask)
	    assert(np.sum(mask) > 0)
	    xyz = flatten3d(xyz_in)[mask > 0]
	    if has_rgb:
	        rgb = flatten3d(rgb_in)[mask > 0]

	    if xyz.shape[0] == 0:
	        raise RuntimeError('this should not happen')
	    # Reaching this point means at least one point was selected
	    ok = True

	    # Prune to the ball of the given radius around the center
	    if center is not None:
	        tiled_center = center.numpy()[None].repeat(xyz.shape[0], axis=0)
	        within_radius = np.linalg.norm(xyz - tiled_center, axis=-1) < radius
	        xyz = xyz[within_radius]
	        if has_rgb:
	            rgb = rgb[within_radius]
	        center = tiled_center[0]

	    # Draw num_pts indices uniformly at random (repeats are possible)
	    if num_pts is not None:
	        if xyz.shape[0] < 1:
	            print("!!!! bad shape:", xyz.shape, filename, "!!!!")
	            return (None, None, None, None)
	        sampled = np.random.randint(0, xyz.shape[0], num_pts)
	        xyz = xyz[sampled]
	        if has_rgb:
	            rgb = rgb[sampled]

	    # Optionally convert the outputs to tensors
	    if not has_rgb:
	        rgb = None
	    elif to_tensor:
	        rgb = array_to_tensor(rgb)
	    if to_tensor:
	        xyz = array_to_tensor(xyz)
	    return (ok, xyz, rgb, center)


	def align(y_true, y_pred):
	    """Shift predicted angles by +/- 2*pi wherever that brings them closer to the ground truth.

	    A copy of ``y_pred`` is modified and returned; the input array is left untouched.
	    """
	    full_turn = np.pi * 2
	    aligned = y_pred.copy()
	    # prediction is more than half a turn below the target
	    too_low = y_true - aligned > np.pi
	    aligned[too_low] += full_turn
	    # prediction is more than half a turn above the target (evaluated after the first shift)
	    too_high = y_true - aligned < -np.pi
	    aligned[too_high] -= full_turn
	    return aligned


	def random_move_obj_xyz(obj_xyz,
	                        min_translation, max_translation,
	                        min_rotation, max_rotation, mode,
	                        visualize=False, return_perturbed_obj_xyzs=True):
	    """Sample a random rigid perturbation for one object point cloud.

	    Magnitudes are drawn uniformly from ``[min, max]`` and given a random sign.

	    :param obj_xyz: object point cloud; returned unchanged
	    :param min_translation: lower bound of the translation magnitude per axis
	    :param max_translation: upper bound of the translation magnitude per axis
	    :param min_rotation: lower bound of the rotation magnitude
	    :param max_rotation: upper bound of the rotation magnitude
	    :param mode: "planar" (xy translation, rotation about z), "3d_planar" (xyz translation,
	        rotation about z) or "6d" (xyz translation, three Euler angles)
	    :param visualize: show the point clouds with ``show_pcs``
	    :param return_perturbed_obj_xyzs: must be False; True is no longer supported
	    :return: ``(obj_xyz, perturbation_matrix)``. The matrix holds the sampled rotation and
	        translation; they are meant to be applied independently (rotate the object in place,
	        then translate it), not as one homogeneous transform.
	    :raises Exception: if ``return_perturbed_obj_xyzs`` is True
	    """

	    assert mode in ["planar", "6d", "3d_planar"]

	    if mode == "planar":
	        random_translation = np.random.uniform(low=min_translation, high=max_translation, size=2) * np.random.choice(
	            [-1, 1], size=2)
	        random_rotation = np.random.uniform(low=min_rotation, high=max_rotation) * np.random.choice([-1, 1])
	        random_rotation = tra.euler_matrix(0, 0, random_rotation)
	    elif mode == "6d":
	        random_rotation = np.random.uniform(low=min_rotation, high=max_rotation, size=3) * np.random.choice([-1, 1], size=3)
	        random_rotation = tra.euler_matrix(*random_rotation)
	        random_translation = np.random.uniform(low=min_translation, high=max_translation, size=3) * np.random.choice([-1, 1], size=3)
	    elif mode == "3d_planar":
	        random_translation = np.random.uniform(low=min_translation, high=max_translation, size=3) * np.random.choice(
	            [-1, 1], size=3)
	        random_rotation = np.random.uniform(low=min_rotation, high=max_rotation) * np.random.choice([-1, 1])
	        random_rotation = tra.euler_matrix(0, 0, random_rotation)

	    if return_perturbed_obj_xyzs:
	        raise Exception("return_perturbed_obj_xyzs=True is no longer supported")
	    new_obj_xyz = obj_xyz

	    perturbation_matrix = random_rotation
	    # "planar" samples only (x, y). Writing just the sampled components avoids the shape
	    # mismatch of assigning a 2-vector into the 3-entry translation column; z keeps the
	    # value the rotation matrix came with (presumably 0 -- confirm against tra.euler_matrix).
	    perturbation_matrix[:len(random_translation), 3] = random_translation

	    if visualize:
	        num_points = obj_xyz.shape[0]
	        # builtin float replaces the np.float alias, which NumPy removed
	        show_pcs([new_obj_xyz, obj_xyz],
	                 [np.tile(np.array([1, 0, 0], dtype=float), (num_points, 1)),
	                  np.tile(np.array([0, 1, 0], dtype=float), (num_points, 1))], add_coordinate_frame=True)

	    return new_obj_xyz, perturbation_matrix


	def random_move_obj_xyzs(obj_xyzs,
	                         min_translation, max_translation,
	                         min_rotation, max_rotation, mode, move_obj_idxs=None, visualize=False, return_moved_obj_idxs=False,
	                         return_perturbation=False, return_perturbed_obj_xyzs=True):
	    """Randomly perturb a subset of objects with ``random_move_obj_xyz``.

	    A random number of indices (possibly zero, never all) is drawn from ``move_obj_idxs``
	    and kept stationary; every other object is perturbed.
	    NOTE(review): objects whose index is not in ``move_obj_idxs`` are therefore always
	    perturbed -- confirm this is the intended meaning of the parameter.

	    :param obj_xyzs: list of object point clouds
	    :param min_translation: forwarded to ``random_move_obj_xyz``
	    :param max_translation: forwarded to ``random_move_obj_xyz``
	    :param min_rotation: forwarded to ``random_move_obj_xyz``
	    :param max_rotation: forwarded to ``random_move_obj_xyz``
	    :param mode: forwarded to ``random_move_obj_xyz``
	    :param move_obj_idxs: indices the stationary objects are drawn from; defaults to all objects
	    :param visualize: show the point clouds with ``show_pcs``
	    :param return_moved_obj_idxs: also return the indices of the perturbed objects
	    :param return_perturbation: also return one 4x4 matrix per object (identity if stationary)
	    :param return_perturbed_obj_xyzs: forwarded to ``random_move_obj_xyz``
	    :return: ``new_obj_xyzs``, followed by ``moved_obj_idxs`` and/or ``perturbation_matrices``
	        when the corresponding flags are set
	    """

	    new_obj_xyzs = []
	    new_obj_rgbs = []
	    old_obj_rgbs = []
	    perturbation_matrices = []

	    if move_obj_idxs is None:
	        move_obj_idxs = list(range(len(obj_xyzs)))

	    # this many objects will not be randomly moved
	    if len(move_obj_idxs) > 0:
	        num_stationary = np.random.randint(0, len(move_obj_idxs))
	        stationary_obj_idxs = set(np.random.choice(move_obj_idxs, num_stationary, replace=False).tolist())
	    else:
	        # np.random.randint(0, 0) would raise; with no candidates nothing stays stationary
	        stationary_obj_idxs = set()

	    def uniform_color(rgb, obj_xyz):
	        # one color row per point; builtin float replaces the removed np.float alias
	        return np.tile(np.array(rgb, dtype=float), (obj_xyz.shape[0], 1))

	    moved_obj_idxs = []
	    for obj_idx, obj_xyz in enumerate(obj_xyzs):

	        if obj_idx in stationary_obj_idxs:
	            new_obj_xyzs.append(obj_xyz)
	            perturbation_matrices.append(np.eye(4))
	            if visualize:
	                new_obj_rgbs.append(uniform_color([1, 0, 0], obj_xyz))
	                old_obj_rgbs.append(uniform_color([0, 0, 1], obj_xyz))
	        else:
	            new_obj_xyz, perturbation_matrix = random_move_obj_xyz(obj_xyz,
	                                                                   min_translation=min_translation, max_translation=max_translation,
	                                                                   min_rotation=min_rotation, max_rotation=max_rotation, mode=mode,
	                                                                   return_perturbed_obj_xyzs=return_perturbed_obj_xyzs)
	            new_obj_xyzs.append(new_obj_xyz)
	            moved_obj_idxs.append(obj_idx)
	            perturbation_matrices.append(perturbation_matrix)
	            if visualize:
	                new_obj_rgbs.append(uniform_color([1, 0, 0], obj_xyz))
	                old_obj_rgbs.append(uniform_color([0, 1, 0], obj_xyz))
	    if visualize:
	        show_pcs(new_obj_xyzs + obj_xyzs,
	                 new_obj_rgbs + old_obj_rgbs, add_coordinate_frame=True)

	    if return_moved_obj_idxs:
	        if return_perturbation:
	            return new_obj_xyzs, moved_obj_idxs, perturbation_matrices
	        return new_obj_xyzs, moved_obj_idxs
	    if return_perturbation:
	        return new_obj_xyzs, perturbation_matrices
	    return new_obj_xyzs


	def check_pairwise_collision(pcs, visualize=False):
	    """Check whether any two point clouds collide when each point is replaced by a small box.

	    Every point cloud is reduced with farthest point sampling (100 requested), each
	    remaining point becomes a box with 0.005 edge length, and the boxes of each point
	    cloud are grouped into one trimesh collision manager.

	    NOTE(review): ``farthest_point_sample`` and ``index_points`` are not imported in the
	    visible part of this file (the ``pointnet_utils`` import near the top is commented
	    out); calling this function raises NameError until they are available.

	    :param pcs: iterable of point clouds; ``unsqueeze`` is called on each, so presumably
	        torch tensors -- confirm with callers. Clouds that are all zeros are skipped.
	    :param visualize: show the boxes of the first colliding pair
	    :return: True if at least one pair of point clouds is in collision
	    """

	    voxel_extents = [0.005] * 3

	    collision_managers = []
	    collision_objects = []

	    for pc in pcs:

	        # farthest point sample
	        pc = pc.unsqueeze(0)
	        fps_idx = farthest_point_sample(pc, 100)  # [B, npoint]
	        pc = index_points(pc, fps_idx).squeeze(0)

	        pc = np.asanyarray(pc)
	        # ignore empty pc
	        if np.all(pc == 0):
	            continue

	        collision_object = []
	        collision_manager = trimesh.collision.CollisionManager()

	        # One box per point, registered with this point cloud's manager
	        for i in range(pc.shape[0]):
	            transform = np.eye(4)
	            transform[:3, 3] = pc[i, :3]
	            voxel = trimesh.primitives.Box(extents=voxel_extents, transform=transform)
	            collision_object.append((voxel, voxel_extents, transform))
	            collision_manager.add_object("voxel_{}".format(i), voxel)

	        collision_managers.append(collision_manager)
	        collision_objects.append(collision_object)

	    # A collision between two managers does not depend on their order, so each unordered
	    # pair is tested once instead of twice.
	    for i, j in itertools.combinations(range(len(collision_managers)), 2):
	        if collision_managers[i].in_collision_other(collision_managers[j]):
	            if visualize:
	                visualize_collision_objects(collision_objects[i] + collision_objects[j])
	            return True

	    return False


	def check_collision_with(this_pc, other_pcs, visualize=False):
	    """Find which of ``other_pcs`` collide with ``this_pc`` using per-point boxes.

	    Every point cloud is reduced with farthest point sampling (100 requested), each
	    remaining point becomes a box with 0.005 edge length, and the boxes of each point
	    cloud are grouped into one trimesh collision manager.

	    NOTE(review): ``farthest_point_sample`` and ``index_points`` are not imported in the
	    visible part of this file (the ``pointnet_utils`` import near the top is commented
	    out); calling this function raises NameError until they are available.

	    :param this_pc: the point cloud to test; ``unsqueeze`` is called on it, so presumably
	        a torch tensor -- confirm with callers
	    :param other_pcs: list of point clouds to test against; clouds that are all zeros are skipped
	    :param visualize: show the boxes of every colliding pair
	    :return: list of indices of colliding point clouds. The indices count only the
	        non-empty entries of ``other_pcs``, so they differ from positions in ``other_pcs``
	        whenever an empty cloud precedes a colliding one.
	        NOTE(review): confirm callers expect this numbering.
	    """

	    voxel_extents = [0.005] * 3

	    this_collision_manager = None
	    this_collision_object = None
	    other_collision_managers = []
	    other_collision_objects = []

	    for oi, pc in enumerate([this_pc] + other_pcs):

	        # farthest point sample
	        pc = pc.unsqueeze(0)
	        fps_idx = farthest_point_sample(pc, 100)  # [B, npoint]
	        pc = index_points(pc, fps_idx).squeeze(0)

	        pc = np.asanyarray(pc)
	        # ignore empty pc
	        if np.all(pc == 0):
	            continue

	        collision_object = []
	        collision_manager = trimesh.collision.CollisionManager()

	        # One box per point, registered with this point cloud's manager
	        for i in range(pc.shape[0]):
	            transform = np.eye(4)
	            transform[:3, 3] = pc[i, :3]
	            voxel = trimesh.primitives.Box(extents=voxel_extents, transform=transform)
	            collision_object.append((voxel, voxel_extents, transform))
	            collision_manager.add_object("voxel_{}".format(i), voxel)

	        if oi == 0:
	            this_collision_manager = collision_manager
	            this_collision_object = collision_object
	        else:
	            other_collision_managers.append(collision_manager)
	            other_collision_objects.append(collision_object)

	    # An all-zero this_pc is skipped above and has no manager; it cannot collide with
	    # anything, so report no collisions instead of failing on None.
	    if this_collision_manager is None:
	        return []

	    collisions = []
	    for i, cm_i in enumerate(other_collision_managers):
	        if this_collision_manager.in_collision_other(cm_i):
	            collisions.append(i)

	            if visualize:
	                visualize_collision_objects(this_collision_object + other_collision_objects[i])

	    return collisions


	def visualize_collision_objects(collision_objects):
	    """Show collision boxes in a blocking open3d window.

	    :param collision_objects: list of ``(voxel, extents, transform)`` tuples; only the
	        extents and the transform are used for drawing
	    """

	    def to_open3d_box(extents, transform):
	        # Rebuild the box as an open3d mesh and move it into place
	        box = open3d.geometry.TriangleMesh.create_box(width=extents[0], height=extents[1],
	                                                       depth=extents[2])
	        box.compute_vertex_normals()
	        box.paint_uniform_color([0.8, 0.2, 0])
	        box.transform(transform)
	        return box

	    # Convert from trimesh to open3d
	    boxes = [to_open3d_box(extents, transform) for (_, extents, transform) in collision_objects]

	    viewer = open3d.visualization.Visualizer()
	    viewer.create_window()
	    for box in boxes:
	        viewer.add_geometry(box)

	    viewer.run()
	    viewer.destroy_window()


	# def test_collision(pc):
	# n_points = pc.shape[0]
	# voxel_extents = [0.005] * 3
	# collision_objects = []
	# collision_manager = trimesh.collision.CollisionManager()
	#
	# # Construct collision objects
	# for i in range(n_points):
	# extents = voxel_extents
	# transform = np.eye(4)
	# transform[:3, 3] = pc[i, :3]
	# voxel = trimesh.primitives.Box(extents=extents, transform=transform)
	# collision_objects.append((voxel, extents, transform))
	#
	# # Add to collision manager
	# for i, (voxel, _, _) in enumerate(collision_objects):
	# collision_manager.add_object("voxel_{}".format(i), voxel)
	#
	# for i, (voxel, _, _) in enumerate(collision_objects):
	# c, names = collision_manager.in_collision_single(voxel, return_names=True)
	# if c:
	# print(i, names)
	#
	# # Convert from trimesh to open3d
	# meshes_o3d = []
	# for elem in collision_objects:
	# (voxel, extents, transform) = elem
	# voxel_o3d = open3d.geometry.TriangleMesh.create_box(width=extents[0], height=extents[1],
	# depth=extents[2])
	# voxel_o3d.compute_vertex_normals()
	# voxel_o3d.paint_uniform_color([0.8, 0.2, 0])
	# voxel_o3d.transform(transform)
	# meshes_o3d.append(voxel_o3d)
	# meshes = meshes_o3d
	#
	# vis = open3d.visualization.Visualizer()
	# vis.create_window()
	#
	# for mesh in meshes:
	# vis.add_geometry(mesh)
	#
	# vis.run()
	# vis.destroy_window()
	#
	#
	# def test_collision2(pc):
	# pcd = open3d.geometry.PointCloud()
	# pcd.points = open3d.utility.Vector3dVector(pc)
	# pcd.estimate_normals()
	# open3d.visualization.draw_geometries([pcd])
	#
	# # poisson_mesh = open3d.geometry.TriangleMesh.create_from_point_cloud_poisson(pcd, depth=8, width=0, scale=1.1, linear_fit=False)[0]
	# # bbox = pcd.get_axis_aligned_bounding_box()
	# # p_mesh_crop = poisson_mesh.crop(bbox)
	# # open3d.visualization.draw_geometries([p_mesh_crop, pcd])
	#
	# distances = pcd.compute_nearest_neighbor_distance()
	# avg_dist = np.mean(distances)
	# radius = 3 * avg_dist
	# bpa_mesh = open3d.geometry.TriangleMesh.create_from_point_cloud_ball_pivoting(pcd, open3d.utility.DoubleVector(
	# [radius, radius * 2]))
	# dec_mesh = bpa_mesh.simplify_quadric_decimation(100000)
	# dec_mesh.remove_degenerate_triangles()
	# dec_mesh.remove_duplicated_triangles()
	# dec_mesh.remove_duplicated_vertices()
	# dec_mesh.remove_non_manifold_edges()
	# open3d.visualization.draw_geometries([dec_mesh, pcd])
	# open3d.visualization.draw_geometries([dec_mesh])


	def make_gifs(imgs, save_path, texts=None, numpy_img=True, duration=10):
	    """Write a sequence of frames to an animated, endlessly looping GIF.

	    :param imgs: frames; numpy arrays if ``numpy_img`` is True, otherwise PIL images
	    :param save_path: destination of the GIF
	    :param texts: optional per-frame captions drawn at the top-left corner; when given,
	        ``texts[i]`` is drawn onto frame ``i`` (PIL frames are modified in place)
	    :param numpy_img: convert each frame with ``Image.fromarray`` first
	    :param duration: the frame duration passed to PIL is ``duration * number of frames``
	    """
	    # The caption font is loaded at most once, and only if a caption is actually drawn
	    font = None
	    pil_imgs = []
	    for i, img in enumerate(imgs):
	        if numpy_img:
	            img = Image.fromarray(img)
	        if texts:
	            if font is None:
	                font = ImageFont.truetype("FreeMono.ttf", 40)
	            draw = ImageDraw.Draw(img)
	            draw.text((0, 0), texts[i], (120, 120, 120), font=font)
	        pil_imgs.append(img)

	    pil_imgs[0].save(save_path, save_all=True,
	                     append_images=pil_imgs[1:], optimize=True,
	                     duration=duration*len(pil_imgs), loop=0)


	def save_img(img, save_path, text=None, numpy_img=True):
	    """Save a single image, optionally with a caption drawn at the top-left corner.

	    :param img: numpy array if ``numpy_img`` is True, otherwise a PIL image
	    :param save_path: destination passed to ``Image.save``
	    :param text: optional caption
	    :param numpy_img: convert ``img`` with ``Image.fromarray`` first
	    """
	    image = Image.fromarray(img) if numpy_img else img
	    if text:
	        canvas = ImageDraw.Draw(image)
	        caption_font = ImageFont.truetype("FreeMono.ttf", 40)
	        canvas.text((0, 0), text, (120, 120, 120), font=caption_font)
	    image.save(save_path)


	def move_one_object_pc(obj_xyz, obj_rgb, struct_params, object_params, euler_angles=False):
	    """Place an object point cloud at a pose given relative to a structure pose.

	    The cloud is translated so that its centroid lands on ``object_params[:3]``, rotated
	    about that centroid by the object rotation, and finally transformed by the structure
	    pose (rotation and translation from ``struct_params``).

	    :param obj_xyz: object points as a torch tensor (``torch.mean`` is applied to it)
	    :param obj_rgb: object colors; returned unchanged
	    :param struct_params: translation (first three values) followed by the rotation
	    :param object_params: translation (first three values) followed by the rotation
	    :param euler_angles: if False the rotation values are reshaped to a 3x3 matrix; if True
	        they are passed to ``tra.euler_matrix``
	    :return: ``(new_obj_xyz, obj_rgb)`` with ``new_obj_xyz`` a tensor of the input dtype
	    """
	    struct_params = np.asanyarray(struct_params)
	    object_params = np.asanyarray(object_params)

	    def rotation_3x3(params):
	        # Rotation part of a parameter vector as a 3x3 matrix
	        if euler_angles:
	            return tra.euler_matrix(*params[3:])[:3, :3]
	        return params[3:].reshape(3, 3)

	    struct_pose = np.eye(4)
	    struct_pose[:3, :3] = rotation_3x3(struct_params)
	    obj_rotation = np.eye(4)
	    obj_rotation[:3, :3] = rotation_3x3(object_params)
	    struct_pose[:3, 3] = [struct_params[0], struct_params[1], struct_params[2]]

	    # translate to structure frame
	    centroid = torch.mean(obj_xyz, dim=0)
	    shift = np.eye(4)
	    shift[:3, 3] = [object_params[axis] - centroid[axis] for axis in range(3)]
	    moved_xyz = trimesh.transform_points(obj_xyz, shift)

	    # rotate in place
	    pivot = np.mean(moved_xyz, axis=0)
	    rotated_xyz = trimesh.transform_points(moved_xyz - pivot, obj_rotation, translate=True)
	    moved_xyz = rotated_xyz + pivot

	    # transform to the global frame from the structure frame
	    moved_xyz = trimesh.transform_points(moved_xyz, struct_pose)

	    # convert back to torch
	    return torch.tensor(moved_xyz, dtype=obj_xyz.dtype), obj_rgb


	def move_one_object_pc_no_struct(obj_xyz, obj_rgb, object_params, euler_angles=False):
	    """Place an object point cloud at the pose given by ``object_params``.

	    The cloud is translated so that its centroid lands on ``object_params[:3]`` and then
	    rotated about that centroid by the object rotation.

	    :param obj_xyz: object points as a torch tensor (``torch.mean`` is applied to it)
	    :param obj_rgb: object colors; returned unchanged
	    :param object_params: translation (first three values) followed by the rotation
	    :param euler_angles: if False the rotation values are reshaped to a 3x3 matrix; if True
	        they are passed to ``tra.euler_matrix``
	    :return: ``(new_obj_xyz, obj_rgb)`` with ``new_obj_xyz`` a tensor of the input dtype
	    """
	    object_params = np.asanyarray(object_params)

	    obj_rotation = np.eye(4)
	    if euler_angles:
	        obj_rotation[:3, :3] = tra.euler_matrix(*object_params[3:])[:3, :3]
	    else:
	        obj_rotation[:3, :3] = object_params[3:].reshape(3, 3)

	    # move the centroid onto the target position
	    centroid = torch.mean(obj_xyz, dim=0)
	    shift = np.eye(4)
	    shift[:3, 3] = [object_params[axis] - centroid[axis] for axis in range(3)]
	    moved_xyz = trimesh.transform_points(obj_xyz, shift)

	    # rotate in place
	    pivot = np.mean(moved_xyz, axis=0)
	    rotated_xyz = trimesh.transform_points(moved_xyz - pivot, obj_rotation, translate=True)
	    moved_xyz = rotated_xyz + pivot

	    # convert back to torch
	    return torch.tensor(moved_xyz, dtype=obj_xyz.dtype), obj_rgb


	def modify_language(sentence, radius=None, position_x=None, position_y=None, rotation=None, shape=None):
	    """Return a deep copy of ``sentence`` with selected ``(value, type)`` pairs replaced.

	    Every two-element entry whose second element names one of the keyword arguments is
	    replaced by ``(new_value, name)`` when that argument is not None. All other entries
	    are left as they are, and the input sentence is not modified.
	    """
	    # "radius": [0.0, 0.5, 3], "position_x": [-0.1, 1.0, 3], "position_y": [-0.5, 0.5, 3], "rotation": [-3.15, 3.15, 4]
	    replacements = (
	        ("radius", radius),
	        ("position_y", position_y),
	        ("position_x", position_x),
	        ("rotation", rotation),
	        ("shape", shape),
	    )

	    sentence = copy.deepcopy(sentence)
	    for pi, pair in enumerate(sentence):
	        for word_type, new_value in replacements:
	            if new_value is not None and len(pair) == 2 and pair[1] == word_type:
	                sentence[pi] = (new_value, word_type)

	    return sentence


	def sample_gaussians(mus, sigmas, sample_size):
	    """Draw ``sample_size`` samples from each of several independent 1-D Gaussians.

	    :param mus: means, shape [number of individual gaussians]
	    :param sigmas: standard deviations, shape [number of individual gaussians]
	    :param sample_size: number of samples per Gaussian
	    :return: samples, shape [sample_size, number of individual gaussians]
	    """
	    return torch.distributions.Normal(mus, sigmas).sample((sample_size,))


	def fit_gaussians(samples, sigma_eps=0.01):
	    """Fit an independent 1-D Gaussian to every column of ``samples``.

	    :param samples: tensor of shape [sample_size, number of individual gaussians]
	    :param sigma_eps: constant added to every standard deviation
	    :return: ``(mus, sigmas)``, each of shape [number of individual gaussians]
	    """
	    mus = torch.mean(samples, dim=0)
	    # Adding the scalar directly keeps the result on the device and dtype of ``samples``;
	    # a separately created torch.ones(...) tensor would always live on the CPU.
	    sigmas = torch.std(samples, dim=0) + sigma_eps
	    return mus, sigmas


	def show_pcs_with_trimesh(obj_xyzs, obj_rgbs=None, return_scene=False):
	    """Build a trimesh scene with a table and the given point clouds, then show or return it.

	    :param obj_xyzs: list of point clouds
	    :param obj_rgbs: optional list of per-point colors, one array per point cloud; values
	        are multiplied by 255 and an alpha column of 255 is appended
	    :param return_scene: return the scene instead of opening a viewer
	    :return: the ``trimesh.Scene`` if ``return_scene`` is True, otherwise None
	    """
	    if obj_rgbs is not None:
	        vis_pcs = [trimesh.PointCloud(obj_xyz, colors=np.concatenate([obj_rgb * 255, np.ones([obj_rgb.shape[0], 1]) * 255], axis=-1)) for
	                   obj_xyz, obj_rgb in zip(obj_xyzs, obj_rgbs)]
	    else:
	        vis_pcs = [trimesh.PointCloud(obj_xyz) for obj_xyz in obj_xyzs]

	    scene = trimesh.Scene()

	    # thin box standing in for the table surface
	    table = trimesh.creation.box(extents=[1.0, 1.0, 0.02])
	    table.apply_translation([0.5, 0, -0.01])
	    table.visual.vertex_colors = [150, 111, 87, 125]
	    scene.add_geometry(table)
	    scene.add_geometry(vis_pcs)

	    # Fixed viewpoint: the matrix is inverted and the y and z axes are flipped before it is
	    # used as the scene camera transform.
	    RT_4x4 = np.array([[-0.39560353822208355, -0.9183993826406329, 0.006357240869497738, 0.2651463080169481],
	                       [-0.797630370081598, 0.3401340617616391, -0.4980909683511864, 0.2225696480721997],
	                       [0.45528412367406523, -0.2021172778236285, -0.8671014777611122, 0.9449050652025951],
	                       [0.0, 0.0, 0.0, 1.0]])
	    RT_4x4 = np.linalg.inv(RT_4x4)
	    RT_4x4 = RT_4x4 @ np.diag([1, -1, -1, 1])
	    scene.camera_transform = RT_4x4

	    if return_scene:
	        return scene
	    scene.show()


	def get_trimesh_scene_with_table():
	    """Create a trimesh scene containing a coordinate frame and a table, with a fixed camera.

	    :return: the prepared ``trimesh.Scene``
	    """
	    scene = trimesh.Scene()

	    # add the coordinate frame first
	    geom = trimesh.creation.axis(0.01)
	    scene.add_geometry(geom)

	    # thin box standing in for the table surface
	    table = trimesh.creation.box(extents=[1.0, 1.0, 0.02])
	    table.apply_translation([0.5, 0, -0.01])
	    table.visual.vertex_colors = [150, 111, 87, 125]
	    scene.add_geometry(table)

	    # Fixed viewpoint: the matrix is inverted and the y and z axes are flipped before it is
	    # used as the scene camera transform.
	    RT_4x4 = np.array([[-0.39560353822208355, -0.9183993826406329, 0.006357240869497738, 0.2651463080169481],
	                       [-0.797630370081598, 0.3401340617616391, -0.4980909683511864, 0.2225696480721997],
	                       [0.45528412367406523, -0.2021172778236285, -0.8671014777611122, 0.9449050652025951],
	                       [0.0, 0.0, 0.0, 1.0]])
	    RT_4x4 = np.linalg.inv(RT_4x4)
	    RT_4x4 = RT_4x4 @ np.diag([1, -1, -1, 1])
	    scene.camera_transform = RT_4x4
	    return scene

	def show_pcs_with_predictions(xyz, rgb, gts, predictions, add_coordinate_frame=False, return_buffer=False, add_table=True, side_view=True):
	    """Display point clouds with a prediction and a ground-truth marker per object.

	    For every object a torus (prediction) and a sphere (ground truth) are placed 0.02 above
	    the object's mean point; each marker is green when its flag is truthy, red otherwise.

	    :param xyz: list of per-object point tensors (``torch.mean`` is applied to each)
	    :param rgb: list of per-object colors
	    :param gts: per-object ground-truth flags
	    :param predictions: per-object predicted flags
	    :param add_coordinate_frame: also draw a coordinate frame at the origin
	    :param return_buffer: render once and return the screen buffer instead of blocking
	    :param add_table: also draw a grey table box
	    :param side_view: apply the viewpoint of ``open3d_set_side_view``
	    :return: the captured float buffer if ``return_buffer`` is True, otherwise None
	    """

	    assert len(gts) == len(predictions) == len(xyz) == len(rgb)

	    all_points = np.concatenate(xyz, axis=0)
	    all_colors = np.concatenate(rgb, axis=0)
	    scene_pcd = open3d.geometry.PointCloud()
	    scene_pcd.points = open3d.utility.Vector3dVector(all_points)
	    scene_pcd.colors = open3d.utility.Vector3dVector(all_colors)

	    vis = open3d.visualization.Visualizer()
	    vis.create_window()
	    vis.add_geometry(scene_pcd)

	    if add_table:
	        table = open3d.geometry.TriangleMesh.create_box(width=1.0, height=1.0, depth=0.02)
	        table.paint_uniform_color([0.7, 0.7, 0.7])
	        table.translate([0, -0.5, -0.05])
	        vis.add_geometry(table)

	    if add_coordinate_frame:
	        vis.add_geometry(open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0]))

	    for obj_xyz, predicted, gt in zip(xyz, predictions, gts):
	        pred_color = [0.0, 1.0, 0] if predicted else [1.0, 0.0, 0]
	        gt_color = [0.0, 1.0, 0] if gt else [1.0, 0.0, 0]

	        # markers float slightly above the object's mean point
	        marker_pos = torch.mean(obj_xyz, dim=0)
	        marker_pos[2] += 0.02

	        pred_marker = open3d.geometry.TriangleMesh.create_torus(torus_radius=0.02, tube_radius=0.01)
	        pred_marker.paint_uniform_color(pred_color)
	        pred_marker.translate(marker_pos)

	        gt_marker = open3d.geometry.TriangleMesh.create_sphere(radius=0.01)
	        gt_marker.paint_uniform_color(gt_color)
	        gt_marker.translate(marker_pos)

	        vis.add_geometry(pred_marker)
	        vis.add_geometry(gt_marker)

	    if side_view:
	        open3d_set_side_view(vis)

	    if not return_buffer:
	        vis.run()
	        vis.destroy_window()
	        return

	    vis.poll_events()
	    vis.update_renderer()
	    buffer = vis.capture_screen_float_buffer(False)
	    vis.destroy_window()
	    return buffer


	def show_pcs_with_only_predictions(xyz, rgb, gts, predictions, add_coordinate_frame=False, return_buffer=False, add_table=True, side_view=True):
	    """Display point clouds with one axis-aligned bounding box per object, colored by prediction.

	    The box is green when the object's prediction is truthy and red otherwise. ``gts`` is
	    only used for the length check.

	    :param xyz: list of per-object points
	    :param rgb: list of per-object colors
	    :param gts: per-object ground-truth flags
	    :param predictions: per-object predicted flags
	    :param add_coordinate_frame: also draw a coordinate frame at the origin
	    :param return_buffer: render once and return the screen buffer instead of blocking
	    :param add_table: also draw a grey table box
	    :param side_view: apply the viewpoint of ``open3d_set_side_view``
	    :return: the captured float buffer if ``return_buffer`` is True, otherwise None
	    """

	    assert len(gts) == len(predictions) == len(xyz) == len(rgb)

	    unordered_pc = np.concatenate(xyz, axis=0)
	    unordered_rgb = np.concatenate(rgb, axis=0)
	    pcd = open3d.geometry.PointCloud()
	    pcd.points = open3d.utility.Vector3dVector(unordered_pc)
	    pcd.colors = open3d.utility.Vector3dVector(unordered_rgb)

	    vis = open3d.visualization.Visualizer()
	    vis.create_window()
	    vis.add_geometry(pcd)

	    if add_table:
	        table_color = [0.7, 0.7, 0.7]
	        origin = [0, -0.5, -0.05]
	        table = open3d.geometry.TriangleMesh.create_box(width=1.0, height=1.0, depth=0.02)
	        table.paint_uniform_color(table_color)
	        table.translate(origin)
	        vis.add_geometry(table)

	    if add_coordinate_frame:
	        mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])
	        vis.add_geometry(mesh_frame)

	    for i in range(len(xyz)):
	        pred_color = [0.0, 1.0, 0] if predictions[i] else [1.0, 0.0, 0]

	        # Per-object cloud that is only used to compute the bounding box; it is not drawn.
	        obj_pcd = open3d.geometry.PointCloud()
	        obj_pcd.points = open3d.utility.Vector3dVector(xyz[i])
	        # builtin float replaces the np.float alias, which NumPy removed
	        obj_pcd.colors = open3d.utility.Vector3dVector(np.tile(np.array(pred_color, dtype=float), (xyz[i].shape[0], 1)))

	        obb = obj_pcd.get_axis_aligned_bounding_box()
	        obb.color = pred_color
	        vis.add_geometry(obb)

	    if side_view:
	        open3d_set_side_view(vis)

	    if return_buffer:
	        vis.poll_events()
	        vis.update_renderer()
	        buffer = vis.capture_screen_float_buffer(False)
	        vis.destroy_window()
	        return buffer
	    else:
	        vis.run()
	        vis.destroy_window()


	def test_new_vis(xyz, rgb):
	    """Placeholder that currently does nothing; both arguments are ignored.

	    The commented-out code below is an unfinished experiment with ``open3d.visualization.draw``.
	    It refers to ``predictions`` and ``vis``, neither of which is defined in this function.
	    """
	    pass
	    # unordered_pc = np.concatenate(xyz, axis=0)
	    # unordered_rgb = np.concatenate(rgb, axis=0)
	    # pcd = open3d.geometry.PointCloud()
	    # pcd.points = open3d.utility.Vector3dVector(unordered_pc)
	    # pcd.colors = open3d.utility.Vector3dVector(unordered_rgb)
	    #
	    # # Some platforms do not require OpenGL implementations to support wide lines,
	    # # so the renderer requires a custom shader to implement this: "unlitLine".
	    # # The line_width field is only used by this shader; all other shaders ignore
	    # # it.
	    # # mat = o3d.visualization.rendering.Material()
	    # # mat.shader = "unlitLine"
	    # # mat.line_width = 10 # note that this is scaled with respect to pixels,
	    # # # so will give different results depending on the
	    # # # scaling values of your system
	    # # mat.transmission = 0.5
	    # open3d.visualization.draw({
	    # "name": "pcd",
	    # "geometry": pcd,
	    # # "material": mat
	    # })
	    #
	    # for i in range(len(xyz)):
	    # pred_color = [0.0, 1.0, 0] if predictions[i] else [1.0, 0.0, 0]
	    # pcd = open3d.geometry.PointCloud()
	    # pcd.points = open3d.utility.Vector3dVector(xyz[i])
	    # pcd.colors = open3d.utility.Vector3dVector(np.tile(np.array(pred_color, dtype=np.float), (xyz[i].shape[0], 1)))
	    # # pcd = pcd.uniform_down_sample(10)
	    # # vis.add_geometry(pcd)
	    #
	    # obb = pcd.get_axis_aligned_bounding_box()
	    # obb.color = pred_color
	    # vis.add_geometry(obb)


	def show_pcs(xyz, rgb, add_coordinate_frame=False, side_view=False, add_table=True):
	    """Display point clouds in a blocking open3d window.

	    :param xyz: list of point arrays, concatenated into one cloud
	    :param rgb: list of color arrays, concatenated the same way
	    :param add_coordinate_frame: also draw a coordinate frame at the origin
	    :param side_view: apply the viewpoint of ``open3d_set_side_view``
	    :param add_table: also draw a thin table box
	    """

	    all_points = np.concatenate(xyz, axis=0)
	    all_colors = np.concatenate(rgb, axis=0)
	    cloud = open3d.geometry.PointCloud()
	    cloud.points = open3d.utility.Vector3dVector(all_points)
	    cloud.colors = open3d.utility.Vector3dVector(all_colors)

	    # Geometries are collected in the order they are added to the window:
	    # cloud, optional coordinate frame, optional table.
	    geometries = [cloud]

	    if add_table:
	        table = open3d.geometry.TriangleMesh.create_box(width=1.0, height=1.0, depth=0.001)
	        table.paint_uniform_color([0.78, 0.64, 0.44])
	        table.translate([0, -0.5, -0.02])

	    if add_coordinate_frame:
	        geometries.append(open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0]))
	    if add_table:
	        geometries.append(table)

	    viewer = open3d.visualization.Visualizer()
	    viewer.create_window()
	    for geometry in geometries:
	        viewer.add_geometry(geometry)
	    if side_view:
	        open3d_set_side_view(viewer)
	    viewer.run()
	    viewer.destroy_window()


	def show_pcs_color_order(xyzs, rgbs, add_coordinate_frame=False, side_view=False, add_table=True, save_path=None, texts=None, visualize=False):
	    """Show and/or save point clouds, coloring each object by its position in the list.

	    Object ``i`` is painted uniformly with the ``i``-th color of ``get_rgb_colors()``
	    (red, green, blue, ... first); the colors in ``rgbs`` are not used.

	    :param xyzs: list of per-object point arrays
	    :param rgbs: unused
	    :param add_coordinate_frame: forwarded to ``show_pcs`` / ``save_pcs``
	    :param side_view: forwarded to ``show_pcs`` / ``save_pcs``
	    :param add_table: forwarded to ``show_pcs`` / ``save_pcs``
	    :param save_path: if given, an image of the scene is written to this path
	    :param texts: optional list of caption lines drawn onto the saved image
	    :param visualize: open an interactive window with ``show_pcs``
	    """

	    rgb_colors = get_rgb_colors()

	    # each entry of rgb_colors is (name, (r, g, b)); builtin float replaces the removed np.float alias
	    order_rgbs = [np.tile(np.array(rgb_colors[i][1], dtype=float), (xyz.shape[0], 1))
	                  for i, xyz in enumerate(xyzs)]

	    if visualize:
	        show_pcs(xyzs, order_rgbs, add_coordinate_frame=add_coordinate_frame, side_view=side_view, add_table=add_table)

	    if save_path:
	        if texts:
	            # Render to a buffer first so that the captions can be drawn before saving
	            buffer = save_pcs(xyzs, order_rgbs, add_coordinate_frame=add_coordinate_frame,
	                              side_view=side_view, add_table=add_table, return_buffer=True)
	            img = np.uint8(np.asarray(buffer) * 255)
	            img = Image.fromarray(img)
	            draw = ImageDraw.Draw(img)
	            font = ImageFont.truetype("FreeMono.ttf", 20)
	            for it, text in enumerate(texts):
	                draw.text((0, it*20), text, (120, 120, 120), font=font)
	            img.save(save_path)
	        else:
	            save_pcs(xyzs, order_rgbs, save_path=save_path, add_coordinate_frame=add_coordinate_frame, side_view=side_view, add_table=add_table)


	def get_rgb_colors():
	    """Return a list of ``(name, (r, g, b))`` colors.

	    Six priority colors (red, green, blue, orange, purple, magenta) come first, followed
	    by every named matplotlib color sorted by name. The priority colors therefore appear
	    a second time later in the list.
	    """
	    named_colors = [(name, matplotlib.colors.to_rgb(hex_code))
	                    for name, hex_code in matplotlib.colors.cnames.items()]
	    named_colors.sort(key=lambda entry: entry[0])

	    priority_colors = [
	        ('red', (1.0, 0.0, 0.0)),
	        ('green', (0.0, 1.0, 0.0)),
	        ('blue', (0.0, 0.0, 1.0)),
	        ('orange', (1.0, 0.6470588235294118, 0.0)),
	        ('purple', (0.5019607843137255, 0.0, 0.5019607843137255)),
	        ('magenta', (1.0, 0.0, 1.0)),
	    ]
	    return priority_colors + named_colors


	def open3d_set_side_view(vis):
	    """Point the camera of an open3d visualizer at a fixed viewpoint.

	    Also prints the resulting camera extrinsic matrix.

	    :param vis: open3d visualizer whose view control is modified
	    """
	    view_control = vis.get_view_control()

	    # Earlier viewpoints, kept for reference:
	    # ctr.set_front([-0.61959040621518757, 0.46765094085676973, 0.63040489055992976])
	    # ctr.set_lookat([0.28810001969337462, 0.10746435821056366, 0.23499999999999999])
	    # ctr.set_up([0.64188154672853504, -0.16037991603449936, 0.74984422549096852])
	    # ctr.set_zoom(0.7)
	    # ctr.rotate(10.0, 0.0)

	    # ctr.set_front([ -0.51720189814974493, 0.55636089622063711, 0.65035740151617438 ])
	    # ctr.set_lookat([ 0.23103321183824999, 0.26154772406860449, 0.15131956132592411 ])
	    # ctr.set_up([ 0.47073865286968591, -0.44969907810742304, 0.75906248744340343 ])
	    # ctr.set_zoom(3)

	    # ctr.set_front([-0.86019269757539152, 0.40355968763418076, 0.31178213796587784])
	    # ctr.set_lookat([0.28810001969337462, 0.10746435821056366, 0.23499999999999999])
	    # ctr.set_up([0.30587875107201218, -0.080905438599338214, 0.94862663869811026])
	    # ctr.set_zoom(0.69999999999999996)

	    # ctr.set_front([0.40466417238365116, 0.019007526352692254, 0.91426780624224468])
	    # ctr.set_lookat([0.61287602731590907, 0.010181152776318789, -0.073166629933366326])
	    # ctr.set_up([-0.91444954965885639, 0.0025306059632757057, 0.40469200283941076])
	    # ctr.set_zoom(0.84000000000000008)

	    front = [-0.45528412367406523, 0.20211727782362851, 0.86710147776111224]
	    lookat = [0.48308104105920047, 0.078726411326627957, -0.27298814087096795]
	    up = [0.79763037008159798, -0.34013406176163907, 0.49809096835118638]
	    zoom = 0.80000000000000004

	    view_control.set_front(front)
	    view_control.set_lookat(lookat)
	    view_control.set_up(up)
	    view_control.set_zoom(zoom)

	    camera_params = view_control.convert_to_pinhole_camera_parameters()
	    print("camera extrinsic", camera_params.extrinsic.tolist())


	def save_pcs(xyz, rgb, save_path=None, return_buffer=False, add_coordinate_frame=False, side_view=False, add_table=True):
	    """Render point clouds once with open3d and save and/or return the image.

	    :param xyz: list of point arrays, concatenated into one cloud
	    :param rgb: list of color arrays, concatenated the same way
	    :param save_path: if given, the rendered image is written to this path
	    :param return_buffer: if True, the rendered float buffer is returned
	    :param add_coordinate_frame: also draw a coordinate frame at the origin
	    :param side_view: apply the viewpoint of ``open3d_set_side_view``
	    :param add_table: also draw a grey table box
	    :return: the captured float buffer if ``return_buffer`` is True, otherwise None
	    """

	    assert save_path or return_buffer, "provide path to save or set return_buffer to true"

	    unordered_pc = np.concatenate(xyz, axis=0)
	    unordered_rgb = np.concatenate(rgb, axis=0)
	    pcd = open3d.geometry.PointCloud()
	    pcd.points = open3d.utility.Vector3dVector(unordered_pc)
	    pcd.colors = open3d.utility.Vector3dVector(unordered_rgb)

	    vis = open3d.visualization.Visualizer()
	    vis.create_window()

	    vis.add_geometry(pcd)
	    vis.update_geometry(pcd)

	    if add_table:
	        table_color = [0.7, 0.7, 0.7]
	        origin = [0, -0.5, -0.03]
	        table = open3d.geometry.TriangleMesh.create_box(width=1.0, height=1.0, depth=0.02)
	        table.paint_uniform_color(table_color)
	        table.translate(origin)
	        vis.add_geometry(table)

	    if add_coordinate_frame:
	        mesh_frame = open3d.geometry.TriangleMesh.create_coordinate_frame(size=0.1, origin=[0, 0, 0])
	        vis.add_geometry(mesh_frame)
	        vis.update_geometry(mesh_frame)

	    if side_view:
	        open3d_set_side_view(vis)

	    vis.poll_events()
	    vis.update_renderer()

	    # The two outputs are independent: a caller may ask for the file, the buffer, or both.
	    buffer = None
	    if save_path:
	        vis.capture_screen_image(save_path)
	    if return_buffer:
	        buffer = vis.capture_screen_float_buffer(False)

	    vis.destroy_window()

	    return buffer


	def get_initial_scene_idxs(dataset):
	    """Pick, for every filename in the dataset, the index of the entry with the largest ``t``.

	    ``dataset.get_data_index(idx)`` must return ``(filename, t)``. When several entries
	    of a file share the largest ``t``, the first one encountered is kept.

	    :param dataset: object supporting ``len()`` and ``get_data_index(idx)``
	    :return: list of dataset indices, one per filename, in order of first appearance
	    """
	    # filename -> (index, t) of the entry with the largest t seen so far
	    latest_per_file = {}
	    for idx in range(len(dataset)):
	        filename, t = dataset.get_data_index(idx)
	        best = latest_per_file.get(filename)
	        if best is None or t > best[1]:
	            latest_per_file[filename] = (idx, t)
	    return [best_idx for best_idx, _ in latest_per_file.values()]


	def get_initial_scene_idxs_raw_data(data):
	    """Pick, for every filename in ``data``, the index of the entry with the largest ``t``.

	    When several entries of a file share the largest ``t``, the first one encountered is kept.

	    :param data: indexable sequence of ``(filename, t)`` pairs
	    :return: list of indices into ``data``, one per filename, in order of first appearance
	    """
	    # filename -> (index, t) of the entry with the largest t seen so far
	    latest_per_file = {}
	    for idx in range(len(data)):
	        filename, t = data[idx]
	        best = latest_per_file.get(filename)
	        if best is None or t > best[1]:
	            latest_per_file[filename] = (idx, t)
	    return [best_idx for best_idx, _ in latest_per_file.values()]


	def evaluate_target_object_predictions(all_gts, all_predictions, all_sentences, initial_scene_idxs, tokenizer):
	    """
	    This function evaluates target object predictions and prints the results.

	    Reports are printed for: all objects pooled together, per-scene accuracy
	    (also shown as a histogram), initial scenes only, scenes grouped by number
	    of objects, and scenes grouped by the type of the first sentence element.

	    :param all_gts: a list of ground-truth labels for scenes. Each element is a list of
	                    per-object labels (presumably booleans) for one scene
	    :param all_predictions: predicted labels, same nested layout as all_gts
	    :param all_sentences: a list of descriptions for scenes; the first element of each
	                          description is unpacked as a (value, type) pair
	    :param initial_scene_idxs: indices of initial scenes; indices >= len(all_gts) are ignored
	    :param tokenizer: object exposing a ``discrete_types`` collection
	    :return: None
	    """

	    def _flatten(per_scene):
	        # classification_report expects flat label sequences, so the per-scene
	        # lists are concatenated into a single list of object labels.
	        return list(itertools.chain.from_iterable(per_scene))

	    # overall accuracy
	    print("\noverall accuracy")
	    report = classification_report(_flatten(all_gts), _flatten(all_predictions), output_dict=True)
	    print(report)

	    # scene average
	    print("\naccuracy per scene")
	    acc_per_scene = []
	    for gts, preds in zip(all_gts, all_predictions):
	        acc_per_scene.append(sum(np.array(gts) == np.array(preds)) * 1.0 / len(gts))
	    print(np.mean(acc_per_scene))
	    plt.hist(acc_per_scene, 10, range=(0, 1), facecolor='g', alpha=0.75)
	    plt.xlabel('Accuracy')
	    plt.ylabel('# Scene')
	    plt.title('Predicting objects to be rearranged')
	    plt.xticks(np.linspace(0, 1, 11), np.linspace(0, 1, 11).round(1))
	    plt.grid(True)
	    plt.show()

	    # initial scene accuracy
	    print("\noverall accuracy for initial scenes")
	    tested_initial_scene_idxs = [i for i in initial_scene_idxs if i < len(all_gts)]
	    initial_gts = [all_gts[i] for i in tested_initial_scene_idxs]
	    initial_predictions = [all_predictions[i] for i in tested_initial_scene_idxs]
	    report = classification_report(_flatten(initial_gts), _flatten(initial_predictions), output_dict=True)
	    print(report)

	    # break down by the number of objects
	    print("\naccuracy for # objects in scene")
	    num_objects_in_scenes = np.array([len(gts) for gts in all_gts])
	    unique_num_objects = np.unique(num_objects_in_scenes)
	    acc_per_scene = np.array(acc_per_scene)
	    assert len(acc_per_scene) == len(num_objects_in_scenes)
	    for num_objects in unique_num_objects:
	        this_scene_idxs = [i for i in range(len(all_gts)) if len(all_gts[i]) == num_objects]
	        this_num_obj_gts = [all_gts[i] for i in this_scene_idxs]
	        this_num_obj_predictions = [all_predictions[i] for i in this_scene_idxs]
	        report = classification_report(_flatten(this_num_obj_gts), _flatten(this_num_obj_predictions),
	                                       output_dict=True)
	        print("{} objects".format(num_objects))
	        print(report)

	    # reference
	    print("\noverall accuracy break down")
	    direct_gts_by_type = defaultdict(list)
	    direct_preds_by_type = defaultdict(list)
	    d_anchor_gts_by_type = defaultdict(list)
	    d_anchor_preds_by_type = defaultdict(list)
	    c_anchor_gts_by_type = defaultdict(list)
	    c_anchor_preds_by_type = defaultdict(list)

	    for i, s in enumerate(all_sentences):
	        v, t = s[0]
	        # strip the "_c" / "_d" suffix so both variants share one type name
	        if t[-2:] == "_c" or t[-2:] == "_d":
	            t = t[:-2]
	        if v != "MASK" and t in tokenizer.discrete_types:
	            # direct reference
	            direct_gts_by_type[t].extend(all_gts[i])
	            direct_preds_by_type[t].extend(all_predictions[i])
	        elif v == "MASK":
	            # discrete anchor
	            d_anchor_gts_by_type[t].extend(all_gts[i])
	            d_anchor_preds_by_type[t].extend(all_predictions[i])
	        else:
	            # continuous anchor
	            c_anchor_gts_by_type[t].extend(all_gts[i])
	            c_anchor_preds_by_type[t].extend(all_predictions[i])

	    print("direct")
	    for t in direct_gts_by_type:
	        report = classification_report(direct_gts_by_type[t], direct_preds_by_type[t], output_dict=True)
	        print(t, report)

	    print("discrete anchor")
	    for t in d_anchor_gts_by_type:
	        report = classification_report(d_anchor_gts_by_type[t], d_anchor_preds_by_type[t], output_dict=True)
	        print(t, report)

	    print("continuous anchor")
	    for t in c_anchor_gts_by_type:
	        report = classification_report(c_anchor_gts_by_type[t], c_anchor_preds_by_type[t], output_dict=True)
	        print(t, report)

	    # TODO: break down by object class (not implemented)


	def combine_and_sample_xyzs(xyzs, rgbs, center=None, radius=0.5, num_pts=1024):
	    """
	    Merge several point sets and draw ``num_pts`` points with replacement.

	    :param xyzs: list of tensors concatenated along dim 0 (point coordinates)
	    :param rgbs: list of tensors concatenated along dim 0, indexed in lockstep with xyzs
	    :param center: optional tensor; when given, only points whose distance to it
	                   (norm over the last dim) is below ``radius`` are kept
	    :param radius: distance threshold used together with ``center``
	    :param num_pts: number of points to sample (indices drawn with np.random.randint)
	    :return: (xyz, rgb) tensors holding the sampled points
	    """
	    all_xyz = torch.cat(xyzs, dim=0)
	    all_rgb = torch.cat(rgbs, dim=0)

	    if center is not None:
	        # Tile the center so it lines up row-by-row with the merged points.
	        tiled_center = center.repeat(all_xyz.shape[0], 1)
	        within = torch.linalg.norm(all_xyz - tiled_center, dim=-1) < radius
	        all_xyz, all_rgb = all_xyz[within], all_rgb[within]

	    # Sampling is with replacement, so fewer than num_pts source points is fine.
	    picks = np.random.randint(0, all_xyz.shape[0], num_pts)
	    return all_xyz[picks], all_rgb[picks]


	def evaluate_prior_prediction(gts, predictions, keys, debug=False):
	    """
	    Print per-key error statistics between predictions and ground truths.

	    For every key the tensors are concatenated along dim 0, entries whose
	    ground truth equals -100 are dropped, and the mean absolute error, mean
	    squared error and median absolute error are printed. Keys containing
	    "theta" additionally get geodesic rotation distances (in degrees) after
	    reshaping to 3x3 matrices. Keys containing obj_x/obj_y/obj_z (or
	    struct_x/struct_y/struct_z) are combined into a Euclidean distance summary.

	    :param gts: mapping from key to a list of tensors
	    :param predictions: mapping from key to a list of tensors, same shapes as gts
	    :param keys: keys to evaluate
	    :param debug: if True, also print the first 100 ground truths and predictions per key
	    :return: negative sum of the per-key mean squared errors
	    """

	    total_mses = 0
	    obj_dists = []
	    struct_dists = []
	    for key in keys:
	        # predictions[key][0]: [batch_size * number_of_objects, dim]
	        predictions_for_key = torch.cat(predictions[key], dim=0)
	        # gts[key][0]: [batch_size * number_of_objects, dim]
	        gts_for_key = torch.cat(gts[key], dim=0)

	        assert gts_for_key.shape == predictions_for_key.shape

	        # -100 marks entries that must not contribute to the statistics
	        target_indices = gts_for_key != -100
	        gts_for_key = gts_for_key[target_indices]
	        predictions_for_key = predictions_for_key[target_indices]
	        num_objects = len(predictions_for_key)

	        distances = predictions_for_key - gts_for_key

	        me = torch.mean(torch.abs(distances))
	        mse = torch.mean(distances ** 2)
	        med = torch.median(torch.abs(distances))

	        if "obj_x" in key or "obj_y" in key or "obj_z" in key:
	            obj_dists.append(distances)
	        if "struct_x" in key or "struct_y" in key or "struct_z" in key:
	            struct_dists.append(distances)

	        if debug:
	            print("Groundtruths:")
	            print(gts_for_key[:100])
	            print("Predictions")
	            print(predictions_for_key[:100])

	        print("{} ME for {} objects: {}".format(key, num_objects, me))
	        print("{} MSE for {} objects: {}".format(key, num_objects, mse))
	        print("{} MEDIAN for {} objects: {}".format(key, num_objects, med))
	        total_mses += mse

	        if "theta" in key:
	            predictions_for_key = predictions_for_key.reshape(-1, 3, 3)
	            gts_for_key = gts_for_key.reshape(-1, 3, 3)
	            geodesic_distance = compute_geodesic_distance_from_two_matrices(predictions_for_key, gts_for_key)
	            geodesic_distance = torch.rad2deg(geodesic_distance)
	            mgd = torch.mean(geodesic_distance)
	            stdgd = torch.std(geodesic_distance)
	            megd = torch.median(geodesic_distance)
	            print("{} Mean and std Geodesic Distance for {} objects: {} +- {}".format(key, num_objects, mgd, stdgd))
	            print("{} Median Geodesic Distance for {} objects: {}".format(key, num_objects, megd))

	    # The Euclidean summaries need all three axis components; they are skipped
	    # when fewer than three matching keys were evaluated.
	    if len(obj_dists) >= 3:
	        euclidean_dists = torch.sqrt(obj_dists[0] ** 2 + obj_dists[1] ** 2 + obj_dists[2] ** 2)
	        me = torch.mean(euclidean_dists)
	        stde = torch.std(euclidean_dists)
	        med = torch.median(euclidean_dists)
	        print("Mean and std euclidean dist for {} objects: {} +- {}".format(len(euclidean_dists), me, stde))
	        print("Median euclidean dist for {} objects: {}".format(len(euclidean_dists), med))
	    if len(struct_dists) >= 3:
	        euclidean_dists = torch.sqrt(struct_dists[0] ** 2 + struct_dists[1] ** 2 + struct_dists[2] ** 2)
	        me = torch.mean(euclidean_dists)
	        stde = torch.std(euclidean_dists)
	        med = torch.median(euclidean_dists)
	        print("Mean euclidean dist for {} structures: {} +- {}".format(len(euclidean_dists), me, stde))
	        print("Median euclidean dist for {} structures: {}".format(len(euclidean_dists), med))

	    return -total_mses


	def generate_square_subsequent_mask(sz):
	    """
	    Build a (sz, sz) float mask with 0.0 on and below the diagonal and -inf above it.

	    :param sz: side length of the square mask
	    :return: float32 tensor of shape (sz, sz)
	    """
	    # True where column <= row, i.e. the positions that stay at 0.0.
	    visible = torch.tril(torch.ones((sz, sz))) == 1
	    blank = torch.zeros((sz, sz), dtype=torch.float32)
	    return blank.masked_fill(~visible, float('-inf'))


	def visualize_occ(points, occupancies, in_num_pts=1000, out_num_pts=1000, visualize=False, threshold=0.5):
	    """
	    Split points into occupied / unoccupied sets and wrap them as colored point clouds.

	    Points are shuffled first, then those with occupancy above ``threshold`` form
	    the "in" set and those below it form the "out" set; each set is truncated to
	    its requested size. Points with occupancy exactly equal to ``threshold``
	    belong to neither set.

	    :param points: array of points, indexed along the first axis
	    :param occupancies: per-point occupancy values, squeezed before thresholding
	    :param in_num_pts: maximum number of "in" points to keep
	    :param out_num_pts: maximum number of "out" points to keep
	    :param visualize: if True, show both clouds in a trimesh scene
	    :param threshold: occupancy value separating "in" from "out"
	    :return: (in_pc, out_pc) trimesh.PointCloud objects
	    """
	    order = np.random.permutation(points.shape[0])
	    shuffled_points = points[order]
	    occ_flat = occupancies[order].squeeze()

	    inside = shuffled_points[occ_flat > threshold, :][:in_num_pts]
	    outside = shuffled_points[occ_flat < threshold, :][:out_num_pts]

	    for selected, warning in ((inside, "no in points"), (outside, "no out points")):
	        if len(selected) == 0:
	            print(warning)

	    in_pc = trimesh.PointCloud(inside)
	    out_pc = trimesh.PointCloud(outside)
	    # RGBA colors: opaque red for "in", semi-transparent yellow for "out".
	    in_pc.colors = np.tile((255, 0, 0, 255), (in_pc.vertices.shape[0], 1))
	    out_pc.colors = np.tile((255, 255, 0, 120), (out_pc.vertices.shape[0], 1))

	    if visualize:
	        trimesh.Scene([in_pc, out_pc]).show()

	    return in_pc, out_pc


	def save_dict_to_h5(dict_data, filename):
	    """
	    Write every entry of a dictionary as a dataset in a new HDF5 file.

	    The file is opened in 'w' mode, so an existing file at ``filename`` is
	    overwritten. The file is closed even when writing fails part-way.

	    :param dict_data: mapping from dataset name to the data to store
	    :param filename: path of the HDF5 file to create
	    :raises RuntimeError: if any value is None
	    :raises TypeError: re-raised from ``create_dataset`` after printing the offending key and value
	    """
	    # The context manager guarantees the handle is released on every exit path.
	    with h5py.File(filename, 'w') as fh:
	        for k in dict_data:
	            key_data = dict_data[k]
	            if key_data is None:
	                raise RuntimeError('data was not properly populated')
	            try:
	                fh.create_dataset(k, data=key_data)
	            except TypeError as e:
	                print("Failure on key", k)
	                print(key_data)
	                print(e)
	                # bare raise keeps the original traceback
	                raise


	def load_h5_key(h5, key):
	    """
	    Read one entry from an open h5 container, falling back to a JSON-encoded variant.

	    :param h5: container supporting ``in`` and ``[]`` lookups by name
	    :param key: name of the entry to read
	    :return: the stored value if ``key`` exists; otherwise the JSON-decoded value
	             stored under ``"json_" + key`` if that exists; otherwise None
	    """
	    if key in h5:
	        return h5[key][()]

	    json_key = "json_" + key
	    if json_key in h5:
	        return json.loads(h5[json_key][()])

	    return None


	def load_dict_from_h5(filename):
	    """
	    Load every top-level entry of an HDF5 file into a dictionary.

	    Each entry is read in full with ``[()]`` while the file is open, and the
	    file is closed before returning.

	    :param filename: path of the HDF5 file to read
	    :return: dict mapping each top-level name to its loaded value
	    """
	    # The context manager closes the file handle, which was previously leaked.
	    with h5py.File(filename, "r") as h5:
	        return {k: h5[k][()] for k in h5}