import glob
import os
import pickle
import numpy as np
import yaml
from PIL import Image
import xml.etree.ElementTree as ET
from lidm.data.base import DatasetBase
from .annotated_dataset import Annotated3DObjectsDataset
from .conditional_builder.utils import corners_3d_to_2d
from .helper_types import Annotation
from ..utils.lidar_utils import pcd2range, pcd2coord2d, range2pcd
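# Datasets for SemanticKITTI and KITTI-360: LiDAR sweeps are projected to range images,
# with optional semantic-map, camera, and 3D bounding-box conditioning.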
# TODO add annotation categories and semantic categories
CATEGORIES = ['ignore', 'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person', 'bicyclist', 'motorcyclist',
'road', 'parking', 'sidewalk', 'other-ground', 'building', 'fence', 'vegetation', 'trunk', 'terrain',
'pole', 'traffic-sign']
CATE2LABEL = {k: v for v, k in enumerate(CATEGORIES)}  # 0: ignore/invalid, 1~19: semantic categories
LABEL2RGB = np.array([(0, 0, 0), (0, 0, 142), (119, 11, 32), (0, 0, 230), (0, 0, 70), (0, 0, 90), (220, 20, 60),
(255, 0, 0), (0, 0, 110), (128, 64, 128), (250, 170, 160), (244, 35, 232), (230, 150, 140),
(70, 70, 70), (190, 153, 153), (107, 142, 35), (0, 80, 100), (230, 150, 140), (153, 153, 153),
(220, 220, 0)])
CAMERAS = ['CAM_FRONT']
BBOX_CATS = ['car', 'people', 'cycle']
BBOX_CAT2LABEL = {'car': 0, 'truck': 0, 'bus': 0, 'caravan': 0, 'person': 1, 'rider': 2, 'motorcycle': 2, 'bicycle': 2}
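# KITTI-360 box labels are merged into three coarse classes: vehicles -> 'car' (0), pedestrians -> 'people' (1), riders and two-wheelers -> 'cycle' (2)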
# training sequences (KITTI_TRAIN_SET additionally covers the unlabeled test sequences 11-21)
SEM_KITTI_TRAIN_SET = ['00', '01', '02', '03', '04', '05', '06', '07', '09', '10']
KITTI_TRAIN_SET = SEM_KITTI_TRAIN_SET + ['11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21']
KITTI360_TRAIN_SET = ['00', '02', '04', '05', '06', '07', '09', '10'] + ['08']  # sequence '02' contains partial test data
CAM_KITTI360_TRAIN_SET = ['00', '04', '05', '06', '07', '08', '09', '10']  # camera images mismatch the lidar in sequence '02'
# validation
SEM_KITTI_VAL_SET = KITTI_VAL_SET = ['08']
CAM_KITTI360_VAL_SET = KITTI360_VAL_SET = ['03']
class KITTIBase(DatasetBase):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.dataset_name = 'kitti'
self.num_sem_cats = kwargs['dataset_config'].num_sem_cats + 1
@staticmethod
def load_lidar_sweep(path):
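        # each point in a KITTI velodyne .bin file is stored as four float32 values (x, y, z, reflectance)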
scan = np.fromfile(path, dtype=np.float32)
scan = scan.reshape((-1, 4))
points = scan[:, 0:3] # get xyz
return points
def load_semantic_map(self, path, pcd):
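        # project per-point semantic labels onto the range image (implemented by the SemanticKITTI subclasses)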
raise NotImplementedError
def load_camera(self, path):
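        # load the camera condition for camera-to-lidar generation (implemented by the KITTI-360 subclasses)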
raise NotImplementedError
def __getitem__(self, idx):
example = dict()
data_path = self.data[idx]
# lidar point cloud
sweep = self.load_lidar_sweep(data_path)
if self.lidar_transform:
sweep, _ = self.lidar_transform(sweep, None)
if self.condition_key == 'segmentation':
# semantic maps
proj_range, sem_map = self.load_semantic_map(data_path, sweep)
example[self.condition_key] = sem_map
else:
proj_range, _ = pcd2range(sweep, self.img_size, self.fov, self.depth_range)
proj_range, proj_mask = self.process_scan(proj_range)
example['image'], example['mask'] = proj_range, proj_mask
if self.return_pcd:
reproj_sweep, _, _ = range2pcd(proj_range[0] * .5 + .5, self.fov, self.depth_range, self.depth_scale, self.log_scale)
example['raw'] = sweep
example['reproj'] = reproj_sweep.astype(np.float32)
# image degradation
if self.degradation_transform:
degraded_proj_range = self.degradation_transform(proj_range)
example['degraded_image'] = degraded_proj_range
# cameras
if self.condition_key == 'camera':
cameras = self.load_camera(data_path)
example[self.condition_key] = cameras
return example
class SemanticKITTIBase(KITTIBase):
def __init__(self, **kwargs):
super().__init__(**kwargs)
assert self.condition_key in ['segmentation'] # for segmentation input only
self.label2rgb = LABEL2RGB
def prepare_data(self):
# read data paths from KITTI
for seq_id in eval('SEM_KITTI_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'dataset/sequences/{seq_id}/velodyne/*.bin')))
# read label mapping
data_config = yaml.safe_load(open('./data/config/semantic-kitti.yaml', 'r'))
remap_dict = data_config["learning_map"]
max_key = max(remap_dict.keys())
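        # dense lookup table (with headroom beyond the largest raw id) from raw SemanticKITTI ids to learning labels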
self.learning_map = np.zeros((max_key + 100), dtype=np.int32)
self.learning_map[list(remap_dict.keys())] = list(remap_dict.values())
def load_semantic_map(self, path, pcd):
label_path = path.replace('velodyne', 'labels').replace('.bin', '.label')
labels = np.fromfile(label_path, dtype=np.uint32)
labels = labels.reshape((-1))
labels = labels & 0xFFFF # semantic label in lower half
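        # remap raw ids to the condensed learning categories defined in semantic-kitti.yaml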
labels = self.learning_map[labels]
proj_range, sem_map = pcd2range(pcd, self.img_size, self.fov, self.depth_range, labels=labels)
# sem_map = np.expand_dims(sem_map, axis=0).astype(np.int64)
sem_map = sem_map.astype(np.int64)
if self.filtered_map_cats is not None:
sem_map[np.isin(sem_map, self.filtered_map_cats)] = 0 # set filtered category as noise
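        # one-hot encode the (H, W) label map into a (num_sem_cats, H, W) array for conditioning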
onehot = np.eye(self.num_sem_cats, dtype=np.float32)[sem_map].transpose(2, 0, 1)
return proj_range, onehot
class SemanticKITTITrain(SemanticKITTIBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/SemanticKITTI', split='train', **kwargs)
class SemanticKITTIValidation(SemanticKITTIBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/SemanticKITTI', split='val', **kwargs)
class KITTI360Base(KITTIBase):
def __init__(self, split_per_view=None, **kwargs):
super().__init__(**kwargs)
self.split_per_view = split_per_view
if self.condition_key == 'camera':
assert self.split_per_view is not None, 'For camera-to-lidar, need to specify split_per_view'
def prepare_data(self):
# read data paths
self.data = []
if self.condition_key == 'camera':
seq_list = eval('CAM_KITTI360_%s_SET' % self.split.upper())
else:
seq_list = eval('KITTI360_%s_SET' % self.split.upper())
for seq_id in seq_list:
self.data.extend(glob.glob(os.path.join(
self.data_root, f'data_3d_raw/2013_05_28_drive_00{seq_id}_sync/velodyne_points/data/*.bin')))
def random_drop_camera(self, camera_list):
if np.random.rand() < self.aug_config['camera_drop'] and self.split == 'train':
camera_list = [np.zeros_like(c) if i != len(camera_list) // 2 else c for i, c in enumerate(camera_list)] # keep the middle view only
return camera_list
def load_camera(self, path):
camera_path = path.replace('data_3d_raw', 'data_2d_camera').replace('velodyne_points/data', 'image_00/data_rect').replace('.bin', '.png')
camera = np.array(Image.open(camera_path)).astype(np.float32) / 255.
camera = camera.transpose(2, 0, 1)
if self.view_transform:
camera = self.view_transform(camera)
camera_list = np.split(camera, self.split_per_view, axis=2) # split into n chunks as different views
camera_list = self.random_drop_camera(camera_list)
return camera_list
class KITTI360Train(KITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', **kwargs)
class KITTI360Validation(KITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='val', **kwargs)
class AnnotatedKITTI360Base(Annotated3DObjectsDataset, KITTI360Base):
def __init__(self, **kwargs):
self.id_bbox_dict = dict()
self.id_label_dict = dict()
Annotated3DObjectsDataset.__init__(self, **kwargs)
KITTI360Base.__init__(self, **kwargs)
assert self.condition_key in ['center', 'bbox'] # for annotated images only
@staticmethod
def parseOpencvMatrix(node):
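        # parse an OpenCV-style matrix node (<rows>, <cols>, <data>) from the KITTI-360 bbox XML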
rows = int(node.find('rows').text)
cols = int(node.find('cols').text)
data = node.find('data').text.split(' ')
mat = []
for d in data:
d = d.replace('\n', '')
if len(d) < 1:
continue
mat.append(float(d))
mat = np.reshape(mat, [rows, cols])
return mat
def parseVertices(self, child):
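        # apply the per-object rigid transform (R, T) to the annotated vertices, yielding world-frame corners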
transform = self.parseOpencvMatrix(child.find('transform'))
R = transform[:3, :3]
T = transform[:3, 3]
vertices = self.parseOpencvMatrix(child.find('vertices'))
vertices = np.matmul(R, vertices.transpose()).transpose() + T
return vertices
def parse_bbox_xml(self, path):
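        # collect per-timestamp box vertices and merged category labels from one KITTI-360 annotation file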
tree = ET.parse(path)
root = tree.getroot()
bbox_dict = dict()
label_dict = dict()
for child in root:
if child.find('transform') is None:
continue
label_name = child.find('label').text
if label_name not in BBOX_CAT2LABEL:
continue
label = BBOX_CAT2LABEL[label_name]
timestamp = int(child.find('timestamp').text)
# verts = self.parseVertices(child)
verts = self.parseOpencvMatrix(child.find('vertices'))[:8]
if timestamp in bbox_dict:
bbox_dict[timestamp].append(verts)
label_dict[timestamp].append(label)
else:
bbox_dict[timestamp] = [verts]
label_dict[timestamp] = [label]
return bbox_dict, label_dict
def prepare_data(self):
KITTI360Base.prepare_data(self)
self.data = [p for p in self.data if '2013_05_28_drive_0008_sync' not in p] # remove unlabeled sequence 08
seq_list = eval('KITTI360_%s_SET' % self.split.upper())
for seq_id in seq_list:
if seq_id != '08':
xml_path = os.path.join(self.data_root, f'data_3d_bboxes/train/2013_05_28_drive_00{seq_id}_sync.xml')
bbox_dict, label_dict = self.parse_bbox_xml(xml_path)
self.id_bbox_dict[seq_id] = bbox_dict
self.id_label_dict[seq_id] = label_dict
def load_annotation(self, path):
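        # recover the sequence id and frame index from the velodyne path, then fetch the cached boxes and labels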
seq_id = path.split('/')[-4].split('_')[-2][-2:]
timestamp = int(path.split('/')[-1].replace('.bin', ''))
verts_list = self.id_bbox_dict[seq_id][timestamp]
label_list = self.id_label_dict[seq_id][timestamp]
if self.condition_key == 'bbox':
points = np.stack(verts_list)
elif self.condition_key == 'center':
            points = np.stack([(verts[0] + verts[6]) / 2. for verts in verts_list])  # per-box center as midpoint of two opposite corners
else:
raise NotImplementedError
labels = np.array([label_list])
if self.anno_transform:
points, labels = self.anno_transform(points, labels)
return points, labels
def __getitem__(self, idx):
example = dict()
data_path = self.data[idx]
# lidar point cloud
sweep = self.load_lidar_sweep(data_path)
# annotations
bbox_points, bbox_labels = self.load_annotation(data_path)
if self.lidar_transform:
sweep, bbox_points = self.lidar_transform(sweep, bbox_points)
# point cloud -> range
proj_range, _ = pcd2range(sweep, self.img_size, self.fov, self.depth_range)
proj_range, proj_mask = self.process_scan(proj_range)
example['image'], example['mask'] = proj_range, proj_mask
if self.return_pcd:
example['reproj'] = sweep
# annotation -> range
        # NOTE: no need to transform the bbox points together with the lidar here, since their coordinates are in range-image space rather than 3D space
proj_bbox_points, proj_bbox_labels = pcd2coord2d(bbox_points, self.fov, self.depth_range, labels=bbox_labels)
builder = self.conditional_builders[self.condition_key]
if self.condition_key == 'bbox':
proj_bbox_points = corners_3d_to_2d(proj_bbox_points)
annotations = [Annotation(bbox=bbox.flatten(), category_id=label) for bbox, label in
zip(proj_bbox_points, proj_bbox_labels)]
else:
annotations = [Annotation(center=center, category_id=label) for center, label in
zip(proj_bbox_points, proj_bbox_labels)]
example[self.condition_key] = builder.build(annotations)
return example
class AnnotatedKITTI360Train(AnnotatedKITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', cats=BBOX_CATS, **kwargs)
class AnnotatedKITTI360Validation(AnnotatedKITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', cats=BBOX_CATS, **kwargs)
class KITTIImageBase(KITTIBase):
"""
Range ImageSet only combining KITTI-360 and SemanticKITTI
#Samples (Training): 98014, #Samples (Val): 3511
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
assert self.condition_key in [None, 'image'] # for image input only
def prepare_data(self):
# read data paths from KITTI-360
self.data = []
for seq_id in eval('KITTI360_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'KITTI-360/data_3d_raw/2013_05_28_drive_00{seq_id}_sync/velodyne_points/data/*.bin')))
# read data paths from KITTI
for seq_id in eval('KITTI_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'SemanticKITTI/dataset/sequences/{seq_id}/velodyne/*.bin')))
class KITTIImageTrain(KITTIImageBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset', split='train', **kwargs)
class KITTIImageValidation(KITTIImageBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset', split='val', **kwargs)
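# Usage sketch (illustrative only): these datasets are normally built from the project's YAML
# configs; `dataset_config` is the only kwarg read directly in this file, the remaining kwargs
# are assumed to be consumed by DatasetBase. The names below are placeholders, not the real entry point.
#   train_set = KITTIImageTrain(dataset_config=my_config)  # `my_config` must provide num_sem_cats, etc.
#   sample = train_set[0]  # dict with 'image' (range image) and 'mask'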