import glob
import os
import pickle
import numpy as np
import yaml
from PIL import Image
import xml.etree.ElementTree as ET
from lidm.data.base import DatasetBase
from .annotated_dataset import Annotated3DObjectsDataset
from .conditional_builder.utils import corners_3d_to_2d
from .helper_types import Annotation
from ..utils.lidar_utils import pcd2range, pcd2coord2d, range2pcd
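# Datasets for SemanticKITTI and KITTI-360: LiDAR sweeps are projected to range images,
# with optional semantic-map, camera, and 3D bounding-box conditioning.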
# TODO add annotation categories and semantic categories
CATEGORIES = ['ignore', 'car', 'bicycle', 'motorcycle', 'truck', 'other-vehicle', 'person', 'bicyclist', 'motorcyclist',
'road', 'parking', 'sidewalk', 'other-ground', 'building', 'fence', 'vegetation', 'trunk', 'terrain',
'pole', 'traffic-sign']
CATE2LABEL = {k: v for v, k in enumerate(CATEGORIES)}  # 0: ignore/invalid, 1~19: semantic categories
LABEL2RGB = np.array([(0, 0, 0), (0, 0, 142), (119, 11, 32), (0, 0, 230), (0, 0, 70), (0, 0, 90), (220, 20, 60),
(255, 0, 0), (0, 0, 110), (128, 64, 128), (250, 170, 160), (244, 35, 232), (230, 150, 140),
(70, 70, 70), (190, 153, 153), (107, 142, 35), (0, 80, 100), (230, 150, 140), (153, 153, 153),
(220, 220, 0)])
CAMERAS = ['CAM_FRONT']
BBOX_CATS = ['car', 'people', 'cycle']
BBOX_CAT2LABEL = {'car': 0, 'truck': 0, 'bus': 0, 'caravan': 0, 'person': 1, 'rider': 2, 'motorcycle': 2, 'bicycle': 2}
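# KITTI-360 box labels are merged into three coarse classes: vehicles -> 'car' (0), pedestrians -> 'people' (1), riders and two-wheelers -> 'cycle' (2)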
# training sequences (KITTI_TRAIN_SET additionally covers the unlabeled test sequences 11-21)
SEM_KITTI_TRAIN_SET = ['00', '01', '02', '03', '04', '05', '06', '07', '09', '10']
KITTI_TRAIN_SET = SEM_KITTI_TRAIN_SET + ['11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21']
KITTI360_TRAIN_SET = ['00', '02', '04', '05', '06', '07', '09', '10'] + ['08']  # sequence '02' contains partial test data
CAM_KITTI360_TRAIN_SET = ['00', '04', '05', '06', '07', '08', '09', '10']  # camera images mismatch the lidar in sequence '02'
# validation
SEM_KITTI_VAL_SET = KITTI_VAL_SET = ['08']
CAM_KITTI360_VAL_SET = KITTI360_VAL_SET = ['03']
class KITTIBase(DatasetBase):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.dataset_name = 'kitti'
self.num_sem_cats = kwargs['dataset_config'].num_sem_cats + 1
@staticmethod
def load_lidar_sweep(path):
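        # each point in a KITTI velodyne .bin file is stored as four float32 values (x, y, z, reflectance)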
scan = np.fromfile(path, dtype=np.float32)
scan = scan.reshape((-1, 4))
points = scan[:, 0:3] # get xyz
return points
def load_semantic_map(self, path, pcd):
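        # project per-point semantic labels onto the range image (implemented by the SemanticKITTI subclasses)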
raise NotImplementedError
def load_camera(self, path):
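        # load the camera condition for camera-to-lidar generation (implemented by the KITTI-360 subclasses)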
raise NotImplementedError
def __getitem__(self, idx):
example = dict()
data_path = self.data[idx]
# lidar point cloud
sweep = self.load_lidar_sweep(data_path)
if self.lidar_transform:
sweep, _ = self.lidar_transform(sweep, None)
if self.condition_key == 'segmentation':
# semantic maps
proj_range, sem_map = self.load_semantic_map(data_path, sweep)
example[self.condition_key] = sem_map
else:
proj_range, _ = pcd2range(sweep, self.img_size, self.fov, self.depth_range)
proj_range, proj_mask = self.process_scan(proj_range)
example['image'], example['mask'] = proj_range, proj_mask
if self.return_pcd:
reproj_sweep, _, _ = range2pcd(proj_range[0] * .5 + .5, self.fov, self.depth_range, self.depth_scale, self.log_scale)
example['raw'] = sweep
example['reproj'] = reproj_sweep.astype(np.float32)
# image degradation
if self.degradation_transform:
degraded_proj_range = self.degradation_transform(proj_range)
example['degraded_image'] = degraded_proj_range
# cameras
if self.condition_key == 'camera':
cameras = self.load_camera(data_path)
example[self.condition_key] = cameras
return example
class SemanticKITTIBase(KITTIBase):
def __init__(self, **kwargs):
super().__init__(**kwargs)
assert self.condition_key in ['segmentation'] # for segmentation input only
self.label2rgb = LABEL2RGB
def prepare_data(self):
# read data paths from KITTI
for seq_id in eval('SEM_KITTI_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'dataset/sequences/{seq_id}/velodyne/*.bin')))
# read label mapping
data_config = yaml.safe_load(open('./data/config/semantic-kitti.yaml', 'r'))
remap_dict = data_config["learning_map"]
max_key = max(remap_dict.keys())
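        # dense lookup table (with headroom beyond the largest raw id) from raw SemanticKITTI ids to learning labels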
self.learning_map = np.zeros((max_key + 100), dtype=np.int32)
self.learning_map[list(remap_dict.keys())] = list(remap_dict.values())
def load_semantic_map(self, path, pcd):
label_path = path.replace('velodyne', 'labels').replace('.bin', '.label')
labels = np.fromfile(label_path, dtype=np.uint32)
labels = labels.reshape((-1))
labels = labels & 0xFFFF # semantic label in lower half
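        # remap raw ids to the condensed learning categories defined in semantic-kitti.yaml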
labels = self.learning_map[labels]
proj_range, sem_map = pcd2range(pcd, self.img_size, self.fov, self.depth_range, labels=labels)
# sem_map = np.expand_dims(sem_map, axis=0).astype(np.int64)
sem_map = sem_map.astype(np.int64)
if self.filtered_map_cats is not None:
sem_map[np.isin(sem_map, self.filtered_map_cats)] = 0 # set filtered category as noise
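        # one-hot encode the (H, W) label map into a (num_sem_cats, H, W) array for conditioning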
onehot = np.eye(self.num_sem_cats, dtype=np.float32)[sem_map].transpose(2, 0, 1)
return proj_range, onehot
class SemanticKITTITrain(SemanticKITTIBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/SemanticKITTI', split='train', **kwargs)
class SemanticKITTIValidation(SemanticKITTIBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/SemanticKITTI', split='val', **kwargs)
class KITTI360Base(KITTIBase):
def __init__(self, split_per_view=None, **kwargs):
super().__init__(**kwargs)
self.split_per_view = split_per_view
if self.condition_key == 'camera':
assert self.split_per_view is not None, 'For camera-to-lidar, need to specify split_per_view'
def prepare_data(self):
# read data paths
self.data = []
if self.condition_key == 'camera':
seq_list = eval('CAM_KITTI360_%s_SET' % self.split.upper())
else:
seq_list = eval('KITTI360_%s_SET' % self.split.upper())
for seq_id in seq_list:
self.data.extend(glob.glob(os.path.join(
self.data_root, f'data_3d_raw/2013_05_28_drive_00{seq_id}_sync/velodyne_points/data/*.bin')))
def random_drop_camera(self, camera_list):
if np.random.rand() < self.aug_config['camera_drop'] and self.split == 'train':
camera_list = [np.zeros_like(c) if i != len(camera_list) // 2 else c for i, c in enumerate(camera_list)] # keep the middle view only
return camera_list
def load_camera(self, path):
camera_path = path.replace('data_3d_raw', 'data_2d_camera').replace('velodyne_points/data', 'image_00/data_rect').replace('.bin', '.png')
camera = np.array(Image.open(camera_path)).astype(np.float32) / 255.
camera = camera.transpose(2, 0, 1)
if self.view_transform:
camera = self.view_transform(camera)
camera_list = np.split(camera, self.split_per_view, axis=2) # split into n chunks as different views
camera_list = self.random_drop_camera(camera_list)
return camera_list
class KITTI360Train(KITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', **kwargs)
class KITTI360Validation(KITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='val', **kwargs)
class AnnotatedKITTI360Base(Annotated3DObjectsDataset, KITTI360Base):
def __init__(self, **kwargs):
self.id_bbox_dict = dict()
self.id_label_dict = dict()
Annotated3DObjectsDataset.__init__(self, **kwargs)
KITTI360Base.__init__(self, **kwargs)
assert self.condition_key in ['center', 'bbox'] # for annotated images only
@staticmethod
def parseOpencvMatrix(node):
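        # parse an OpenCV-style matrix node (<rows>, <cols>, <data>) from the KITTI-360 bbox XML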
rows = int(node.find('rows').text)
cols = int(node.find('cols').text)
data = node.find('data').text.split(' ')
mat = []
for d in data:
d = d.replace('\n', '')
if len(d) < 1:
continue
mat.append(float(d))
mat = np.reshape(mat, [rows, cols])
return mat
def parseVertices(self, child):
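        # apply the per-object rigid transform (R, T) to the annotated vertices, yielding world-frame corners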
transform = self.parseOpencvMatrix(child.find('transform'))
R = transform[:3, :3]
T = transform[:3, 3]
vertices = self.parseOpencvMatrix(child.find('vertices'))
vertices = np.matmul(R, vertices.transpose()).transpose() + T
return vertices
def parse_bbox_xml(self, path):
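        # collect per-timestamp box vertices and merged category labels from one KITTI-360 annotation file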
tree = ET.parse(path)
root = tree.getroot()
bbox_dict = dict()
label_dict = dict()
for child in root:
if child.find('transform') is None:
continue
label_name = child.find('label').text
if label_name not in BBOX_CAT2LABEL:
continue
label = BBOX_CAT2LABEL[label_name]
timestamp = int(child.find('timestamp').text)
# verts = self.parseVertices(child)
verts = self.parseOpencvMatrix(child.find('vertices'))[:8]
if timestamp in bbox_dict:
bbox_dict[timestamp].append(verts)
label_dict[timestamp].append(label)
else:
bbox_dict[timestamp] = [verts]
label_dict[timestamp] = [label]
return bbox_dict, label_dict
def prepare_data(self):
KITTI360Base.prepare_data(self)
self.data = [p for p in self.data if '2013_05_28_drive_0008_sync' not in p] # remove unlabeled sequence 08
seq_list = eval('KITTI360_%s_SET' % self.split.upper())
for seq_id in seq_list:
if seq_id != '08':
xml_path = os.path.join(self.data_root, f'data_3d_bboxes/train/2013_05_28_drive_00{seq_id}_sync.xml')
bbox_dict, label_dict = self.parse_bbox_xml(xml_path)
self.id_bbox_dict[seq_id] = bbox_dict
self.id_label_dict[seq_id] = label_dict
def load_annotation(self, path):
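        # recover the sequence id and frame index from the velodyne path, then fetch the cached boxes and labels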
seq_id = path.split('/')[-4].split('_')[-2][-2:]
timestamp = int(path.split('/')[-1].replace('.bin', ''))
verts_list = self.id_bbox_dict[seq_id][timestamp]
label_list = self.id_label_dict[seq_id][timestamp]
if self.condition_key == 'bbox':
points = np.stack(verts_list)
elif self.condition_key == 'center':
            points = np.stack([(verts[0] + verts[6]) / 2. for verts in verts_list])  # per-box center as midpoint of two opposite corners
else:
raise NotImplementedError
labels = np.array([label_list])
if self.anno_transform:
points, labels = self.anno_transform(points, labels)
return points, labels
def __getitem__(self, idx):
example = dict()
data_path = self.data[idx]
# lidar point cloud
sweep = self.load_lidar_sweep(data_path)
# annotations
bbox_points, bbox_labels = self.load_annotation(data_path)
if self.lidar_transform:
sweep, bbox_points = self.lidar_transform(sweep, bbox_points)
# point cloud -> range
proj_range, _ = pcd2range(sweep, self.img_size, self.fov, self.depth_range)
proj_range, proj_mask = self.process_scan(proj_range)
example['image'], example['mask'] = proj_range, proj_mask
if self.return_pcd:
example['reproj'] = sweep
# annotation -> range
        # NOTE: no need to transform the bbox points together with the lidar here, since their coordinates are in range-image space rather than 3D space
proj_bbox_points, proj_bbox_labels = pcd2coord2d(bbox_points, self.fov, self.depth_range, labels=bbox_labels)
builder = self.conditional_builders[self.condition_key]
if self.condition_key == 'bbox':
proj_bbox_points = corners_3d_to_2d(proj_bbox_points)
annotations = [Annotation(bbox=bbox.flatten(), category_id=label) for bbox, label in
zip(proj_bbox_points, proj_bbox_labels)]
else:
annotations = [Annotation(center=center, category_id=label) for center, label in
zip(proj_bbox_points, proj_bbox_labels)]
example[self.condition_key] = builder.build(annotations)
return example
class AnnotatedKITTI360Train(AnnotatedKITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', cats=BBOX_CATS, **kwargs)
class AnnotatedKITTI360Validation(AnnotatedKITTI360Base):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset/KITTI-360', split='train', cats=BBOX_CATS, **kwargs)
class KITTIImageBase(KITTIBase):
"""
Range ImageSet only combining KITTI-360 and SemanticKITTI
#Samples (Training): 98014, #Samples (Val): 3511
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
assert self.condition_key in [None, 'image'] # for image input only
def prepare_data(self):
# read data paths from KITTI-360
self.data = []
for seq_id in eval('KITTI360_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'KITTI-360/data_3d_raw/2013_05_28_drive_00{seq_id}_sync/velodyne_points/data/*.bin')))
# read data paths from KITTI
for seq_id in eval('KITTI_%s_SET' % self.split.upper()):
self.data.extend(glob.glob(os.path.join(
self.data_root, f'SemanticKITTI/dataset/sequences/{seq_id}/velodyne/*.bin')))
class KITTIImageTrain(KITTIImageBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset', split='train', **kwargs)
class KITTIImageValidation(KITTIImageBase):
def __init__(self, **kwargs):
super().__init__(data_root='./dataset', split='val', **kwargs)
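# Usage sketch (illustrative only): these datasets are normally built from the project's YAML
# configs; `dataset_config` is the only kwarg read directly in this file, the remaining kwargs
# are assumed to be consumed by DatasetBase. The names below are placeholders, not the real entry point.
#   train_set = KITTIImageTrain(dataset_config=my_config)  # `my_config` must provide num_sem_cats, etc.
#   sample = train_set[0]  # dict with 'image' (range image) and 'mask'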