Realcat
fix: eloftr
63f3cf2
# -*- coding: UTF-8 -*-
'''=================================================
@Project -> File pram -> basicdataset
@IDE PyCharm
@Author [email protected]
@Date 29/01/2024 14:27
=================================================='''
import torchvision.transforms.functional as tvf
import torchvision.transforms as tvt
import os.path as osp
import numpy as np
import cv2
from colmap_utils.read_write_model import qvec2rotmat, read_model
from dataset.utils import normalize_size
class BasicDataset:
def __init__(self,
img_list_fn,
feature_dir,
sfm_path,
seg_fn,
dataset_path,
n_class,
dataset,
nfeatures=1024,
query_p3d_fn=None,
train=True,
with_aug=False,
min_inliers=0,
max_inliers=4096,
random_inliers=False,
jitter_params=None,
scale_params=None,
image_dim=1,
pre_load=False,
query_info_path=None,
sc_mean_scale_fn=None,
):
self.n_class = n_class
self.train = train
self.min_inliers = min_inliers
self.max_inliers = max_inliers if max_inliers < nfeatures else nfeatures
self.random_inliers = random_inliers
self.dataset_path = dataset_path
self.with_aug = with_aug
self.dataset = dataset
self.jitter_params = jitter_params
self.scale_params = scale_params
self.image_dim = image_dim
self.image_prefix = ''
train_transforms = []
if self.with_aug:
train_transforms.append(tvt.ColorJitter(
brightness=jitter_params['brightness'],
contrast=jitter_params['contrast'],
saturation=jitter_params['saturation'],
hue=jitter_params['hue']))
if jitter_params['blur'] > 0:
train_transforms.append(tvt.GaussianBlur(kernel_size=int(jitter_params['blur'])))
self.train_transforms = tvt.Compose(train_transforms)
# only for testing of query images
if not self.train:
data = np.load(query_p3d_fn, allow_pickle=True)[()]
self.img_p3d = data
else:
self.img_p3d = {}
self.img_fns = []
with open(img_list_fn, 'r') as f:
lines = f.readlines()
for l in lines:
l = l.strip()
self.img_fns.append(l)
print('Load {} images from {} for {}...'.format(len(self.img_fns), dataset, 'training' if train else 'eval'))
self.feats = {}
if train:
self.cameras, self.images, point3Ds = read_model(path=sfm_path, ext='.bin')
self.name_to_id = {image.name: i for i, image in self.images.items()}
data = np.load(seg_fn, allow_pickle=True)[()]
p3d_id = data['id']
seg_id = data['label']
self.p3d_seg = {p3d_id[i]: seg_id[i] for i in range(p3d_id.shape[0])}
self.p3d_xyzs = {}
for pid in self.p3d_seg.keys():
p3d = point3Ds[pid]
self.p3d_xyzs[pid] = p3d.xyz
with open(sc_mean_scale_fn, 'r') as f:
lines = f.readlines()
for l in lines:
l = l.strip().split()
self.mean_xyz = np.array([float(v) for v in l[:3]])
self.scale_xyz = np.array([float(v) for v in l[3:]])
if not train:
self.query_info = self.read_query_info(path=query_info_path)
self.nfeatures = nfeatures
self.feature_dir = feature_dir
print('Pre loaded {} feats, mean xyz {}, scale xyz {}'.format(len(self.feats.keys()), self.mean_xyz,
self.scale_xyz))
def normalize_p3ds(self, p3ds):
mean_p3ds = np.ceil(np.mean(p3ds, axis=0))
p3ds_ = p3ds - mean_p3ds
dx = np.max(abs(p3ds_[:, 0]))
dy = np.max(abs(p3ds_[:, 1]))
dz = np.max(abs(p3ds_[:, 2]))
scale_p3ds = np.ceil(np.array([dx, dy, dz], dtype=float).reshape(3, ))
scale_p3ds[scale_p3ds < 1] = 1
scale_p3ds[scale_p3ds == 0] = 1
return mean_p3ds, scale_p3ds
def read_query_info(self, path):
query_info = {}
with open(path, 'r') as f:
lines = f.readlines()
for l in lines:
l = l.strip().split()
image_name = l[0]
cam_model = l[1]
h, w = int(l[2]), int(l[3])
params = np.array([float(v) for v in l[4:]])
query_info[image_name] = {
'width': w,
'height': h,
'model': cam_model,
'params': params,
}
return query_info
def extract_intrinsic_extrinsic_params(self, image_id):
cam = self.cameras[self.images[image_id].camera_id]
params = cam.params
model = cam.model
if model in ("SIMPLE_PINHOLE", "SIMPLE_RADIAL", "RADIAL"):
fx = fy = params[0]
cx = params[1]
cy = params[2]
elif model in ("PINHOLE", "OPENCV", "OPENCV_FISHEYE", "FULL_OPENCV"):
fx = params[0]
fy = params[1]
cx = params[2]
cy = params[3]
else:
raise Exception("Camera model not supported")
K = np.eye(3, dtype=float)
K[0, 0] = fx
K[1, 1] = fy
K[0, 2] = cx
K[1, 2] = cy
qvec = self.images[image_id].qvec
tvec = self.images[image_id].tvec
R = qvec2rotmat(qvec=qvec)
P = np.eye(4, dtype=float)
P[:3, :3] = R
P[:3, 3] = tvec.reshape(3, )
return {'K': K, 'P': P}
def get_item_train(self, idx):
img_name = self.img_fns[idx]
if img_name in self.feats.keys():
feat_data = self.feats[img_name]
else:
feat_data = np.load(osp.join(self.feature_dir, img_name.replace('/', '+') + '.npy'), allow_pickle=True)[()]
# descs = feat_data['descriptors'] # [N, D]
scores = feat_data['scores'] # [N, 1]
kpts = feat_data['keypoints'] # [N, 2]
image_size = feat_data['image_size']
nfeat = kpts.shape[0]
# print(img_name, self.name_to_id[img_name])
p3d_ids = self.images[self.name_to_id[img_name]].point3D_ids
p3d_xyzs = np.zeros(shape=(nfeat, 3), dtype=float)
seg_ids = np.zeros(shape=(nfeat,), dtype=int) # + self.n_class - 1
for i in range(nfeat):
p3d = p3d_ids[i]
if p3d in self.p3d_seg.keys():
seg_ids[i] = self.p3d_seg[p3d] + 1 # 0 for invalid
if seg_ids[i] == -1:
seg_ids[i] = 0
if p3d in self.p3d_xyzs.keys():
p3d_xyzs[i] = self.p3d_xyzs[p3d]
seg_ids = np.array(seg_ids).reshape(-1, )
n_inliers = np.sum(seg_ids > 0)
n_outliers = np.sum(seg_ids == 0)
inlier_ids = np.where(seg_ids > 0)[0]
outlier_ids = np.where(seg_ids == 0)[0]
if n_inliers <= self.min_inliers:
sel_inliers = n_inliers
sel_outliers = self.nfeatures - sel_inliers
out_ids = np.arange(n_outliers)
np.random.shuffle(out_ids)
sel_ids = np.hstack([inlier_ids, outlier_ids[out_ids[:self.nfeatures - n_inliers]]])
else:
sel_inliers = np.random.randint(self.min_inliers, self.max_inliers)
if sel_inliers > n_inliers:
sel_inliers = n_inliers
if sel_inliers + n_outliers < self.nfeatures:
sel_inliers = self.nfeatures - n_outliers
sel_outliers = self.nfeatures - sel_inliers
in_ids = np.arange(n_inliers)
np.random.shuffle(in_ids)
sel_inlier_ids = inlier_ids[in_ids[:sel_inliers]]
out_ids = np.arange(n_outliers)
np.random.shuffle(out_ids)
sel_outlier_ids = outlier_ids[out_ids[:sel_outliers]]
sel_ids = np.hstack([sel_inlier_ids, sel_outlier_ids])
# sel_descs = descs[sel_ids]
sel_scores = scores[sel_ids]
sel_kpts = kpts[sel_ids]
sel_seg_ids = seg_ids[sel_ids]
sel_xyzs = p3d_xyzs[sel_ids]
shuffle_ids = np.arange(sel_ids.shape[0])
np.random.shuffle(shuffle_ids)
# sel_descs = sel_descs[shuffle_ids]
sel_scores = sel_scores[shuffle_ids]
sel_kpts = sel_kpts[shuffle_ids]
sel_seg_ids = sel_seg_ids[shuffle_ids]
sel_xyzs = sel_xyzs[shuffle_ids]
if sel_kpts.shape[0] < self.nfeatures:
# print(sel_descs.shape, sel_kpts.shape, sel_scores.shape, sel_seg_ids.shape, sel_xyzs.shape)
valid_sel_ids = np.array([v for v in range(sel_kpts.shape[0]) if sel_seg_ids[v] > 0], dtype=int)
# ref_sel_id = np.random.choice(valid_sel_ids, size=1)[0]
if valid_sel_ids.shape[0] == 0:
valid_sel_ids = np.array([v for v in range(sel_kpts.shape[0])], dtype=int)
random_n = self.nfeatures - sel_kpts.shape[0]
random_scores = np.random.random((random_n,))
random_kpts, random_seg_ids, random_xyzs = self.random_points_from_reference(
n=random_n,
ref_kpts=sel_kpts[valid_sel_ids],
ref_segs=sel_seg_ids[valid_sel_ids],
ref_xyzs=sel_xyzs[valid_sel_ids],
radius=5,
)
# sel_descs = np.vstack([sel_descs, random_descs])
sel_scores = np.hstack([sel_scores, random_scores])
sel_kpts = np.vstack([sel_kpts, random_kpts])
sel_seg_ids = np.hstack([sel_seg_ids, random_seg_ids])
sel_xyzs = np.vstack([sel_xyzs, random_xyzs])
gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
uids = np.unique(sel_seg_ids).tolist()
for uid in uids:
if uid == 0:
continue
gt_cls[uid] = 1
gt_n_seg[uid] = np.sum(sel_seg_ids == uid)
gt_cls_dist[uid] = np.sum(seg_ids == uid) / np.sum(seg_ids > 0) # [valid_id / total_valid_id]
param_out = self.extract_intrinsic_extrinsic_params(image_id=self.name_to_id[img_name])
img = self.read_image(image_name=img_name)
image_size = img.shape[:2]
if self.image_dim == 1:
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
else:
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
if self.with_aug:
nh = img.shape[0]
nw = img.shape[1]
if self.scale_params is not None:
do_scale = np.random.random()
if do_scale <= 0.25:
p = np.random.randint(0, 11)
s = self.scale_params[0] + (self.scale_params[1] - self.scale_params[0]) / 10 * p
nh = int(img.shape[0] * s)
nw = int(img.shape[1] * s)
sh = nh / img.shape[0]
sw = nw / img.shape[1]
sel_kpts[:, 0] = sel_kpts[:, 0] * sw
sel_kpts[:, 1] = sel_kpts[:, 1] * sh
img = cv2.resize(img, dsize=(nw, nh))
brightness = np.random.uniform(-self.jitter_params['brightness'], self.jitter_params['brightness']) * 255
contrast = 1 + np.random.uniform(-self.jitter_params['contrast'], self.jitter_params['contrast'])
img = cv2.addWeighted(img, contrast, img, 0, brightness)
img = np.clip(img, a_min=0, a_max=255)
if self.image_dim == 1:
img = img[..., None]
img = img.astype(float) / 255.
image_size = np.array([nh, nw], dtype=int)
else:
if self.image_dim == 1:
img = img[..., None].astype(float) / 255.
output = {
# 'descriptors': sel_descs, # may not be used
'scores': sel_scores,
'keypoints': sel_kpts,
'norm_keypoints': normalize_size(x=sel_kpts, size=image_size),
'image': [img],
'gt_seg': sel_seg_ids,
'gt_cls': gt_cls,
'gt_cls_dist': gt_cls_dist,
'gt_n_seg': gt_n_seg,
'file_name': img_name,
'prefix_name': self.image_prefix,
# 'mean_xyz': self.mean_xyz,
# 'scale_xyz': self.scale_xyz,
# 'gt_sc': sel_xyzs,
# 'gt_norm_sc': (sel_xyzs - self.mean_xyz) / self.scale_xyz,
'K': param_out['K'],
'gt_P': param_out['P']
}
return output
def get_item_test(self, idx):
# evaluation of recognition only
img_name = self.img_fns[idx]
feat_data = np.load(osp.join(self.feature_dir, img_name.replace('/', '+') + '.npy'), allow_pickle=True)[()]
descs = feat_data['descriptors'] # [N, D]
scores = feat_data['scores'] # [N, 1]
kpts = feat_data['keypoints'] # [N, 2]
image_size = feat_data['image_size']
nfeat = descs.shape[0]
if img_name in self.img_p3d.keys():
p3d_ids = self.img_p3d[img_name]
p3d_xyzs = np.zeros(shape=(nfeat, 3), dtype=float)
seg_ids = np.zeros(shape=(nfeat,), dtype=int) # attention! by default invalid!!!
for i in range(nfeat):
p3d = p3d_ids[i]
if p3d in self.p3d_seg.keys():
seg_ids[i] = self.p3d_seg[p3d] + 1
if seg_ids[i] == -1:
seg_ids[i] = 0 # 0 for in valid
if p3d in self.p3d_xyzs.keys():
p3d_xyzs[i] = self.p3d_xyzs[p3d]
seg_ids = np.array(seg_ids).reshape(-1, )
if self.nfeatures > 0:
sorted_ids = np.argsort(scores)[::-1][:self.nfeatures] # large to small
descs = descs[sorted_ids]
scores = scores[sorted_ids]
kpts = kpts[sorted_ids]
p3d_xyzs = p3d_xyzs[sorted_ids]
seg_ids = seg_ids[sorted_ids]
gt_n_seg = np.zeros(shape=(self.n_class,), dtype=int)
gt_cls = np.zeros(shape=(self.n_class,), dtype=int)
gt_cls_dist = np.zeros(shape=(self.n_class,), dtype=float)
uids = np.unique(seg_ids).tolist()
for uid in uids:
if uid == 0:
continue
gt_cls[uid] = 1
gt_n_seg[uid] = np.sum(seg_ids == uid)
gt_cls_dist[uid] = np.sum(seg_ids == uid) / np.sum(
seg_ids < self.n_class - 1) # [valid_id / total_valid_id]
gt_cls[0] = 0
img = self.read_image(image_name=img_name)
if self.image_dim == 1:
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img = img[..., None].astype(float) / 255.
else:
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(float) / 255.
return {
'descriptors': descs,
'scores': scores,
'keypoints': kpts,
'image_size': image_size,
'norm_keypoints': normalize_size(x=kpts, size=image_size),
'gt_seg': seg_ids,
'gt_cls': gt_cls,
'gt_cls_dist': gt_cls_dist,
'gt_n_seg': gt_n_seg,
'file_name': img_name,
'prefix_name': self.image_prefix,
'image': [img],
'mean_xyz': self.mean_xyz,
'scale_xyz': self.scale_xyz,
'gt_sc': p3d_xyzs,
'gt_norm_sc': (p3d_xyzs - self.mean_xyz) / self.scale_xyz
}
def __getitem__(self, idx):
if self.train:
return self.get_item_train(idx=idx)
else:
return self.get_item_test(idx=idx)
def __len__(self):
return len(self.img_fns)
def read_image(self, image_name):
return cv2.imread(osp.join(self.dataset_path, image_name))
def jitter_augmentation(self, img, params):
brightness, contrast, saturation, hue = params
p = np.random.randint(0, 20) / 20
b = brightness[0] + (brightness[1] - brightness[0]) / 20 * p
img = tvf.adjust_brightness(img=img, brightness_factor=b)
p = np.random.randint(0, 20) / 20
c = contrast[0] + (contrast[1] - contrast[0]) / 20 * p
img = tvf.adjust_contrast(img=img, contrast_factor=c)
p = np.random.randint(0, 20) / 20
s = saturation[0] + (saturation[1] - saturation[0]) / 20 * p
img = tvf.adjust_saturation(img=img, saturation_factor=s)
p = np.random.randint(0, 20) / 20
h = hue[0] + (hue[1] - hue[0]) / 20 * p
img = tvf.adjust_hue(img=img, hue_factor=h)
return img
def random_points(self, n, d, h, w):
desc = np.random.random((n, d))
desc = desc / np.linalg.norm(desc, ord=2, axis=1)[..., None]
xs = np.random.randint(0, w - 1, size=(n, 1))
ys = np.random.randint(0, h - 1, size=(n, 1))
kpts = np.hstack([xs, ys])
return desc, kpts
def random_points_from_reference(self, n, ref_kpts, ref_segs, ref_xyzs, radius=5):
n_ref = ref_kpts.shape[0]
if n_ref < n:
ref_ids = np.random.choice([i for i in range(n_ref)], size=n).tolist()
else:
ref_ids = [i for i in range(n)]
new_xs = []
new_ys = []
# new_descs = []
new_segs = []
new_xyzs = []
for i in ref_ids:
nx = np.random.randint(-radius, radius) + ref_kpts[i, 0]
ny = np.random.randint(-radius, radius) + ref_kpts[i, 1]
new_xs.append(nx)
new_ys.append(ny)
# new_descs.append(ref_descs[i])
new_segs.append(ref_segs[i])
new_xyzs.append(ref_xyzs[i])
new_xs = np.array(new_xs).reshape(n, 1)
new_ys = np.array(new_ys).reshape(n, 1)
new_segs = np.array(new_segs).reshape(n, )
new_kpts = np.hstack([new_xs, new_ys])
# new_descs = np.array(new_descs).reshape(n, -1)
new_xyzs = np.array(new_xyzs)
return new_kpts, new_segs, new_xyzs