from pathlib import Path
import typing
import logging
import numpy as np
from transforms3d.quaternions import qinverse, rotate_vector, qmult
VARIANTS_ANGLE_SIN = 'sin'
VARIANTS_ANGLE_COS = 'cos'
def convert_world2cam_to_cam2world(q, t):
qinv = qinverse(q)
tinv = -rotate_vector(t, qinv)
return qinv, tinv
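
# Illustrative sketch (not part of the evaluation code itself): converting a world2cam
# pose to cam2world and back recovers the original unit quaternion and translation,
# since the mapping is its own inverse.
def _example_convert_pose():
    q_w2c = np.array([np.cos(np.pi / 4), 0.0, np.sin(np.pi / 4), 0.0])  # 90 deg about y
    t_w2c = np.array([1.0, 2.0, 3.0])
    q_c2w, t_c2w = convert_world2cam_to_cam2world(q_w2c, t_w2c)
    q_back, t_back = convert_world2cam_to_cam2world(q_c2w, t_c2w)
    assert np.allclose(q_back, q_w2c) and np.allclose(t_back, t_w2c)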
def load_poses(file: typing.IO, load_confidence: bool = False):
"""Load poses from text file and converts them to cam2world convention (t is the camera center in world coordinates)
The text file encodes world2cam poses with the format:
imgpath qw qx qy qz tx ty tz [confidence]
where qw qx qy qz is the quaternion encoding rotation,
and tx ty tz is the translation vector,
and confidence is a float encoding confidence, for estimated poses
"""
expected_parts = 9 if load_confidence else 8
poses = dict()
for line_number, line in enumerate(file.readlines()):
parts = tuple(line.strip().split(' '))
if len(parts) != expected_parts:
logging.warning(
f'Invalid number of fields in file {file.name} line {line_number}.'
f' Expected {expected_parts}, received {len(parts)}. Ignoring line.')
continue
try:
name = parts[0]
if '#' in name:
logging.info(f'Ignoring comment line in {file.name} line {line_number}')
continue
            # extract the zero-padded frame number from names like 'seq1/frame_00000.jpg'
            frame_num = int(name[-9:-4])
except ValueError:
logging.warning(
f'Invalid frame number in file {file.name} line {line_number}.'
f' Expected formatting "seq1/frame_00000.jpg". Ignoring line.')
continue
try:
parts_float = tuple(map(float, parts[1:]))
if any(np.isnan(v) or np.isinf(v) for v in parts_float):
raise ValueError()
qw, qx, qy, qz, tx, ty, tz = parts_float[:7]
confidence = parts_float[7] if load_confidence else None
except ValueError:
logging.warning(
f'Error parsing pose in file {file.name} line {line_number}. Ignoring line.')
continue
q = np.array((qw, qx, qy, qz), dtype=np.float64)
t = np.array((tx, ty, tz), dtype=np.float64)
if np.isclose(np.linalg.norm(q), 0):
logging.warning(
f'Error parsing pose in file {file.name} line {line_number}. '
'Quaternion must have non-zero norm. Ignoring line.')
continue
q, t = convert_world2cam_to_cam2world(q, t)
poses[frame_num] = (q, t, confidence)
return poses
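
# Illustrative usage sketch ('pose_estimates.txt' is a hypothetical path, not part of
# the dataset layout): estimated-pose files carry a trailing confidence value per line,
# hence load_confidence=True; ground-truth files omit it.
def _example_load_poses():
    with Path('pose_estimates.txt').open('r', encoding='utf-8') as f:
        estimated_poses = load_poses(f, load_confidence=True)
    for frame_num, (q, t, confidence) in estimated_poses.items():
        print(frame_num, q, t, confidence)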
def subsample_poses(poses: dict, subsample: int = 1):
    """Keeps every `subsample`-th pose of the input dict, preserving insertion order."""
    return {k: v for i, (k, v) in enumerate(poses.items()) if i % subsample == 0}
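
# Illustrative sketch: subsample_poses keeps every `subsample`-th entry by insertion
# order, so a dict of 10 frames subsampled by 5 keeps frames 0 and 5.
def _example_subsample_poses():
    poses = {i: (None, None, None) for i in range(10)}
    assert list(subsample_poses(poses, subsample=5).keys()) == [0, 5]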
def load_K(file_path: Path):
    """Loads per-frame intrinsics from a text file with lines formatted as
    "imgpath fx fy cx cy W H". Returns (K, W, H), where K maps frame numbers to
    3x3 intrinsic matrices and W, H are the image width and height."""
    K = dict()
with file_path.open('r', encoding='utf-8') as f:
for line in f.readlines():
if '#' in line:
continue
line = line.strip().split(' ')
frame_num = int(line[0][-9:-4])
fx, fy, cx, cy, W, H = map(float, line[1:])
K[frame_num] = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float32)
return K, W, H
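
# Illustrative usage sketch ('intrinsics.txt' is a hypothetical path): each non-comment
# line is expected to read "imgpath fx fy cx cy W H"; the returned dict maps frame
# numbers to 3x3 intrinsic matrices, with a single image size for the whole file.
def _example_load_K():
    K, W, H = load_K(Path('intrinsics.txt'))
    print(f'{len(K)} intrinsic matrices, image size {int(W)}x{int(H)}')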
def quat_angle_error(label, pred, variant=VARIANTS_ANGLE_SIN) -> np.ndarray:
    """Computes the angle (in degrees) of the relative rotation between two quaternions,
    using either the arcsin of the relative quaternion's vector part (VARIANTS_ANGLE_SIN)
    or the arccos of the absolute dot product (VARIANTS_ANGLE_COS)."""
assert label.shape == (4,)
assert pred.shape == (4,)
assert variant in (VARIANTS_ANGLE_SIN, VARIANTS_ANGLE_COS), \
f"Need variant to be in ({VARIANTS_ANGLE_SIN}, {VARIANTS_ANGLE_COS})"
if len(label.shape) == 1:
label = np.expand_dims(label, axis=0)
if len(label.shape) != 2 or label.shape[0] != 1 or label.shape[1] != 4:
raise RuntimeError(f"Unexpected shape of label: {label.shape}, expected: (1, 4)")
if len(pred.shape) == 1:
pred = np.expand_dims(pred, axis=0)
if len(pred.shape) != 2 or pred.shape[0] != 1 or pred.shape[1] != 4:
raise RuntimeError(f"Unexpected shape of pred: {pred.shape}, expected: (1, 4)")
label = label.astype(np.float64)
pred = pred.astype(np.float64)
q1 = pred / np.linalg.norm(pred, axis=1, keepdims=True)
q2 = label / np.linalg.norm(label, axis=1, keepdims=True)
if variant == VARIANTS_ANGLE_COS:
d = np.abs(np.sum(np.multiply(q1, q2), axis=1, keepdims=True))
d = np.clip(d, a_min=-1, a_max=1)
angle = 2. * np.degrees(np.arccos(d))
elif variant == VARIANTS_ANGLE_SIN:
if q1.shape[0] != 1 or q2.shape[0] != 1:
raise NotImplementedError(f"Multiple angles is todo")
# https://www.researchgate.net/post/How_do_I_calculate_the_smallest_angle_between_two_quaternions/5d6ed4a84f3a3e1ed3656616/citation/download
sine = qmult(q1[0], qinverse(q2[0])) # note: takes first element in 2D array
# 114.59 = 2. * 180. / pi
angle = np.arcsin(np.linalg.norm(sine[1:], keepdims=True)) * 114.59155902616465
angle = np.expand_dims(angle, axis=0)
return angle.astype(np.float64)
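
# Illustrative sketch: both variants agree on the relative rotation angle; here the
# error between the identity and a 90 degree rotation about z is 90 degrees.
def _example_quat_angle_error():
    q_identity = np.array([1.0, 0.0, 0.0, 0.0])
    q_90deg_z = np.array([np.cos(np.pi / 4), 0.0, 0.0, np.sin(np.pi / 4)])
    err_sin = quat_angle_error(q_identity, q_90deg_z, variant=VARIANTS_ANGLE_SIN)
    err_cos = quat_angle_error(q_identity, q_90deg_z, variant=VARIANTS_ANGLE_COS)
    assert np.allclose(err_sin, 90.0) and np.allclose(err_cos, 90.0)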
def precision_recall(inliers, tp, failures):
"""
Computes Precision/Recall plot for a set of poses given inliers (confidence) and wether the
estimated pose error (whatever it may be) is within a threshold.
Each point in the plot is obtained by choosing a threshold for inliers (i.e. inlier_thr).
Recall measures how many images have inliers >= inlier_thr
Precision measures how many images that have inliers >= inlier_thr have
estimated pose error <= pose_threshold (measured by counting tps)
Where pose_threshold is (trans_thr[m], rot_thr[deg])
Inputs:
- inliers [N]
- terr [N]
- rerr [N]
- failures (int)
- pose_threshold (tuple float)
Output
- precision [N]
- recall [N]
- average_precision (scalar)
"""
assert len(inliers) == len(tp), 'unequal shapes'
# sort by inliers (descending order)
inliers = np.array(inliers)
sort_idx = np.argsort(inliers)[::-1]
inliers = inliers[sort_idx]
tp = np.array(tp).reshape(-1)[sort_idx]
    # get indices where the inlier value changes (to avoid splitting tied values)
distinct_value_indices = np.where(np.diff(inliers))[0]
threshold_idxs = np.r_[distinct_value_indices, inliers.size - 1]
# compute prec/recall
N = inliers.shape[0]
rec = np.arange(N, dtype=np.float32) + 1
cum_tp = np.cumsum(tp)
prec = cum_tp[threshold_idxs] / rec[threshold_idxs]
rec = rec[threshold_idxs] / (float(N) + float(failures))
    # reverse the order and ensure the curve ends at the (prec=1, rec=0) point
last_ind = rec.searchsorted(rec[-1])
sl = slice(last_ind, None, -1)
prec = np.r_[prec[sl], 1]
rec = np.r_[rec[sl], 0]
# compute average precision (AUC) as the weighted average of precisions
average_precision = np.abs(np.sum(np.diff(rec) * np.array(prec)[:-1]))
return prec, rec, average_precision
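
# Illustrative sketch: four estimated poses ranked by confidence, three of which are
# within the pose-error threshold, plus one image with no estimate at all (failures=1).
def _example_precision_recall():
    inliers = [0.9, 0.8, 0.5, 0.2]   # confidence per estimated pose
    tp = [True, True, False, True]   # is the pose error within the threshold?
    prec, rec, average_precision = precision_recall(inliers, tp, failures=1)
    print(prec, rec, average_precision)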