Spaces:

Realcat
/

image-matching-webui

Running

App Files Files Community

image-matching-webui / imcui /third_party /rdd /benchmarks /mega_view.py

Realcat

add: rdd sparse and dense match

1b369eb 19 days ago

raw

history blame contribute delete

10.5 kB

	import sys
	sys.path.append(".")
	import numpy as np
	import torch
	from PIL import Image
	import tqdm
	import cv2
	import argparse
	import matplotlib.pyplot as plt
	import matplotlib
	from RDD.RDD import build
	from RDD.RDD_helper import RDD_helper
	import os
	from benchmarks.utils import pose_auc, angle_error_vec, angle_error_mat, symmetric_epipolar_distance, compute_symmetrical_epipolar_errors, compute_pose_error, compute_relative_pose, estimate_pose, dynamic_alpha

	def make_matching_figure(
	        img0, img1, mkpts0, mkpts1, color,
	        kpts0=None, kpts1=None, text=None, dpi=75, path=None):
	    """Draw two images side by side and connect their matched keypoints.

	    Args:
	        img0, img1: Images shown in the left and right panel. ``img0`` is also
	            sliced as ``img0[:100, :200]`` to choose a readable text colour.
	        mkpts0, mkpts1: Matched keypoints, read as ``[:, 0]`` (x) and
	            ``[:, 1]`` (y). Both must have the same number of rows.
	        color: Per-match colours; ``color[i]`` styles the i-th connecting
	            line and the whole sequence colours the scattered match points.
	        kpts0, kpts1: Optional extra keypoints drawn as small white dots.
	            When ``kpts0`` is given, ``kpts1`` must be given too.
	        text: Optional sequence of strings, joined with newlines and written
	            in the top-left corner. ``None`` (default) writes an empty string.
	        dpi: Resolution passed to ``plt.subplots``.
	        path: If truthy, the figure is saved to ``str(path)`` and closed.

	    Returns:
	        The matplotlib figure when ``path`` is falsy, otherwise ``None``.

	    Raises:
	        AssertionError: If the match arrays differ in length, or ``kpts0`` is
	            given without ``kpts1``.
	    """
	    assert mkpts0.shape[0] == mkpts1.shape[0], f'mkpts0: {mkpts0.shape[0]} v.s. mkpts1: {mkpts1.shape[0]}'
	    # ``None`` replaces the former mutable ``[]`` default.
	    text_lines = [] if text is None else text

	    # draw image pair
	    fig, axes = plt.subplots(1, 2, figsize=(10, 6), dpi=dpi)
	    axes[0].imshow(img0, cmap='gray')
	    axes[1].imshow(img1, cmap='gray')
	    for ax in axes:  # clear all frames
	        ax.get_yaxis().set_ticks([])
	        ax.get_xaxis().set_ticks([])
	        for spine in ax.spines.values():
	            spine.set_visible(False)
	    fig.tight_layout(pad=1)

	    if kpts0 is not None:
	        assert kpts1 is not None
	        axes[0].scatter(kpts0[:, 0], kpts0[:, 1], c='w', s=2)
	        axes[1].scatter(kpts1[:, 0], kpts1[:, 1], c='w', s=2)

	    # draw matches
	    if mkpts0.shape[0] != 0 and mkpts1.shape[0] != 0:
	        # The canvas is drawn first so the data->display transforms used
	        # below reflect the final layout.
	        fig.canvas.draw()
	        to_figure = fig.transFigure.inverted()
	        fkpts0 = to_figure.transform(axes[0].transData.transform(mkpts0))
	        fkpts1 = to_figure.transform(axes[1].transData.transform(mkpts1))
	        # Lines live in figure coordinates so they can span both panels.
	        fig.lines = [
	            matplotlib.lines.Line2D(
	                (fkpts0[i, 0], fkpts1[i, 0]),
	                (fkpts0[i, 1], fkpts1[i, 1]),
	                transform=fig.transFigure, c=color[i], linewidth=1)
	            for i in range(len(mkpts0))
	        ]

	        axes[0].scatter(mkpts0[:, 0], mkpts0[:, 1], c=color, s=4)
	        axes[1].scatter(mkpts1[:, 0], mkpts1[:, 1], c=color, s=4)

	    # put txts: black on a bright top-left corner, white otherwise
	    txt_color = 'k' if img0[:100, :200].mean() > 200 else 'w'
	    fig.text(
	        0.01, 0.99, '\n'.join(text_lines), transform=fig.axes[0].transAxes,
	        fontsize=15, va='top', ha='left', color=txt_color)

	    # save or return figure; act on this figure explicitly rather than on
	    # whatever pyplot currently considers the active one
	    if path:
	        fig.savefig(str(path), bbox_inches='tight', pad_inches=0)
	        plt.close(fig)
	        return None
	    return fig

	def error_colormap(err, thr, alpha=1.0):
	    """Map errors to RGBA colours running from green (small) to red (large).

	    ``err`` is scaled by ``2 * thr`` and clipped to ``[0, 1]``: an error of 0
	    gives pure green, an error equal to ``thr`` gives yellow, and errors of
	    ``2 * thr`` or more give pure red. The blue channel is always 0.

	    Args:
	        err: NumPy array of non-negative errors.
	        thr: Error threshold that defines the colour scale.
	        alpha: Opacity written to the fourth channel; must be in ``(0, 1]``.

	    Returns:
	        Array with one trailing axis of size 4 (R, G, B, A) appended to the
	        shape of ``err``, all values within ``[0, 1]``.

	    Raises:
	        AssertionError: If ``alpha`` is outside ``(0, 1]``.
	    """
	    assert alpha <= 1.0 and alpha > 0, f"Invaid alpha value: {alpha}"
	    x = 1 - np.clip(err / (thr * 2), 0, 1)
	    # The multiplications were missing here (``2-x2, x2``), which referenced
	    # an undefined name; red falls and green rises as ``x`` goes from 0 to 1.
	    return np.clip(
	        np.stack([2 - x * 2, x * 2, np.zeros_like(x), np.ones_like(x) * alpha], -1), 0, 1)

	def _make_evaluation_figure(img0, img1, kpts0, kpts1, epi_errs, e_t, e_R, alpha='dynamic', path=None):
	    """Render a match figure coloured by epipolar error and annotated with stats.

	    Args:
	        img0, img1: Images of the pair; converted with ``np.array``.
	        kpts0, kpts1: Matched keypoints, forwarded to ``make_matching_figure``
	            as the match coordinates.
	        epi_errs: Per-match epipolar errors; must support ``.cpu().numpy()``.
	        e_t, e_R: Translation and rotation errors, printed with two decimals.
	        alpha: Line opacity, or ``'dynamic'`` to derive it from the number of
	            matches through ``dynamic_alpha``.
	        path: Forwarded to ``make_matching_figure``.

	    Returns:
	        Whatever ``make_matching_figure`` returns for the given ``path``.
	    """
	    # Matches whose epipolar error is below this value count as correct.
	    conf_thr = 1e-4

	    img0 = np.array(img0)
	    img1 = np.array(img1)

	    epi_errs = epi_errs.cpu().numpy()
	    correct_mask = epi_errs < conf_thr
	    # Guard the empty case so the mean of zero matches is not taken.
	    precision = np.mean(correct_mask) if len(correct_mask) > 0 else 0
	    n_correct = np.sum(correct_mask)

	    # matching info
	    if alpha == 'dynamic':
	        alpha = dynamic_alpha(len(correct_mask))
	    color = error_colormap(epi_errs, conf_thr, alpha=alpha)

	    text = [
	        f'#Matches {len(kpts0)}',
	        f'Precision({conf_thr:.2e}) ({100 * precision:.1f}%): {n_correct}/{len(kpts0)}',
	        # A plain '|' separator: the former '\|' was an invalid escape
	        # sequence and printed a stray backslash.
	        f'e_t: {e_t:.2f} | e_R: {e_R:.2f}',
	    ]

	    # make the figure
	    return make_matching_figure(img0, img1, kpts0, kpts1,
	                                color, text=text, path=path)

	class MegaDepthPoseMNNBenchmark:
	    """Relative-pose benchmark over image pairs listed in ``.npz`` scene files.

	    Each scene file is loaded with ``np.load`` and must provide an
	    ``'indices'`` entry. Every item of that entry is read through the keys
	    ``'pair_names'``, ``'intrisinic'`` and ``'pose'`` (two values each, one
	    per image of the pair).
	    """

	    # Error in degrees recorded for a pair whose pose could not be estimated.
	    # It exceeds every threshold used in ``benchmark`` (5/10/15/20), so such a
	    # pair always counts as a miss.
	    _FAILED_POSE_ERROR = 90.0

	    # Supported ``method`` values and the helper method each one calls.
	    _MATCHER_NAMES = {
	        'dense': 'match_dense',
	        'lightglue': 'match_lg',
	        'sparse': 'match',
	    }

	    def __init__(self, data_root="./megadepth_test_1500", scene_names = None) -> None:
	        """Load the scene index files.

	        Args:
	            data_root: Directory holding the scene files and an ``images``
	                sub-directory.
	            scene_names: File names (relative to ``data_root``) of the scene
	                files; defaults to ``["hard_indices.npz"]``.
	        """
	        if scene_names is None:
	            self.scene_names = [
	                "hard_indices.npz",
	            ]
	        else:
	            self.scene_names = scene_names
	        # NOTE: allow_pickle=True executes pickled payloads on load; only
	        # point ``data_root`` at trusted scene files.
	        self.scenes = [
	            np.load(f"{data_root}/{scene}", allow_pickle=True)
	            for scene in self.scene_names
	        ]
	        self.data_root = data_root

	    def benchmark(self, model_helper, model_name = None, scale_intrinsics = False, calibrated = True, plot_every_iter=1, plot=False, method='sparse'):
	        """Match every pair, estimate its relative pose and aggregate errors.

	        Args:
	            model_helper: Object exposing ``match``, ``match_dense`` or
	                ``match_lg`` (depending on ``method``); called as
	                ``fn(im_A, im_B, thr=0.01, resize=1600)`` and expected to
	                return ``(kpts0, kpts1, conf)``.
	            model_name: Label used in the printed summary and plot directory.
	            scale_intrinsics: If true, rescale both intrinsics as if the
	                longer side of each image were 840 pixels.
	            calibrated: Must be true; no uncalibrated estimation path exists.
	            plot_every_iter: Plot only scenes whose index is a multiple of
	                this value.
	            plot: Whether to write evaluation figures under
	                ``outputs/mega_view/{model_name}_{method}``.
	            method: One of ``'sparse'``, ``'dense'`` or ``'lightglue'``.

	        Returns:
	            Dict with ``auc_5``, ``auc_10``, ``auc_20``, ``map_5``, ``map_10``
	            and ``map_20``.

	        Raises:
	            ValueError: If ``method`` is not supported.
	            NotImplementedError: If ``calibrated`` is false.
	            FileNotFoundError: If an image of a pair cannot be read.
	        """
	        # Validate up front so a bad option fails before any image is loaded.
	        if method not in self._MATCHER_NAMES:
	            raise ValueError(f"Invalid method {method}")
	        if not calibrated:
	            # Previously this path left the pose result unbound and crashed
	            # with a NameError on the first pair.
	            raise NotImplementedError("Only calibrated pose estimation is supported.")
	        match_fn = getattr(model_helper, self._MATCHER_NAMES[method])

	        data_root = self.data_root
	        plot_dir = f'outputs/mega_view/{model_name}_{method}'
	        if plot:
	            # Also creates missing parents, unlike a bare os.mkdir.
	            os.makedirs(plot_dir, exist_ok=True)

	        tot_e_pose = []
	        thresholds = [5, 10, 20]
	        with torch.no_grad():
	            for scene_ind, (scene_file, scene) in enumerate(zip(self.scene_names, self.scenes)):
	                scene_name = os.path.splitext(scene_file)[0]

	                for idx, pair in enumerate(tqdm.tqdm(scene['indices'])):
	                    pair_names = pair['pair_names']
	                    K0 = pair['intrisinic'][0].copy().astype(np.float32)
	                    T0 = pair['pose'][0].copy().astype(np.float32)
	                    R0, t0 = T0[:3, :3], T0[:3, 3]
	                    K1 = pair['intrisinic'][1].copy().astype(np.float32)
	                    T1 = pair['pose'][1].copy().astype(np.float32)
	                    R1, t1 = T1[:3, :3], T1[:3, 3]
	                    R, t = compute_relative_pose(R0, t0, R1, t1)
	                    T0_to_1 = np.concatenate((R, t[:, None]), axis=-1)
	                    im_A_path = f"{data_root}/images/{pair_names[0]}"
	                    im_B_path = f"{data_root}/images/{pair_names[1]}"

	                    # cv2.imread signals failure by returning None instead of
	                    # raising, so check explicitly.
	                    im_A = cv2.imread(im_A_path)
	                    im_B = cv2.imread(im_B_path)
	                    if im_A is None or im_B is None:
	                        raise FileNotFoundError(
	                            f"Could not read image pair: {im_A_path}, {im_B_path}")

	                    kpts0, kpts1, _ = match_fn(im_A, im_B, thr=0.01, resize=1600)

	                    # The PIL images supply the sizes and are what gets plotted.
	                    pil_A = Image.open(im_A_path)
	                    w0, h0 = pil_A.size
	                    pil_B = Image.open(im_B_path)
	                    w1, h1 = pil_B.size

	                    if scale_intrinsics:
	                        # NOTE(review): only the intrinsics are rescaled here;
	                        # confirm the keypoints are in the matching resolution.
	                        scale0 = 840 / max(w0, h0)
	                        scale1 = 840 / max(w1, h1)
	                        K0[:2] = K0[:2] * scale0
	                        K1[:2] = K1[:2] * scale1

	                    threshold = 0.5
	                    # Express the pixel threshold relative to the focal
	                    # lengths of both cameras.
	                    norm_threshold = threshold / (np.mean(np.abs(K0[:2, :2])) + np.mean(np.abs(K1[:2, :2])))
	                    ret = estimate_pose(
	                        kpts0,
	                        kpts1,
	                        K0,
	                        K1,
	                        norm_threshold,
	                        conf=0.99999,
	                    )
	                    if ret is None:
	                        # Score a failed estimation as a miss rather than
	                        # reusing the previous pair's errors or crashing.
	                        e_t = e_R = self._FAILED_POSE_ERROR
	                    else:
	                        R_est, t_est, _ = ret
	                        T0_to_1_est = np.concatenate((R_est, t_est), axis=-1)
	                        e_t, e_R = compute_pose_error(T0_to_1_est, R, t)

	                        epi_errs = compute_symmetrical_epipolar_errors(T0_to_1, kpts0, kpts1, K0, K1)
	                        # NOTE(review): the stride applies to the scene index,
	                        # not the pair index - confirm this is intended.
	                        if scene_ind % plot_every_iter == 0 and plot:
	                            name = f'{plot_dir}/{scene_name}_{idx}.png'
	                            _make_evaluation_figure(pil_A, pil_B, kpts0, kpts1, epi_errs, e_t, e_R, path=name)

	                    tot_e_pose.append(max(e_t, e_R))

	        tot_e_pose = np.array(tot_e_pose)
	        auc = pose_auc(tot_e_pose, thresholds)
	        acc_5 = (tot_e_pose < 5).mean()
	        acc_10 = (tot_e_pose < 10).mean()
	        acc_15 = (tot_e_pose < 15).mean()
	        acc_20 = (tot_e_pose < 20).mean()
	        map_5 = acc_5
	        map_10 = np.mean([acc_5, acc_10])
	        map_20 = np.mean([acc_5, acc_10, acc_15, acc_20])
	        print(f"{model_name} auc: {auc}")
	        return {
	            "auc_5": auc[0],
	            "auc_10": auc[1],
	            "auc_20": auc[2],
	            "map_5": map_5,
	            "map_10": map_10,
	            "map_20": map_20,
	        }



	def parse_arguments():
	    """Parse the command-line options of the benchmark script.

	    Returns:
	        ``argparse.Namespace`` with ``data_root``, ``weights``, ``plot`` and
	        ``method``.
	    """
	    # Options are declared as data and registered in order, so the help
	    # output lists them in this sequence.
	    option_specs = (
	        ("--data_root", dict(type=str, default="./data/megadepth_view", help="Path to the MegaDepth dataset.")),
	        ("--weights", dict(type=str, default="./weights/RDD-v2.pth", help="Path to the model checkpoint.")),
	        ("--plot", dict(action="store_true", help="Whether to plot the results.")),
	        ("--method", dict(type=str, default="sparse", help="Method for matching.")),
	    )

	    cli = argparse.ArgumentParser(description="Testing script.")
	    for flag, spec in option_specs:
	        cli.add_argument(flag, **spec)
	    return cli.parse_args()

	if __name__ == "__main__":
	    # Script entry point: build the model, run the benchmark with the
	    # requested matching method and store the resulting metrics as text.
	    args = parse_arguments()

	    # One call creates both directory levels and tolerates existing ones,
	    # replacing the separate exists()/mkdir() checks.
	    os.makedirs('outputs/mega_view', exist_ok=True)

	    model = build(weights=args.weights)
	    benchmark = MegaDepthPoseMNNBenchmark(data_root=args.data_root)
	    model.eval()
	    model_helper = RDD_helper(model)
	    method = args.method
	    with torch.no_grad():
	        out = benchmark.benchmark(model_helper, model_name='RDD', plot_every_iter=1, plot=args.plot, method=method)
	    with open(f'outputs/mega_view/RDD_{method}.txt', 'w') as f:
	        f.write(str(out))