add: liftfeat
Files changed:
- README.md +1 -0
- config/config.yaml +11 -0
- imcui/hloc/extract_features.py +11 -0
- imcui/hloc/extractors/liftfeat.py +57 -0
- imcui/third_party/LiftFeat/.gitignore +4 -0
- imcui/third_party/LiftFeat/README.md +141 -0
- imcui/third_party/LiftFeat/assert/achitecture.png +3 -0
- imcui/third_party/LiftFeat/assert/demo_liftfeat.gif +3 -0
- imcui/third_party/LiftFeat/assert/demo_sp.gif +3 -0
- imcui/third_party/LiftFeat/assert/query.jpg +3 -0
- imcui/third_party/LiftFeat/assert/ref.jpg +3 -0
- imcui/third_party/LiftFeat/data/megadepth_1500.json +0 -0
- imcui/third_party/LiftFeat/dataset/__init__.py +0 -0
- imcui/third_party/LiftFeat/dataset/coco_augmentor.py +298 -0
- imcui/third_party/LiftFeat/dataset/coco_wrapper.py +175 -0
- imcui/third_party/LiftFeat/dataset/dataset_utils.py +183 -0
- imcui/third_party/LiftFeat/dataset/megadepth.py +177 -0
- imcui/third_party/LiftFeat/dataset/megadepth_wrapper.py +167 -0
- imcui/third_party/LiftFeat/demo.py +68 -0
- imcui/third_party/LiftFeat/evaluation/HPatch_evaluation.py +182 -0
- imcui/third_party/LiftFeat/evaluation/MegaDepth1500_evaluation.py +105 -0
- imcui/third_party/LiftFeat/evaluation/eval_utils.py +127 -0
- imcui/third_party/LiftFeat/loss/loss.py +291 -0
- imcui/third_party/LiftFeat/models/interpolator.py +34 -0
- imcui/third_party/LiftFeat/models/liftfeat.py +190 -0
- imcui/third_party/LiftFeat/models/liftfeat_wrapper.py +173 -0
- imcui/third_party/LiftFeat/models/model.py +419 -0
- imcui/third_party/LiftFeat/requirements.txt +18 -0
- imcui/third_party/LiftFeat/train.py +365 -0
- imcui/third_party/LiftFeat/train.sh +11 -0
- imcui/third_party/LiftFeat/utils/__init__.py +0 -0
- imcui/third_party/LiftFeat/utils/alike_wrapper.py +45 -0
- imcui/third_party/LiftFeat/utils/config.py +16 -0
- imcui/third_party/LiftFeat/utils/depth_anything_wrapper.py +150 -0
- imcui/third_party/LiftFeat/utils/featurebooster.py +247 -0
- imcui/third_party/LiftFeat/weights/LiftFeat.pth +3 -0
README.md
CHANGED

```diff
@@ -45,6 +45,7 @@ The tool currently supports various popular image matching algorithms, namely:
 | Algorithm        | Supported | Conference/Journal | Year | GitHub Link |
 |------------------|-----------|--------------------|------|-------------|
 | DaD              | ✅        | ARXIV              | 2025 | [Link](https://github.com/Parskatt/dad) |
+| LiftFeat         | ✅        | ICRA               | 2025 | [Link](https://github.com/lyp-deeplearning/LiftFeat) |
 | MINIMA           | ✅        | ARXIV              | 2024 | [Link](https://github.com/LSXI7/MINIMA) |
 | XoFTR            | ✅        | CVPR               | 2024 | [Link](https://github.com/OnderT/XoFTR) |
 | EfficientLoFTR   | ✅        | CVPR               | 2024 | [Link](https://github.com/zju3dv/EfficientLoFTR) |
```
config/config.yaml
CHANGED

```diff
@@ -256,6 +256,17 @@ matcher_zoo:
       paper: https://arxiv.org/abs/2404.19174
       project: null
       display: false
+  liftfeat(sparse):
+    matcher: NN-mutual
+    feature: liftfeat
+    dense: false
+    info:
+      name: LiftFeat  # display name
+      source: "ICRA 2025"
+      github: https://github.com/lyp-deeplearning/LiftFeat
+      paper: https://arxiv.org/abs/2505.03422
+      project: null
+      display: true
   dedode:
     matcher: Dual-Softmax
     feature: dedode
```
imcui/hloc/extract_features.py
CHANGED

```diff
@@ -214,6 +214,17 @@ confs = {
             "resize_max": 1600,
         },
     },
+    "liftfeat": {
+        "output": "feats-liftfeat-n5000-r1600",
+        "model": {
+            "name": "liftfeat",
+            "max_keypoints": 5000,
+        },
+        "preprocessing": {
+            "grayscale": False,
+            "resize_max": 1600,
+        },
+    },
     "aliked-n16-rot": {
         "output": "feats-aliked-n16-rot",
         "model": {
```
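With this conf registered, LiftFeat extraction should plug into hloc's standard pipeline. A minimal sketch, assuming hloc's usual `extract_features.main(conf, image_dir, export_dir)` entry point and placeholder paths:

```python
# Hedged sketch, not part of the commit; paths are placeholders.
from pathlib import Path
from imcui.hloc import extract_features

images = Path("datasets/my_scene/images")  # hypothetical image folder
outputs = Path("outputs/my_scene")

conf = extract_features.confs["liftfeat"]  # the conf added above
# Expected to write feats-liftfeat-n5000-r1600.h5 with per-image
# keypoints, descriptors, and scores under the export dir.
feature_path = extract_features.main(conf, images, outputs)
```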
imcui/hloc/extractors/liftfeat.py
ADDED

```python
import logging
import sys
from pathlib import Path
import torch
import random
from ..utils.base_model import BaseModel
from .. import logger

# Make the vendored LiftFeat repo importable.
fire_path = Path(__file__).parent / "../../third_party/LiftFeat"
sys.path.append(str(fire_path))

from models.liftfeat_wrapper import LiftFeat, MODEL_PATH


def select_idx(N, M):
    # Sample M distinct 0-based indices from [0, N) so they are valid tensor
    # indices (the original range(1, N + 1) could index one past the end).
    numbers = list(range(N))
    selected = random.sample(numbers, M)
    return selected


class Liftfeat(BaseModel):
    default_conf = {
        "keypoint_threshold": 0.05,
        "max_keypoints": 5000,
    }

    required_inputs = ["image"]

    def _init(self, conf):
        logger.info("Loading LiftFeat model...")
        self.net = LiftFeat(
            weight=MODEL_PATH,
            detect_threshold=self.conf["keypoint_threshold"],
            top_k=self.conf["max_keypoints"],
        )
        logger.info("Loading LiftFeat model done!")

    def _forward(self, data):
        # hloc supplies a float tensor in [0, 1]; LiftFeat expects an HWC image
        # in the 0-255 range.
        image = data["image"].cpu().numpy().squeeze() * 255
        image = image.transpose(1, 2, 0)
        pred = self.net.extract(image)

        keypoints = pred["keypoints"]
        descriptors = pred["descriptors"]
        scores = torch.ones_like(pred["keypoints"][:, 0])
        # Randomly subsample if the detector returned more than max_keypoints.
        if self.conf["max_keypoints"] < len(keypoints):
            idxs = select_idx(len(keypoints), self.conf["max_keypoints"])
            keypoints = keypoints[idxs, :2]
            descriptors = descriptors[idxs]
            scores = scores[idxs]

        pred = {
            "keypoints": keypoints[None],
            "descriptors": descriptors[None].permute(0, 2, 1),
            "scores": scores[None],
        }
        return pred
```
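A quick smoke test of the extractor above; a sketch under assumptions: the pretrained weights resolve via MODEL_PATH, and hloc's BaseModel routes a plain call to `_forward`:

```python
# Hedged smoke test, not part of the commit.
import torch
from imcui.hloc.extractors.liftfeat import Liftfeat

model = Liftfeat({"keypoint_threshold": 0.05, "max_keypoints": 1024})
data = {"image": torch.rand(1, 3, 480, 640)}  # RGB in [0, 1], as hloc supplies
pred = model(data)
print(pred["keypoints"].shape)    # (1, N, 2)
print(pred["descriptors"].shape)  # (1, D, N) after the permute in _forward
```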
imcui/third_party/LiftFeat/.gitignore
ADDED

```
visualize
trained_weights
data/HPatch
data/megadepth_test_1500
```
imcui/third_party/LiftFeat/README.md
ADDED

## LiftFeat: 3D Geometry-Aware Local Feature Matching
<div align="center" style="display: flex; justify-content: center; align-items: center; flex-direction: column;">
  <div style="display: flex; justify-content: space-around; width: 100%;">
    <img src='./assert/demo_sp.gif' width="400"/>
    <img src='./assert/demo_liftfeat.gif' width="400"/>
  </div>

Real-time SuperPoint demonstration (left) compared to LiftFeat (right) on a textureless scene.

</div>

- 🎉 **New!** Training code is now available 🚀
- 🎉 **New!** The test code and pretrained model have been released. 🚀

## Table of Contents
- [Introduction](#introduction)
- [Installation](#installation)
- [Usage](#usage)
  - [Inference](#inference)
  - [Training](#training)
  - [Evaluation](#evaluation)
- [Citation](#citation)
- [License](#license)

## Introduction
This repository contains the official implementation of the paper:
**[LiftFeat: 3D Geometry-Aware Local Feature Matching](https://www.arxiv.org/abs/2505.03422)**, to be presented at *ICRA 2025*.

**Overview of LiftFeat's architecture**
<div style="background-color:white">
<img align="center" src="./assert/achitecture.png" width=1000 />
</div>

LiftFeat is a lightweight and robust local feature matching network designed to handle challenging scenarios such as drastic lighting changes, low-texture regions, and repetitive patterns. By incorporating 3D geometric cues through surface normals predicted from monocular depth, LiftFeat enhances the discriminative power of 2D descriptors. Our proposed 3D geometry-aware feature lifting module effectively fuses these cues, leading to significant improvements in tasks like relative pose estimation, homography estimation, and visual localization.

## Installation
If you use conda as your virtual environment manager, you can create a new env with:
```bash
git clone https://github.com/lyp-deeplearning/LiftFeat.git
cd LiftFeat
conda create -n LiftFeat python=3.8
conda activate LiftFeat
pip install -r requirements.txt
```

## Usage
### Inference
To run LiftFeat on an image pair, simply run:
```bash
python demo.py --img1=<reference image> --img2=<query image>
```

### Training
To train LiftFeat as described in the paper, you will need MegaDepth and the COCO_20k subset of the COCO2017 dataset, as described in the paper *[XFeat: Accelerated Features for Lightweight Image Matching](https://arxiv.org/abs/2404.19174)*.
You can obtain the full COCO2017 train data at https://cocodataset.org/.
However, we [make available](https://drive.google.com/file/d/1ijYsPq7dtLQSl-oEsUOGH1fAy21YLc7H/view?usp=drive_link) a subset of COCO for convenience. We simply selected a subset of 20k images according to image resolution. Please check the COCO [terms of use](https://cocodataset.org/#termsofuse) before using the data.

To reproduce the training setup from the paper, please follow these steps:
1. Download [COCO_20k](https://drive.google.com/file/d/1ijYsPq7dtLQSl-oEsUOGH1fAy21YLc7H/view?usp=drive_link), containing a subset of COCO2017;
2. Download the MegaDepth dataset. You can follow the [LoFTR instructions](https://github.com/zju3dv/LoFTR/blob/master/docs/TRAINING.md#download-datasets); we use the same standard as LoFTR. Then put the megadepth indices inside the MegaDepth root folder following the layout below:
```bash
{megadepth_root_path}/train_data/megadepth_indices  # indices
{megadepth_root_path}/MegaDepth_v1                  # images & depth maps & poses
```
3. Finally, you can start training:
```bash
python train.py --megadepth_root_path <path_to>/MegaDepth --synthetic_root_path <path_to>/coco_20k --ckpt_save_path /path/to/ckpts
```

### Evaluation
All evaluation code is in *evaluation*. You can download the **HPatch** dataset following [D2-Net](https://github.com/mihaidusmanu/d2-net/tree/master) and the **MegaDepth** test dataset following [LoFTR](https://github.com/zju3dv/LoFTR/tree/master).

**Download and process HPatch**
```bash
cd /data

# Download the dataset
wget https://huggingface.co/datasets/vbalnt/hpatches/resolve/main/hpatches-sequences-release.zip

# Extract the dataset
unzip hpatches-sequences-release.zip

# Remove the high-resolution sequences
cd hpatches-sequences-release
rm -rf i_contruction i_crownnight i_dc i_pencils i_whitebuilding v_artisans v_astronautis v_talent

cd <LiftFeat>/data

ln -s /data/hpatches-sequences-release ./HPatch
```

**Download and process MegaDepth1500**
We provide a download link to [megadepth_test_1500](https://drive.google.com/drive/folders/1nTkK1485FuwqA0DbZrK2Cl0WnXadUZdc).
```bash
tar xvf <path to megadepth_test_1500.tar>

cd <LiftFeat>/data

ln -s <path to megadepth_test_1500> ./megadepth_test_1500
```

**Homography Estimation**
```bash
python evaluation/HPatch_evaluation.py
```

**Relative Pose Estimation**

For the *MegaDepth1500* dataset:
```bash
python evaluation/MegaDepth1500_evaluation.py
```

## Citation
If you find this code useful for your research, please cite the paper:
```bibtex
@misc{liu2025liftfeat3dgeometryawarelocal,
      title={LiftFeat: 3D Geometry-Aware Local Feature Matching},
      author={Yepeng Liu and Wenpeng Lai and Zhou Zhao and Yuxuan Xiong and Jinchi Zhu and Jun Cheng and Yongchao Xu},
      year={2025},
      eprint={2505.03422},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/2505.03422},
}
```

## License
[License](LICENSE)

## Acknowledgements
We would like to thank the authors of the following open-source repositories for their valuable contributions, which have inspired or supported this work:

- [verlab/accelerated_features](https://github.com/verlab/accelerated_features)
- [zju3dv/LoFTR](https://github.com/zju3dv/LoFTR)
- [rpautrat/SuperPoint](https://github.com/rpautrat/SuperPoint)

We deeply appreciate the efforts of the research community in releasing high-quality codebases.
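The README's Introduction leans on surface normals predicted from monocular depth. As a generic illustration of that cue (our sketch, not LiftFeat's actual module), per-pixel normals can be derived from a depth map with finite differences:

```python
import numpy as np

def normals_from_depth(depth: np.ndarray) -> np.ndarray:
    """Per-pixel unit normals from an (H, W) depth map via finite differences."""
    dz_dy, dz_dx = np.gradient(depth)  # depth gradients along y and x
    normals = np.dstack([-dz_dx, -dz_dy, np.ones_like(depth)])
    normals /= np.linalg.norm(normals, axis=2, keepdims=True)  # unit length
    return normals  # (H, W, 3)
```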
imcui/third_party/LiftFeat/assert/achitecture.png
ADDED (binary, stored with Git LFS)
imcui/third_party/LiftFeat/assert/demo_liftfeat.gif
ADDED (binary, stored with Git LFS)
imcui/third_party/LiftFeat/assert/demo_sp.gif
ADDED (binary, stored with Git LFS)
imcui/third_party/LiftFeat/assert/query.jpg
ADDED (binary, stored with Git LFS)
imcui/third_party/LiftFeat/assert/ref.jpg
ADDED (binary, stored with Git LFS)
imcui/third_party/LiftFeat/data/megadepth_1500.json
ADDED
The diff for this file is too large to render. See raw diff.
imcui/third_party/LiftFeat/dataset/__init__.py
ADDED (empty file)
imcui/third_party/LiftFeat/dataset/coco_augmentor.py
ADDED

```python
"""
"LiftFeat: 3D Geometry-Aware Local Feature Matching"
COCO_20k image augmentor
"""

import torch
from torch import nn
from torch.utils.data import Dataset
import torch.utils.data as data
from torchvision import transforms
import torch.nn.functional as F

import cv2
import kornia
import kornia.augmentation as K
from kornia.geometry.transform import get_tps_transform as findTPS
from kornia.geometry.transform import warp_points_tps, warp_image_tps

import glob
import random
import tqdm

import numpy as np
import pdb
import time

random.seed(0)
torch.manual_seed(0)


def generateRandomTPS(shape, grid=(8, 6), GLOBAL_MULTIPLIER=0.3, prob=0.5):
    h, w = shape
    sh, sw = h / grid[0], w / grid[1]
    src = torch.dstack(torch.meshgrid(torch.arange(0, h + sh, sh), torch.arange(0, w + sw, sw), indexing='ij'))

    offsets = torch.rand(grid[0] + 1, grid[1] + 1, 2) - 0.5
    offsets *= torch.tensor([sh / 2, sw / 2]).view(1, 1, 2) * min(0.97, 2.0 * GLOBAL_MULTIPLIER)
    dst = src + offsets if np.random.uniform() < prob else src

    src, dst = src.view(1, -1, 2), dst.view(1, -1, 2)
    src = (src / torch.tensor([h, w]).view(1, 1, 2)) * 2 - 1.
    dst = (dst / torch.tensor([h, w]).view(1, 1, 2)) * 2 - 1.
    weights, A = findTPS(dst, src)

    return src, weights, A


def generateRandomHomography(shape, GLOBAL_MULTIPLIER=0.3):
    # Generate random in-plane rotation in [-theta, +theta]
    theta = np.radians(np.random.uniform(-30, 30))

    # Generate random scale in both x and y
    scale_x, scale_y = np.random.uniform(0.35, 1.2, 2)

    # Generate random translation shift
    tx, ty = -shape[1] / 2.0, -shape[0] / 2.0
    txn, tyn = np.random.normal(0, 120.0 * GLOBAL_MULTIPLIER, 2)

    c, s = np.cos(theta), np.sin(theta)

    # Affine coeffs
    sx, sy = np.random.normal(0, 0.6 * GLOBAL_MULTIPLIER, 2)

    # Projective coeffs
    p1, p2 = np.random.normal(0, 0.006 * GLOBAL_MULTIPLIER, 2)

    # Build the homography from the parameterizations above
    H_t = np.array(((1, 0, tx), (0, 1, ty), (0, 0, 1)))                  # translate to center
    H_r = np.array(((c, -s, 0), (s, c, 0), (0, 0, 1)))                   # rotation
    H_a = np.array(((1, sy, 0), (sx, 1, 0), (0, 0, 1)))                  # affine
    H_p = np.array(((1, 0, 0), (0, 1, 0), (p1, p2, 1)))                  # projective
    H_s = np.array(((scale_x, 0, 0), (0, scale_y, 0), (0, 0, 1)))        # scale
    H_b = np.array(((1.0, 0, -tx + txn), (0, 1, -ty + tyn), (0, 0, 1)))  # translate back (+ random shift)

    # H = H_b @ H_s @ H_p @ H_a @ H_r @ H_t
    H = np.dot(np.dot(np.dot(np.dot(np.dot(H_b, H_s), H_p), H_a), H_r), H_t)

    return H


class COCOAugmentor(nn.Module):

    def __init__(self, device, load_dataset=True,
                 img_dir="/home/yepeng_liu/code_python/dataset/coco_20k",
                 warp_resolution=(1200, 900),
                 out_resolution=(400, 300),
                 sides_crop=0.2,
                 max_num_imgs=50,
                 num_test_imgs=10,
                 batch_size=1,
                 photometric=True,
                 geometric=True,
                 reload_step=1_000
                 ):
        super(COCOAugmentor, self).__init__()
        self.half = 16
        self.device = device

        self.dims = warp_resolution
        self.batch_size = batch_size
        self.out_resolution = out_resolution
        self.sides_crop = sides_crop
        self.max_num_imgs = max_num_imgs
        self.num_test_imgs = num_test_imgs
        self.dims_t = torch.tensor([int(self.dims[0] * (1. - self.sides_crop)) - int(self.dims[0] * self.sides_crop) - 1,
                                    int(self.dims[1] * (1. - self.sides_crop)) - int(self.dims[1] * self.sides_crop) - 1]).float().to(device).view(1, 1, 2)
        self.dims_s = torch.tensor([self.dims_t[0, 0, 0] / out_resolution[0],
                                    self.dims_t[0, 0, 1] / out_resolution[1]]).float().to(device).view(1, 1, 2)

        self.all_imgs = glob.glob(img_dir + '/*.jpg') + glob.glob(img_dir + '/*.png')

        self.photometric = photometric
        self.geometric = geometric
        self.cnt = 1
        self.reload_step = reload_step

        list_augmentation = [
            kornia.augmentation.ColorJitter(0.15, 0.15, 0.15, 0.15, p=1.),
            kornia.augmentation.RandomEqualize(p=0.4),
            kornia.augmentation.RandomGaussianBlur(p=0.3, sigma=(2.0, 2.0), kernel_size=(7, 7))
        ]

        if photometric is False:
            list_augmentation = []

        self.aug_list = kornia.augmentation.ImageSequential(*list_augmentation)

        if len(self.all_imgs) < 10:
            raise RuntimeError("Couldn't find enough images to train. Please check the path: ", img_dir)

        if load_dataset:
            print('[COCO]: ', len(self.all_imgs), ' images for training..')
            if len(self.all_imgs) - num_test_imgs < max_num_imgs:
                raise RuntimeError('Error: test set overlaps with training set! Decrease number of test imgs')

            self.load_imgs()

        self.TPS = True

    def load_imgs(self):
        random.shuffle(self.all_imgs)
        train = []
        for p in tqdm.tqdm(self.all_imgs[:self.max_num_imgs], desc='loading train'):
            im = cv2.imread(p)
            halfH, halfW = im.shape[0] // 2, im.shape[1] // 2
            if halfH > halfW:
                im = np.rot90(im)
                halfH, halfW = halfW, halfH

            if im.shape[0] != self.dims[1] or im.shape[1] != self.dims[0]:
                im = cv2.resize(im, self.dims)

            train.append(np.copy(im))

        self.train = train
        self.test = [
            cv2.resize(cv2.imread(p), self.dims)
            for p in tqdm.tqdm(self.all_imgs[-self.num_test_imgs:], desc='loading test')
        ]

    def norm_pts_grid(self, x):
        if len(x.size()) == 2:
            return (x.view(1, -1, 2) * self.dims_s / self.dims_t) * 2. - 1
        return (x * self.dims_s / self.dims_t) * 2. - 1

    def denorm_pts_grid(self, x):
        if len(x.size()) == 2:
            return ((x.view(1, -1, 2) + 1) / 2.) / self.dims_s * self.dims_t
        return ((x + 1) / 2.) / self.dims_s * self.dims_t

    def rnd_kps(self, shape, n=256):
        h, w = shape
        kps = torch.rand(size=(3, n)).to(self.device)
        kps[0, :] *= w
        kps[1, :] *= h
        kps[2, :] = 1.0

        return kps

    def warp_points(self, H, pts):
        scale = self.dims_s.view(-1, 2)
        offset = torch.tensor([int(self.dims[0] * self.sides_crop), int(self.dims[1] * self.sides_crop)], device=pts.device).float()
        pts = pts * scale + offset
        pts = torch.vstack([pts.t(), torch.ones(1, pts.shape[0], device=pts.device)])
        warped = torch.matmul(H, pts)
        warped = warped / warped[2, ...]
        warped = warped.t()[:, :2]
        return (warped - offset) / scale

    @torch.inference_mode()
    def forward(self, x, difficulty=0.3, TPS=False, prob_deformation=0.5, test=False):
        """
        Perform augmentation on a batch of images.

        input:
            x -> torch.Tensor(B, C, H, W): rgb images
            difficulty -> float: level of difficulty, 0.1 is medium, 0.3 is already pretty hard
            TPS -> bool: whether to apply non-rigid deformations to the images
            prob_deformation -> float: probability to apply a deformation

        return:
            'output' -> torch.Tensor(B, C, H, W): rgb images
            Tuple:
                'H' -> torch.Tensor(3,3): homography matrix
                'mask' -> torch.Tensor(B, H, W): mask of valid pixels after warp
                (deformation only)
                src, weights, A are parameters from a TPS warp (all torch.Tensors)
        """
        if self.cnt % self.reload_step == 0:
            self.load_imgs()

        if self.geometric is False:
            difficulty = 0.

        with torch.no_grad():
            x = (x / 255.).to(self.device)
            b, c, h, w = x.shape
            shape = (h, w)

            # ######## Geometric transformations
            H = torch.tensor(np.array([generateRandomHomography(shape, difficulty) for b in range(self.batch_size)]), dtype=torch.float32).to(self.device)

            output = kornia.geometry.transform.warp_perspective(x, H, dsize=shape, padding_mode='zeros')

            # Crop a fraction of the image boundaries on each side to reduce invalid pixels after warps
            low_h = int(h * self.sides_crop); low_w = int(w * self.sides_crop)
            high_h = int(h * (1. - self.sides_crop)); high_w = int(w * (1. - self.sides_crop))
            output = output[..., low_h:high_h, low_w:high_w]
            x = x[..., low_h:high_h, low_w:high_w]

            # Apply TPS if desired:
            if TPS:
                src, weights, A = None, None, None
                for b in range(self.batch_size):
                    b_src, b_weights, b_A = generateRandomTPS(shape, (8, 6), difficulty, prob=prob_deformation)
                    b_src, b_weights, b_A = b_src.to(self.device), b_weights.to(self.device), b_A.to(self.device)

                    if src is None:
                        src, weights, A = b_src, b_weights, b_A
                    else:
                        src = torch.cat((b_src, src))
                        weights = torch.cat((b_weights, weights))
                        A = torch.cat((b_A, A))

                output = warp_image_tps(output, src, weights, A)

            output = F.interpolate(output, self.out_resolution[::-1], mode='nearest')
            x = F.interpolate(x, self.out_resolution[::-1], mode='nearest')

            mask = ~torch.all(output == 0, dim=1, keepdim=True)
            mask = mask.expand(-1, 3, -1, -1)

            # Fill invalid regions with texture from other images in the batch
            rv = 1 if not TPS else 2
            output_shifted = torch.roll(x, rv, 0)
            output[~mask] = output_shifted[~mask]
            mask = mask[:, 0, :, :]

            # ######## Photometric transformations
            output = self.aug_list(output)

            b, c, h, w = output.shape
            # Correlated Gaussian noise
            if np.random.uniform() > 0.5 and self.photometric:
                noise = F.interpolate(torch.randn_like(output) * (10 / 255), (h // 2, w // 2))
                noise = F.interpolate(noise, (h, w), mode='bicubic')
                output = torch.clip(output + noise, 0., 1.)

            # Random shadows
            if np.random.uniform() > 0.6 and self.photometric:
                noise = torch.rand((b, 1, h // 64, w // 64), device=self.device) * 1.3
                noise = torch.clip(noise, 0.25, 1.0)
                noise = F.interpolate(noise, (h, w), mode='bicubic')
                noise = noise.expand(-1, 3, -1, -1)
                output *= noise
                output = torch.clip(output, 0., 1.)

            self.cnt += 1

            if TPS:
                return output, (H, src, weights, A, mask)
            else:
                return output, (H, mask)

    def get_correspondences(self, kps_target, T):
        H, H2, src, W, A = T
        undeformed = self.denorm_pts_grid(
            warp_points_tps(self.norm_pts_grid(kps_target),
                            src, W, A)).view(-1, 2)

        warped_to_src = self.warp_points(H @ torch.inverse(H2), undeformed)

        return warped_to_src
```
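A hedged usage sketch of `generateRandomHomography` above (the image path is a placeholder):

```python
# Hypothetical usage, not part of the commit.
import cv2

img = cv2.imread("assert/ref.jpg")  # assumption: any test image
H = generateRandomHomography(img.shape[:2], GLOBAL_MULTIPLIER=0.3)
warped = cv2.warpPerspective(img, H, (img.shape[1], img.shape[0]))
cv2.imwrite("warped.jpg", warped)
```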
imcui/third_party/LiftFeat/dataset/coco_wrapper.py
ADDED

```python
import torch
import numpy as np
import pdb

debug_cnt = -1

def make_batch(augmentor, difficulty=0.3, train=True):
    Hs = []
    img_list = augmentor.train if train else augmentor.test
    dev = augmentor.device
    batch_images = []

    with torch.no_grad():  # we don't require grads in the augmentation
        for b in range(augmentor.batch_size):
            rdidx = np.random.randint(len(img_list))
            img = torch.tensor(img_list[rdidx], dtype=torch.float32).permute(2, 0, 1).to(augmentor.device).unsqueeze(0)
            batch_images.append(img)

        batch_images = torch.cat(batch_images)

        p1, H1 = augmentor(batch_images, difficulty)
        p2, H2 = augmentor(batch_images, difficulty, TPS=True, prob_deformation=0.7)
        # p2, H2 = augmentor(batch_images, difficulty, TPS=False, prob_deformation=0.7)

    return p1, p2, H1, H2


def plot_corrs(p1, p2, src_pts, tgt_pts):
    import matplotlib.pyplot as plt
    p1 = p1.cpu()
    p2 = p2.cpu()
    src_pts = src_pts.cpu(); tgt_pts = tgt_pts.cpu()
    rnd_idx = np.random.randint(len(src_pts), size=200)
    src_pts = src_pts[rnd_idx, ...]
    tgt_pts = tgt_pts[rnd_idx, ...]

    # Plot ground-truth correspondences
    fig, ax = plt.subplots(1, 2, figsize=(18, 12))
    colors = np.random.uniform(size=(len(tgt_pts), 3))
    # Src image
    img = p1
    for i, p in enumerate(src_pts):
        ax[0].scatter(p[0], p[1], color=colors[i])
    ax[0].imshow(img.permute(1, 2, 0).numpy()[..., ::-1])

    # Target image
    img2 = p2
    for i, p in enumerate(tgt_pts):
        ax[1].scatter(p[0], p[1], color=colors[i])
    ax[1].imshow(img2.permute(1, 2, 0).numpy()[..., ::-1])
    plt.show()


def get_corresponding_pts(p1, p2, H, H2, augmentor, h, w, crop=None):
    '''
    Get dense corresponding points
    '''
    global debug_cnt
    negatives, positives = [], []

    with torch.no_grad():
        # real input res of samples
        rh, rw = p1.shape[-2:]
        ratio = torch.tensor([rw / w, rh / h], device=p1.device)

        (H, mask1) = H
        (H2, src, W, A, mask2) = H2

        # Generate meshgrid of target pts
        x, y = torch.meshgrid(torch.arange(w, device=p1.device), torch.arange(h, device=p1.device), indexing='xy')
        mesh = torch.cat([x.unsqueeze(-1), y.unsqueeze(-1)], dim=-1)
        target_pts = mesh.view(-1, 2) * ratio

        # Pack all transformations into T
        for batch_idx in range(len(p1)):
            with torch.no_grad():
                T = (H[batch_idx], H2[batch_idx],
                     src[batch_idx].unsqueeze(0), W[batch_idx].unsqueeze(0), A[batch_idx].unsqueeze(0))
                # We now warp the target points to the src image
                src_pts = augmentor.get_correspondences(target_pts, T)  # target to src
                tgt_pts = target_pts

                # Check out-of-bounds points
                mask_valid = (src_pts[:, 0] >= 0) & (src_pts[:, 1] >= 0) & \
                             (src_pts[:, 0] < rw) & (src_pts[:, 1] < rh)

                negatives.append(tgt_pts[~mask_valid])
                tgt_pts = tgt_pts[mask_valid]
                src_pts = src_pts[mask_valid]

                # Remove invalid pixels
                mask_valid = mask1[batch_idx, src_pts[:, 1].long(), src_pts[:, 0].long()] & \
                             mask2[batch_idx, tgt_pts[:, 1].long(), tgt_pts[:, 0].long()]
                tgt_pts = tgt_pts[mask_valid]
                src_pts = src_pts[mask_valid]

                # Limit the number of matches if desired
                if crop is not None:
                    rnd_idx = torch.randperm(len(src_pts), device=src_pts.device)[:crop]
                    src_pts = src_pts[rnd_idx]
                    tgt_pts = tgt_pts[rnd_idx]

                if debug_cnt >= 0 and debug_cnt < 4:
                    plot_corrs(p1[batch_idx], p2[batch_idx], src_pts, tgt_pts)
                    debug_cnt += 1

                src_pts = src_pts / ratio
                tgt_pts = tgt_pts / ratio

                # Check out-of-bounds points
                padto = 10 if crop is not None else 2
                mask_valid1 = (src_pts[:, 0] >= (0 + padto)) & (src_pts[:, 1] >= (0 + padto)) & \
                              (src_pts[:, 0] < (w - padto)) & (src_pts[:, 1] < (h - padto))
                mask_valid2 = (tgt_pts[:, 0] >= (0 + padto)) & (tgt_pts[:, 1] >= (0 + padto)) & \
                              (tgt_pts[:, 0] < (w - padto)) & (tgt_pts[:, 1] < (h - padto))
                mask_valid = mask_valid1 & mask_valid2
                tgt_pts = tgt_pts[mask_valid]
                src_pts = src_pts[mask_valid]

                # Remove repeated correspondences
                lut_mat = torch.ones((h, w, 4), device=src_pts.device, dtype=src_pts.dtype) * -1
                try:
                    lut_mat[src_pts[:, 1].long(), src_pts[:, 0].long()] = torch.cat([src_pts, tgt_pts], dim=1)
                    mask_valid = torch.all(lut_mat >= 0, dim=-1)
                    points = lut_mat[mask_valid]
                    positives.append(points)
                except Exception:
                    pdb.set_trace()
                    print('..')

    return negatives, positives


def crop_patches(tensor, coords, size=7):
    '''
    Crop [size x size] patches around 2D coordinates from a tensor.
    '''
    B, C, H, W = tensor.shape

    x, y = coords[:, 0], coords[:, 1]
    y = y.view(-1, 1, 1)
    x = x.view(-1, 1, 1)
    halfsize = size // 2
    # Create meshgrid for indexing
    x_offset, y_offset = torch.meshgrid(torch.arange(-halfsize, halfsize + 1), torch.arange(-halfsize, halfsize + 1), indexing='xy')
    y_offset = y_offset.to(tensor.device)
    x_offset = x_offset.to(tensor.device)

    # Compute indices around each coordinate
    y_indices = (y + y_offset.view(1, size, size)).squeeze(0) + halfsize
    x_indices = (x + x_offset.view(1, size, size)).squeeze(0) + halfsize

    # Handle out-of-boundary indices with padding
    tensor_padded = torch.nn.functional.pad(tensor, (halfsize, halfsize, halfsize, halfsize), mode='constant')

    # Index tensor to get patches
    patches = tensor_padded[:, :, y_indices, x_indices]  # [B, C, N, H, W]
    return patches


def subpix_softmax2d(heatmaps, temp=0.25):
    N, H, W = heatmaps.shape
    heatmaps = torch.softmax(temp * heatmaps.view(-1, H * W), -1).view(-1, H, W)
    x, y = torch.meshgrid(torch.arange(W, device=heatmaps.device), torch.arange(H, device=heatmaps.device), indexing='xy')
    x = x - (W // 2)
    y = y - (H // 2)

    coords_x = (x[None, ...] * heatmaps)
    coords_y = (y[None, ...] * heatmaps)
    coords = torch.cat([coords_x[..., None], coords_y[..., None]], -1).view(N, H * W, 2)
    coords = coords.sum(1)

    return coords
```
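A sketch of how these helpers appear to chain together for synthetic supervision (assumed wiring; paths and sizes are placeholders):

```python
# Hypothetical wiring, not part of the commit.
augmentor = COCOAugmentor(device="cuda", img_dir="/path/to/coco_20k", batch_size=4)
p1, p2, H1, H2 = make_batch(augmentor, difficulty=0.3)

# Dense GT matches on a 1/8-resolution grid of the augmented views
h, w = p1.shape[-2] // 8, p1.shape[-1] // 8
negatives, positives = get_corresponding_pts(p1, p2, H1, H2, augmentor, h, w)
print(positives[0].shape)  # per-image (K, 4): [x_src, y_src, x_tgt, y_tgt]
```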
imcui/third_party/LiftFeat/dataset/dataset_utils.py
ADDED

```python
"""
"LiftFeat: 3D Geometry-Aware Local Feature Matching"

MegaDepth data handling was adapted from
LoFTR official code: https://github.com/zju3dv/LoFTR/blob/master/src/datasets/megadepth.py
"""

import io
import cv2
import numpy as np
import h5py
import torch
from numpy.linalg import inv


try:
    # for internal use only
    from .client import MEGADEPTH_CLIENT, SCANNET_CLIENT
except Exception:
    MEGADEPTH_CLIENT = SCANNET_CLIENT = None

# --- DATA IO ---

def load_array_from_s3(
    path, client, cv_type,
    use_h5py=False,
):
    byte_str = client.Get(path)
    try:
        if not use_h5py:
            raw_array = np.frombuffer(byte_str, np.uint8)  # np.fromstring is deprecated/removed
            data = cv2.imdecode(raw_array, cv_type)
        else:
            f = io.BytesIO(byte_str)
            data = np.array(h5py.File(f, 'r')['/depth'])
    except Exception as ex:
        print(f"==> Data loading failure: {path}")
        raise ex

    assert data is not None
    return data


def imread_gray(path, augment_fn=None, client=SCANNET_CLIENT):
    cv_type = cv2.IMREAD_GRAYSCALE if augment_fn is None \
        else cv2.IMREAD_COLOR
    if str(path).startswith('s3://'):
        image = load_array_from_s3(str(path), client, cv_type)
    else:
        image = cv2.imread(str(path), 1)

    if augment_fn is not None:
        image = cv2.imread(str(path), cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = augment_fn(image)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return image  # (h, w)


def get_resized_wh(w, h, resize=None):
    if resize is not None:  # resize the longer edge
        scale = resize / max(h, w)
        w_new, h_new = int(round(w * scale)), int(round(h * scale))
    else:
        w_new, h_new = w, h
    return w_new, h_new


def get_divisible_wh(w, h, df=None):
    if df is not None:
        w_new, h_new = map(lambda x: int(x // df * df), [w, h])
    else:
        w_new, h_new = w, h
    return w_new, h_new


def pad_bottom_right(inp, pad_size, ret_mask=False):
    assert isinstance(pad_size, int) and pad_size >= max(inp.shape[-2:]), f"{pad_size} < {max(inp.shape[-2:])}"
    mask = None
    if inp.ndim == 2:
        padded = np.zeros((pad_size, pad_size), dtype=inp.dtype)
        padded[:inp.shape[0], :inp.shape[1]] = inp
        if ret_mask:
            mask = np.zeros((pad_size, pad_size), dtype=bool)
            mask[:inp.shape[0], :inp.shape[1]] = True
    elif inp.ndim == 3:
        padded = np.zeros((inp.shape[0], pad_size, pad_size), dtype=inp.dtype)
        padded[:, :inp.shape[1], :inp.shape[2]] = inp
        if ret_mask:
            mask = np.zeros((inp.shape[0], pad_size, pad_size), dtype=bool)
            mask[:, :inp.shape[1], :inp.shape[2]] = True
    else:
        raise NotImplementedError()
    return padded, mask


# --- MEGADEPTH ---

def fix_path_from_d2net(path):
    if not path:
        return None

    path = path.replace('Undistorted_SfM/', '')
    path = path.replace('images', 'dense0/imgs')
    path = path.replace('phoenix/S6/zl548/MegaDepth_v1/', '')

    return path


def read_megadepth_gray(path, resize=None, df=None, padding=False, augment_fn=None):
    """
    Args:
        resize (int, optional): the longer edge of resized images. None for no resize.
        padding (bool): If set to 'True', zero-pad resized images to squared size.
        augment_fn (callable, optional): augments images with pre-defined visual effects
    Returns:
        image (torch.tensor): (1, h, w)
        mask (torch.tensor): (h, w)
        scale (torch.tensor): [w/w_new, h/h_new]
    """
    # read image
    image = imread_gray(path, augment_fn, client=MEGADEPTH_CLIENT)

    # resize image
    w, h = image.shape[1], image.shape[0]

    if resize is not None:
        if len(resize) == 2:
            w_new, h_new = resize
        else:
            resize = resize[0]
            w_new, h_new = get_resized_wh(w, h, resize)
            w_new, h_new = get_divisible_wh(w_new, h_new, df)

        image = cv2.resize(image, (w_new, h_new))
        scale = torch.tensor([w / w_new, h / h_new], dtype=torch.float)

        if padding:  # padding
            pad_to = max(h_new, w_new)
            image, mask = pad_bottom_right(image, pad_to, ret_mask=True)
        else:
            mask = None
    else:
        scale = torch.tensor([1.0, 1.0], dtype=torch.float)

        if padding:
            pad_to = max(w, h)
            image, mask = pad_bottom_right(image, pad_to, ret_mask=True)
        else:
            mask = None

    # image = torch.from_numpy(image).float()[None] / 255  # (h, w) -> (1, h, w) and normalized
    image_t = torch.from_numpy(image).float().permute(2, 0, 1) / 255  # (h, w, 3) -> (3, h, w) and normalized
    mask = torch.from_numpy(mask) if mask is not None else None

    return image, image_t, mask, scale


def read_megadepth_depth(path, pad_to=None):
    if str(path).startswith('s3://'):
        depth = load_array_from_s3(path, MEGADEPTH_CLIENT, None, use_h5py=True)
    else:
        depth = np.array(h5py.File(path, 'r')['depth'])
    if pad_to is not None:
        depth, _ = pad_bottom_right(depth, pad_to, ret_mask=False)
    depth = torch.from_numpy(depth).float()  # (h, w)
    return depth


def imread_bgr(path, augment_fn=None, client=SCANNET_CLIENT):
    cv_type = cv2.IMREAD_GRAYSCALE if augment_fn is None else cv2.IMREAD_COLOR
    if str(path).startswith('s3://'):
        image = load_array_from_s3(str(path), client, cv_type)
    else:
        image = cv2.imread(str(path), 1)

    if augment_fn is not None:
        image = cv2.imread(str(path), cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = augment_fn(image)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    return image  # (h, w)
```
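A hedged example of the reader above (the path is a placeholder):

```python
# Hypothetical call, not part of the commit.
image_np, image_t, mask, scale = read_megadepth_gray(
    "/path/to/MegaDepth_v1/0000/dense0/imgs/some_image.jpg",
    resize=(800, 608), df=32, padding=False)
print(image_t.shape)  # torch.Size([3, 608, 800]) for a (800, 608) resize
print(scale)          # tensor([w / 800, h / 608])
```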
imcui/third_party/LiftFeat/dataset/megadepth.py
ADDED

```python
"""
"LiftFeat: 3D Geometry-Aware Local Feature Matching"

MegaDepth data handling was adapted from
LoFTR official code: https://github.com/zju3dv/LoFTR/blob/master/src/datasets/megadepth.py
"""

import os.path as osp
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
import glob
import numpy.random as rnd

import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from dataset.dataset_utils import read_megadepth_gray, read_megadepth_depth, fix_path_from_d2net

import pdb, tqdm


class MegaDepthDataset(Dataset):
    def __init__(self,
                 root_dir,
                 npz_path,
                 mode='train',
                 min_overlap_score=0.3,
                 max_overlap_score=1.0,
                 load_depth=True,
                 img_resize=(800, 608),  # or None
                 df=32,
                 img_padding=False,
                 depth_padding=True,
                 augment_fn=None,
                 **kwargs):
        """
        Manage one scene (npz_path) of the MegaDepth dataset.

        Args:
            root_dir (str): megadepth root directory that has `phoenix`.
            npz_path (str): {scene_id}.npz path. This contains image pair information of a scene.
            mode (str): options are ['train', 'val', 'test']
            min_overlap_score (float): how much a pair should have in common. In range of [0, 1]. Set to 0 when testing.
            img_resize (int, optional): the longer edge of resized images. None for no resize. 640 is recommended.
                This is useful during training with batches and testing with memory-intensive algorithms.
            df (int, optional): image size division factor. NOTE: this will change the final image size after img_resize.
            img_padding (bool): If set to 'True', zero-pad the image to squared size. This is useful during training.
            depth_padding (bool): If set to 'True', zero-pad depthmap to (2000, 2000). This is useful during training.
            augment_fn (callable, optional): augments images with pre-defined visual effects.
        """
        super().__init__()
        self.root_dir = root_dir
        self.mode = mode
        self.scene_id = npz_path.split('.')[0]
        self.load_depth = load_depth
        # prepare scene_info and pair_info
        if mode == 'test' and min_overlap_score != 0:
            min_overlap_score = 0
        self.scene_info = np.load(npz_path, allow_pickle=True)
        self.pair_infos = self.scene_info['pair_infos'].copy()
        del self.scene_info['pair_infos']
        self.pair_infos = [pair_info for pair_info in self.pair_infos if pair_info[1] > min_overlap_score and pair_info[1] < max_overlap_score]

        # parameters for image resizing, padding and depthmap padding
        if mode == 'train':
            assert img_resize is not None  # and img_padding and depth_padding

        self.img_resize = img_resize
        self.df = df
        self.img_padding = img_padding
        self.depth_max_size = 2000 if depth_padding else None  # the upper bound of depthmap size in megadepth.

        # for training LoFTR
        self.augment_fn = augment_fn if mode == 'train' else None
        self.coarse_scale = kwargs.get('coarse_scale', 0.125)  # was getattr(kwargs, ...), which never reads a dict
        for idx in range(len(self.scene_info['image_paths'])):
            self.scene_info['image_paths'][idx] = fix_path_from_d2net(self.scene_info['image_paths'][idx])

        for idx in range(len(self.scene_info['depth_paths'])):
            self.scene_info['depth_paths'][idx] = fix_path_from_d2net(self.scene_info['depth_paths'][idx])

    def __len__(self):
        return len(self.pair_infos)

    def __getitem__(self, idx):
        (idx0, idx1), overlap_score, central_matches = self.pair_infos[idx % len(self)]

        # read grayscale image and mask. (1, h, w) and (h, w)
        img_name0 = osp.join(self.root_dir, self.scene_info['image_paths'][idx0])
        img_name1 = osp.join(self.root_dir, self.scene_info['image_paths'][idx1])

        # TODO: Support augmentation & handle seeds for each worker correctly.
        image0, image0_t, mask0, scale0 = read_megadepth_gray(img_name0, self.img_resize, self.df, self.img_padding, None)
        # np.random.choice([self.augment_fn, None], p=[0.5, 0.5]))
        image1, image1_t, mask1, scale1 = read_megadepth_gray(img_name1, self.img_resize, self.df, self.img_padding, None)
        # np.random.choice([self.augment_fn, None], p=[0.5, 0.5]))

        if self.load_depth:
            # read depth. shape: (h, w)
            if self.mode in ['train', 'val']:
                depth0 = read_megadepth_depth(
                    osp.join(self.root_dir, self.scene_info['depth_paths'][idx0]), pad_to=self.depth_max_size)
                depth1 = read_megadepth_depth(
                    osp.join(self.root_dir, self.scene_info['depth_paths'][idx1]), pad_to=self.depth_max_size)
            else:
                depth0 = depth1 = torch.tensor([])

            # read intrinsics of original size
            K_0 = torch.tensor(self.scene_info['intrinsics'][idx0].copy(), dtype=torch.float).reshape(3, 3)
            K_1 = torch.tensor(self.scene_info['intrinsics'][idx1].copy(), dtype=torch.float).reshape(3, 3)

            # read and compute relative poses
            T0 = self.scene_info['poses'][idx0]
            T1 = self.scene_info['poses'][idx1]
            T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
            T_1to0 = T_0to1.inverse()

            data = {
                'image0': image0_t,  # (3, h, w)
                'image0_np': image0,
                'depth0': depth0,  # (h, w)
                'image1': image1_t,
                'image1_np': image1,
                'depth1': depth1,
                'T_0to1': T_0to1,  # (4, 4)
                'T_1to0': T_1to0,
                'K0': K_0,  # (3, 3)
                'K1': K_1,
                'scale0': scale0,  # [scale_w, scale_h]
                'scale1': scale1,
                'dataset_name': 'MegaDepth',
                'scene_id': self.scene_id,
                'pair_id': idx,
                'pair_names': (self.scene_info['image_paths'][idx0], self.scene_info['image_paths'][idx1]),
            }

            # for LoFTR training
            if mask0 is not None:  # img_padding is True
                if self.coarse_scale:
                    [ts_mask_0, ts_mask_1] = F.interpolate(torch.stack([mask0, mask1], dim=0)[None].float(),
                                                           scale_factor=self.coarse_scale,
                                                           mode='nearest',
                                                           recompute_scale_factor=False)[0].bool()
                    data.update({'mask0': ts_mask_0, 'mask1': ts_mask_1})

        else:
            # read intrinsics of original size
            K_0 = torch.tensor(self.scene_info['intrinsics'][idx0].copy(), dtype=torch.float).reshape(3, 3)
            K_1 = torch.tensor(self.scene_info['intrinsics'][idx1].copy(), dtype=torch.float).reshape(3, 3)

            # read and compute relative poses
            T0 = self.scene_info['poses'][idx0]
            T1 = self.scene_info['poses'][idx1]
            T_0to1 = torch.tensor(np.matmul(T1, np.linalg.inv(T0)), dtype=torch.float)[:4, :4]  # (4, 4)
            T_1to0 = T_0to1.inverse()

            data = {
                'image0': image0,  # (1, h, w)
                'image1': image1,
                'T_0to1': T_0to1,  # (4, 4)
                'T_1to0': T_1to0,
                'K0': K_0,  # (3, 3)
                'K1': K_1,
                'scale0': scale0,  # [scale_w, scale_h]
                'scale1': scale1,
                'dataset_name': 'MegaDepth',
                'scene_id': self.scene_id,
                'pair_id': idx,
                'pair_names': (self.scene_info['image_paths'][idx0], self.scene_info['image_paths'][idx1]),
            }

        return data
```
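A minimal sketch of consuming the dataset (the scene npz path is a placeholder following the megadepth_indices layout from the README):

```python
# Hypothetical usage, not part of the commit.
from torch.utils.data import DataLoader

dataset = MegaDepthDataset(
    root_dir="/path/to/MegaDepth",
    npz_path="/path/to/megadepth_indices/scene_info_0.1_0.7/0015_0.1_0.3.npz",
    mode="train", img_resize=(800, 608))
loader = DataLoader(dataset, batch_size=1, shuffle=True)
batch = next(iter(loader))
print(batch["image0"].shape, batch["T_0to1"].shape)  # (1, 3, 608, 800), (1, 4, 4)
```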
imcui/third_party/LiftFeat/dataset/megadepth_wrapper.py
ADDED
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
"LiftFeat: 3D Geometry-Aware Local Feature Matching"
|
3 |
+
|
4 |
+
MegaDepth data handling was adapted from
|
5 |
+
LoFTR official code: https://github.com/zju3dv/LoFTR/blob/master/src/datasets/megadepth.py
|
6 |
+
"""
|
7 |
+
|
8 |
+
import torch
|
9 |
+
from kornia.utils import create_meshgrid
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+import pdb
+import cv2
+
+@torch.no_grad()
+def warp_kpts(kpts0, depth0, depth1, T_0to1, K0, K1):
+    """ Warp kpts0 from I0 to I1 with depth, K and Rt
+    Also check covisibility and depth consistency.
+    Depth is consistent if relative error < 0.2 (hard-coded).
+
+    Args:
+        kpts0 (torch.Tensor): [N, L, 2] - <x, y>,
+        depth0 (torch.Tensor): [N, H, W],
+        depth1 (torch.Tensor): [N, H, W],
+        T_0to1 (torch.Tensor): [N, 3, 4],
+        K0 (torch.Tensor): [N, 3, 3],
+        K1 (torch.Tensor): [N, 3, 3],
+    Returns:
+        calculable_mask (torch.Tensor): [N, L]
+        warped_keypoints0 (torch.Tensor): [N, L, 2] <x0_hat, y1_hat>
+    """
+    kpts0_long = kpts0.round().long().clip(0, 2000-1)
+
+    depth0[:, 0, :] = 0 ; depth1[:, 0, :] = 0
+    depth0[:, :, 0] = 0 ; depth1[:, :, 0] = 0
+
+    # Sample depth, get calculable_mask on depth != 0
+    kpts0_depth = torch.stack(
+        [depth0[i, kpts0_long[i, :, 1], kpts0_long[i, :, 0]] for i in range(kpts0.shape[0])], dim=0
+    )  # (N, L)
+    nonzero_mask = kpts0_depth > 0
+
+    # Draw cross marks on the image for each keypoint
+    # for b in range(len(kpts0)):
+    #     fig, ax = plt.subplots(1,2)
+    #     depth_np = depth0.numpy()[b]
+    #     depth_np_plot = depth_np.copy()
+    #     for x, y in kpts0_long[b, nonzero_mask[b], :].numpy():
+    #         cv2.drawMarker(depth_np_plot, (x, y), (255), cv2.MARKER_CROSS, markerSize=10, thickness=2)
+    #     ax[0].imshow(depth_np)
+    #     ax[1].imshow(depth_np_plot)
+
+    # Unproject
+    kpts0_h = torch.cat([kpts0, torch.ones_like(kpts0[:, :, [0]])], dim=-1) * kpts0_depth[..., None]  # (N, L, 3)
+    kpts0_cam = K0.inverse() @ kpts0_h.transpose(2, 1)  # (N, 3, L)
+
+    # Rigid Transform
+    w_kpts0_cam = T_0to1[:, :3, :3] @ kpts0_cam + T_0to1[:, :3, [3]]  # (N, 3, L)
+    w_kpts0_depth_computed = w_kpts0_cam[:, 2, :]
+
+    # Project
+    w_kpts0_h = (K1 @ w_kpts0_cam).transpose(2, 1)  # (N, L, 3)
+    w_kpts0 = w_kpts0_h[:, :, :2] / (w_kpts0_h[:, :, [2]] + 1e-5)  # (N, L, 2), +1e-5 to avoid zero depth
+
+    # Covisible Check
+    # h, w = depth1.shape[1:3]
+    # covisible_mask = (w_kpts0[:, :, 0] > 0) * (w_kpts0[:, :, 0] < w-1) * \
+    #     (w_kpts0[:, :, 1] > 0) * (w_kpts0[:, :, 1] < h-1)
+    # w_kpts0_long = w_kpts0.long()
+    # w_kpts0_long[~covisible_mask, :] = 0
+
+    # w_kpts0_depth = torch.stack(
+    #     [depth1[i, w_kpts0_long[i, :, 1], w_kpts0_long[i, :, 0]] for i in range(w_kpts0_long.shape[0])], dim=0
+    # )  # (N, L)
+    # consistent_mask = ((w_kpts0_depth - w_kpts0_depth_computed) / w_kpts0_depth).abs() < 0.2
+
+    valid_mask = nonzero_mask  # * consistent_mask * covisible_mask
+
+    return valid_mask, w_kpts0
+
+
+@torch.no_grad()
+def spvs_coarse(data, scale = 8):
+    """
+    Supervise corresp with dense depth & camera poses
+    """
+
+    # 1. misc
+    device = data['image0'].device
+    N, _, H0, W0 = data['image0'].shape
+    _, _, H1, W1 = data['image1'].shape
+    #scale = 8
+    scale0 = scale * data['scale0'][:, None] if 'scale0' in data else scale
+    scale1 = scale * data['scale1'][:, None] if 'scale1' in data else scale
+    h0, w0, h1, w1 = map(lambda x: x // scale, [H0, W0, H1, W1])
+
+    # 2. warp grids
+    # create kpts in meshgrid and resize them to image resolution
+    grid_pt1_c = create_meshgrid(h1, w1, False, device).reshape(1, h1*w1, 2).repeat(N, 1, 1)  # [N, hw, 2]
+    grid_pt1_i = scale1 * grid_pt1_c
+
+    # warp kpts bi-directionally and check reproj error
+    nonzero_m1, w_pt1_i = warp_kpts(grid_pt1_i, data['depth1'], data['depth0'], data['T_1to0'], data['K1'], data['K0'])
+    nonzero_m2, w_pt1_og = warp_kpts(w_pt1_i, data['depth0'], data['depth1'], data['T_0to1'], data['K0'], data['K1'])
+
+    dist = torch.linalg.norm(grid_pt1_i - w_pt1_og, dim=-1)
+    mask_mutual = (dist < 1.5) & nonzero_m1 & nonzero_m2
+
+    #_, w_pt1_i = warp_kpts(grid_pt1_i, data['depth1'], data['depth0'], data['T_1to0'], data['K1'], data['K0'])
+    batched_corrs = [torch.cat([w_pt1_i[i, mask_mutual[i]] / data['scale0'][i],
+                                grid_pt1_i[i, mask_mutual[i]] / data['scale1'][i]], dim=-1) for i in range(len(mask_mutual))]
+
+    # Remove repeated correspondences - this is important for network convergence
+    corrs = []
+    for pts in batched_corrs:
+        lut_mat12 = torch.ones((h1, w1, 4), device = device, dtype = torch.float32) * -1
+        lut_mat21 = torch.clone(lut_mat12)
+        src_pts = pts[:, :2] / scale
+        tgt_pts = pts[:, 2:] / scale
+        try:
+            lut_mat12[src_pts[:,1].long(), src_pts[:,0].long()] = torch.cat([src_pts, tgt_pts], dim=1)
+            mask_valid12 = torch.all(lut_mat12 >= 0, dim=-1)
+            points = lut_mat12[mask_valid12]
+
+            # Target-src check
+            src_pts, tgt_pts = points[:, :2], points[:, 2:]
+            lut_mat21[tgt_pts[:,1].long(), tgt_pts[:,0].long()] = torch.cat([src_pts, tgt_pts], dim=1)
+            mask_valid21 = torch.all(lut_mat21 >= 0, dim=-1)
+            points = lut_mat21[mask_valid21]
+
+            corrs.append(points)
+        except:
+            pdb.set_trace()
+            print('..')
+
+    # Plot for debug purposes
+    # for i in range(len(corrs)):
+    #     plot_corrs(data['image0'][i], data['image1'][i], corrs[i][:, :2]*8, corrs[i][:, 2:]*8)
+
+    return corrs
+
+@torch.no_grad()
+def get_correspondences(pts2, data, idx):
+    device = data['image0'].device
+    N, _, H0, W0 = data['image0'].shape
+    _, _, H1, W1 = data['image1'].shape
+
+    pts2 = pts2[None, ...]
+
+    scale0 = data['scale0'][idx, None][None, ...] if 'scale0' in data else 1
+    scale1 = data['scale1'][idx, None][None, ...] if 'scale1' in data else 1
+
+    pts2 = scale1 * pts2 * 8
+
+    # warp kpts bi-directionally and check reproj error
+    nonzero_m1, pts1 = warp_kpts(pts2, data['depth1'][idx][None, ...], data['depth0'][idx][None, ...], data['T_1to0'][idx][None, ...],
+                                 data['K1'][idx][None, ...], data['K0'][idx][None, ...])
+
+    corrs = torch.cat([pts1[0, :] / data['scale0'][idx],
+                       pts2[0, :] / data['scale1'][idx]], dim=-1)
+
+    # plot_corrs(data['image0'][idx], data['image1'][idx], corrs[:, :2], corrs[:, 2:])
+
+    return corrs
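For reference, the core of `warp_kpts` above is the classic unproject → rigid-transform → project chain. A minimal single-point sketch of that chain follows; all camera values here are invented for illustration, and the identity pose is used so the input pixel is recovered:

```python
# Minimal sketch of the warp used by warp_kpts (hypothetical values).
import torch

K = torch.tensor([[500., 0., 320.],
                  [0., 500., 240.],
                  [0., 0., 1.]])
T = torch.eye(4)[:3]                 # identity relative pose [R | t], shape (3, 4)
kpt = torch.tensor([100., 80.])      # pixel in image 0
depth = torch.tensor(2.0)            # sampled depth at that pixel

# Unproject to camera space using the sampled depth
p_cam = K.inverse() @ (torch.cat([kpt, torch.ones(1)]) * depth)
# Apply the relative pose
p_cam1 = T[:, :3] @ p_cam + T[:, 3]
# Project into the second camera (same intrinsics here for simplicity)
p_img1 = K @ p_cam1
p_img1 = p_img1[:2] / (p_img1[2] + 1e-5)
print(p_img1)                        # identity pose: recovers (100, 80)
```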
imcui/third_party/LiftFeat/demo.py
ADDED
@@ -0,0 +1,68 @@
+import os
+import sys
+import torch
+import numpy as np
+import math
+import cv2
+
+os.environ['CUDA_VISIBLE_DEVICES']='1'
+
+from models.liftfeat_wrapper import LiftFeat,MODEL_PATH
+
+import argparse
+
+parser=argparse.ArgumentParser(description='LiftFeat matching demo script')
+parser.add_argument('--name',type=str,default='LiftFeat',help='experiment name')
+parser.add_argument('--img1',type=str,default='./assert/ref.jpg',help='reference image path')
+parser.add_argument('--img2',type=str,default='./assert/query.jpg',help='query image path')
+parser.add_argument('--gpu',type=str,default='0',help='GPU ID')
+args=parser.parse_args()
+
+os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
+
+
+def warp_corners_and_draw_matches(ref_points, dst_points, img1, img2):
+    # Calculate the Homography matrix
+    H, mask = cv2.findHomography(ref_points, dst_points, cv2.USAC_MAGSAC, 3.5, maxIters=1_000, confidence=0.999)
+    mask = mask.flatten()
+
+    # Get corners of the first image (image1)
+    h, w = img1.shape[:2]
+    corners_img1 = np.array([[0, 0], [w-1, 0], [w-1, h-1], [0, h-1]], dtype=np.float32).reshape(-1, 1, 2)
+
+    # Warp corners to the second image (image2) space
+    warped_corners = cv2.perspectiveTransform(corners_img1, H)
+
+    # Draw the warped corners in image2
+    img2_with_corners = img2.copy()
+
+    # Prepare keypoints and matches for drawMatches function
+    keypoints1 = [cv2.KeyPoint(float(p[0]), float(p[1]), 5) for p in ref_points]
+    keypoints2 = [cv2.KeyPoint(float(p[0]), float(p[1]), 5) for p in dst_points]
+    matches = [cv2.DMatch(i,i,0) for i in range(len(mask)) if mask[i]]
+
+    # Draw inlier matches
+    img_matches = cv2.drawMatches(img1, keypoints1, img2_with_corners, keypoints2, matches, None,
+                                  matchColor=(0, 255, 0), flags=2)
+
+    return img_matches
+
+
+if __name__=="__main__":
+    liftfeat=LiftFeat(weight=MODEL_PATH,detect_threshold=0.05)
+
+    img1=cv2.imread(args.img1)
+    img2=cv2.imread(args.img2)
+
+    # import pdb;pdb.set_trace()
+    mkpts1,mkpts2=liftfeat.match_liftfeat(img1,img2)
+    canvas=warp_corners_and_draw_matches(mkpts1,mkpts2,img1,img2)
+
+    import matplotlib.pyplot as plt
+    plt.figure(figsize=[12,12])
+    plt.imshow(canvas[...,::-1])
+
+    plt.savefig(os.path.join(os.path.dirname(__file__),'match.jpg'), dpi=300, bbox_inches='tight')
+
+    plt.show()
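If the defaults above hold, the demo should be runnable from the LiftFeat root as `python demo.py --img1 ./assert/ref.jpg --img2 ./assert/query.jpg`; it estimates a homography with MAGSAC, draws the inlier matches, and saves the visualization to `match.jpg` next to the script.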
imcui/third_party/LiftFeat/evaluation/HPatch_evaluation.py
ADDED
@@ -0,0 +1,182 @@
+import cv2
+import os
+from tqdm import tqdm
+import torch
+import numpy as np
+import sys
+import poselib
+
+sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
+
+import argparse
+import datetime
+
+parser=argparse.ArgumentParser(description='HPatch dataset evaluation script')
+parser.add_argument('--name',type=str,default='LiftFeat',help='experiment name')
+parser.add_argument('--gpu',type=str,default='0',help='GPU ID')
+args=parser.parse_args()
+
+os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
+
+use_cuda = torch.cuda.is_available()
+device = torch.device("cuda" if use_cuda else "cpu")
+
+top_k = None
+n_i = 52
+n_v = 56
+
+DATASET_ROOT = os.path.join(os.path.dirname(__file__),'../data/HPatch')
+
+from evaluation.eval_utils import *
+from models.liftfeat_wrapper import LiftFeat
+
+
+poselib_config = {"ransac_th": 3.0, "options": {}}
+
+class PoseLibHomographyEstimator:
+    def __init__(self, conf):
+        self.conf = conf
+
+    def estimate(self, mkpts0, mkpts1):
+        M, info = poselib.estimate_homography(
+            mkpts0,
+            mkpts1,
+            {
+                "max_reproj_error": self.conf["ransac_th"],
+                **self.conf["options"],
+            },
+        )
+        success = M is not None
+        if not success:
+            M = np.eye(3,dtype=np.float32)
+            inl = np.zeros(mkpts0.shape[0],dtype=np.bool_)
+        else:
+            inl = info["inliers"]
+
+        estimation = {
+            "success": success,
+            "M_0to1": M,
+            "inliers": inl,
+        }
+
+        return estimation
+
+
+estimator=PoseLibHomographyEstimator(poselib_config)
+
+
+def poselib_homography_estimate(mkpts0,mkpts1):
+    data=estimator.estimate(mkpts0,mkpts1)
+    return data
+
+
+def generate_standard_image(img,target_size=(1920,1080)):
+    sh,sw=img.shape[0],img.shape[1]
+    rh,rw=float(target_size[1])/float(sh),float(target_size[0])/float(sw)
+    ratio=min(rh,rw)
+    nh,nw=int(ratio*sh),int(ratio*sw)
+    ph,pw=target_size[1]-nh,target_size[0]-nw
+    nimg=cv2.resize(img,(nw,nh))
+    nimg=cv2.copyMakeBorder(nimg,0,ph,0,pw,cv2.BORDER_CONSTANT,value=(0,0,0))
+
+    return nimg,ratio,ph,pw
+
+
+def benchmark_features(match_fn):
+    lim = [1, 9]
+    rng = np.arange(lim[0], lim[1] + 1)
+
+    seq_names = sorted(os.listdir(DATASET_ROOT))
+
+    n_feats = []
+    n_matches = []
+    seq_type = []
+    i_err = {thr: 0 for thr in rng}
+    v_err = {thr: 0 for thr in rng}
+
+    i_err_homo = {thr: 0 for thr in rng}
+    v_err_homo = {thr: 0 for thr in rng}
+
+    for seq_idx, seq_name in tqdm(enumerate(seq_names), total=len(seq_names)):
+        # load reference image
+        ref_img = cv2.imread(os.path.join(DATASET_ROOT, seq_name, "1.ppm"))
+        ref_img_shape=ref_img.shape
+
+        # load query images
+        for im_idx in range(2, 7):
+            # read ground-truth homography
+            homography = np.loadtxt(os.path.join(DATASET_ROOT, seq_name, "H_1_" + str(im_idx)))
+            query_img = cv2.imread(os.path.join(DATASET_ROOT, seq_name, f"{im_idx}.ppm"))
+
+            mkpts_a,mkpts_b=match_fn(ref_img,query_img)
+
+            pos_a = mkpts_a
+            pos_a_h = np.concatenate([pos_a, np.ones([pos_a.shape[0], 1])], axis=1)
+            pos_b_proj_h = np.transpose(np.dot(homography, np.transpose(pos_a_h)))
+            pos_b_proj = pos_b_proj_h[:, :2] / pos_b_proj_h[:, 2:]
+
+            pos_b = mkpts_b
+
+            dist = np.sqrt(np.sum((pos_b - pos_b_proj) ** 2, axis=1))
+
+            n_matches.append(pos_a.shape[0])
+            seq_type.append(seq_name[0])
+
+            if dist.shape[0] == 0:
+                dist = np.array([float("inf")])
+
+            for thr in rng:
+                if seq_name[0] == "i":
+                    i_err[thr] += np.mean(dist <= thr)
+                else:
+                    v_err[thr] += np.mean(dist <= thr)
+
+            # estimate homography
+            gt_homo = homography
+            pred_homo, _ = cv2.findHomography(mkpts_a,mkpts_b,cv2.USAC_MAGSAC)
+            if pred_homo is None:
+                homo_dist = np.array([float("inf")])
+            else:
+                corners = np.array(
+                    [
+                        [0, 0],
+                        [ref_img_shape[1] - 1, 0],
+                        [0, ref_img_shape[0] - 1],
+                        [ref_img_shape[1] - 1, ref_img_shape[0] - 1],
+                    ]
+                )
+                real_warped_corners = homo_trans(corners, gt_homo)
+                warped_corners = homo_trans(corners, pred_homo)
+                homo_dist = np.mean(np.linalg.norm(real_warped_corners - warped_corners, axis=1))
+
+            for thr in rng:
+                if seq_name[0] == "i":
+                    i_err_homo[thr] += np.mean(homo_dist <= thr)
+                else:
+                    v_err_homo[thr] += np.mean(homo_dist <= thr)
+
+    seq_type = np.array(seq_type)
+    n_feats = np.array(n_feats)
+    n_matches = np.array(n_matches)
+
+    return i_err, v_err, i_err_homo, v_err_homo, [seq_type, n_feats, n_matches]
+
+
+if __name__ == "__main__":
+    errors = {}
+
+    weights=os.path.join(os.path.dirname(__file__),'../weights/LiftFeat.pth')
+    liftfeat=LiftFeat(weight=weights)
+
+    errors = benchmark_features(liftfeat.match_liftfeat)
+
+    i_err, v_err, i_err_hom, v_err_hom, _ = errors
+
+    cur_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    print(f'\n==={cur_time}==={args.name}===')
+    print(f"MHA@3 MHA@5 MHA@7")
+    for thr in [3, 5, 7]:
+        ill_err_hom = i_err_hom[thr] / (n_i * 5)
+        view_err_hom = v_err_hom[thr] / (n_v * 5)
+        print(f"{ill_err_hom * 100:.2f}%-{view_err_hom * 100:.2f}%")
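The MHA@k numbers printed above count an image pair as correct when the mean distance between the four reference-image corners warped by the ground-truth homography and by the estimated one is at most k pixels. A toy sketch of that corner-error computation, with made-up homographies:

```python
# Corner-error metric behind MHA@k, on invented homographies.
import numpy as np

def homo_trans(coord, H):
    homo = np.concatenate([coord, np.ones((coord.shape[0], 1))], axis=-1)
    proj = (H @ homo.T).T
    return proj[:, :2] / proj[:, 2:]

h, w = 480, 640
corners = np.array([[0, 0], [w - 1, 0], [0, h - 1], [w - 1, h - 1]], dtype=np.float64)
H_gt = np.array([[1., 0., 5.], [0., 1., -3.], [0., 0., 1.]])
H_pred = np.array([[1., 0., 4.], [0., 1., -2.], [0., 0., 1.]])

err = np.mean(np.linalg.norm(homo_trans(corners, H_gt) - homo_trans(corners, H_pred), axis=1))
print(err)  # ~1.41 px, so this pair would count as correct for MHA@3/5/7
```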
imcui/third_party/LiftFeat/evaluation/MegaDepth1500_evaluation.py
ADDED
@@ -0,0 +1,105 @@
+import os
+import sys
+import cv2
+from pathlib import Path
+import numpy as np
+import torch
+import torch.utils.data as data
+import tqdm
+from copy import deepcopy
+from torchvision.transforms import ToTensor
+import torch.nn.functional as F
+import json
+
+import scipy.io as scio
+import poselib
+
+import argparse
+import datetime
+
+parser=argparse.ArgumentParser(description='MegaDepth dataset evaluation script')
+parser.add_argument('--name',type=str,default='LiftFeat',help='experiment name')
+parser.add_argument('--gpu',type=str,default='0',help='GPU ID')
+args=parser.parse_args()
+
+os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
+
+sys.path.append(os.path.join(os.path.dirname(__file__),'../'))
+from models.liftfeat_wrapper import LiftFeat
+from evaluation.eval_utils import *
+
+from torch.utils.data import Dataset,DataLoader
+
+use_cuda = torch.cuda.is_available()
+device = "cuda" if use_cuda else "cpu"
+
+DATASET_ROOT = os.path.join(os.path.dirname(__file__),'../data/megadepth_test_1500')
+DATASET_JSON = os.path.join(os.path.dirname(__file__),'../data/megadepth_1500.json')
+
+class MegaDepth1500(Dataset):
+    """
+    Streamlined MegaDepth-1500 dataloader. The camera poses & metadata are stored in a formatted json for facilitating
+    the download of the dataset and to keep the setup as simple as possible.
+    """
+    def __init__(self, json_file, root_dir):
+        # Load the info & calibration from the JSON
+        with open(json_file, 'r') as f:
+            self.data = json.load(f)
+
+        self.root_dir = root_dir
+
+        if not os.path.exists(self.root_dir):
+            raise RuntimeError(
+                f"Dataset {self.root_dir} does not exist! \n \
+                > If you didn't download the dataset, use the downloader tool: python3 -m modules.dataset.download -h")
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        data = deepcopy(self.data[idx])
+
+        h1, w1 = data['size0_hw']
+        h2, w2 = data['size1_hw']
+
+        # Here we resize the images to max_dim = 1200, as described in the paper, and adjust the image such that it is divisible by 32
+        # following the protocol of the LoFTR's Dataloader (intrinsics are corrected accordingly).
+        # For adapting this with different resolution, you would need to re-scale intrinsics below.
+        image0 = cv2.resize(cv2.imread(f"{self.root_dir}/{data['pair_names'][0]}"),(w1, h1))
+
+        image1 = cv2.resize(cv2.imread(f"{self.root_dir}/{data['pair_names'][1]}"),(w2, h2))
+
+        data['image0'] = torch.tensor(image0.astype(np.float32)/255).permute(2,0,1)
+        data['image1'] = torch.tensor(image1.astype(np.float32)/255).permute(2,0,1)
+
+        for k,v in data.items():
+            if k not in ('dataset_name', 'scene_id', 'pair_id', 'pair_names', 'size0_hw', 'size1_hw', 'image0', 'image1'):
+                data[k] = torch.tensor(np.array(v, dtype=np.float32))
+
+        return data
+
+if __name__ == "__main__":
+    weights=os.path.join(os.path.dirname(__file__),'../weights/LiftFeat.pth')
+    liftfeat=LiftFeat(weight=weights)
+
+    dataset = MegaDepth1500(json_file = DATASET_JSON, root_dir = DATASET_ROOT)
+
+    loader = DataLoader(dataset, batch_size=1, shuffle=False)
+
+    metrics = {}
+    R_errs = []
+    t_errs = []
+    inliers = []
+
+    results=[]
+
+    cur_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    for d in tqdm.tqdm(loader, desc="processing"):
+        error_infos = compute_pose_error(liftfeat.match_liftfeat,d)
+        results.append(error_infos)
+
+    print(f'\n==={cur_time}==={args.name}===')
+    d_err_auc,errors=compute_maa(results)
+    for s_k,s_v in d_err_auc.items():
+        print(f'{s_k}: {s_v*100}')
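Assuming the MegaDepth-1500 images sit under `data/megadepth_test_1500` and the metadata JSON is in place, the script above should be runnable as `python evaluation/MegaDepth1500_evaluation.py --name LiftFeat --gpu 0`; it prints the pose-error AUC at 5/10/20 degrees (`auc@5`, `auc@10`, `auc@20`) computed by `compute_maa` from `eval_utils`.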
imcui/third_party/LiftFeat/evaluation/eval_utils.py
ADDED
@@ -0,0 +1,127 @@
+import numpy as np
+import torch
+import poselib
+
+
+def relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0):
+    # angle error between 2 vectors
+    t_gt = T_0to1[:3, 3]
+    n = np.linalg.norm(t) * np.linalg.norm(t_gt)
+    t_err = np.rad2deg(np.arccos(np.clip(np.dot(t, t_gt) / n, -1.0, 1.0)))
+    t_err = np.minimum(t_err, 180 - t_err)  # handle E ambiguity
+    if np.linalg.norm(t_gt) < ignore_gt_t_thr:  # pure rotation is challenging
+        t_err = 0
+
+    # angle error between 2 rotation matrices
+    R_gt = T_0to1[:3, :3]
+    cos = (np.trace(np.dot(R.T, R_gt)) - 1) / 2
+    cos = np.clip(cos, -1.0, 1.0)  # handle numerical errors
+    R_err = np.rad2deg(np.abs(np.arccos(cos)))
+
+    return t_err, R_err
+
+def intrinsics_to_camera(K):
+    px, py = K[0, 2], K[1, 2]
+    fx, fy = K[0, 0], K[1, 1]
+    return {
+        "model": "PINHOLE",
+        "width": int(2 * px),
+        "height": int(2 * py),
+        "params": [fx, fy, px, py],
+    }
+
+
+def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999):
+    M, info = poselib.estimate_relative_pose(
+        kpts0, kpts1,
+        intrinsics_to_camera(K0),
+        intrinsics_to_camera(K1),
+        {"max_epipolar_error": thresh,
+         "success_prob": conf,
+         "min_iterations": 20,
+         "max_iterations": 1_000},
+    )
+
+    R, t, inl = M.R, M.t, info["inliers"]
+    inl = np.array(inl)
+    ret = (R, t, inl)
+
+    return ret
+
+def tensor2bgr(t):
+    return (t.cpu()[0].permute(1,2,0).numpy()*255).astype(np.uint8)
+
+def compute_pose_error(match_fn,data):
+    result = {}
+
+    with torch.no_grad():
+        mkpts0,mkpts1=match_fn(tensor2bgr(data["image0"]),tensor2bgr(data["image1"]))
+
+    mkpts0=mkpts0 * data["scale0"].numpy()
+    mkpts1=mkpts1 * data["scale1"].numpy()
+
+    K0, K1 = data["K0"][0].numpy(), data["K1"][0].numpy()
+    T_0to1 = data["T_0to1"][0].numpy()
+    T_1to0 = data["T_1to0"][0].numpy()
+
+    result={}
+    conf = 0.99999
+
+    ret = estimate_pose(mkpts0,mkpts1,K0,K1,4.0,conf)
+    if ret is not None:
+        R, t, inliers = ret
+        t_err, R_err = relative_pose_error(T_0to1, R, t, ignore_gt_t_thr=0.0)
+        result['R_err'] = R_err
+        result['t_err'] = t_err
+
+    return result
+
+
+def error_auc(errors, thresholds=[5, 10, 20]):
+    """
+    Args:
+        errors (list): [N,]
+        thresholds (list)
+    """
+    errors = [0] + sorted(list(errors))
+    recall = list(np.linspace(0, 1, len(errors)))
+
+    aucs = []
+
+    for thr in thresholds:
+        last_index = np.searchsorted(errors, thr)
+        y = recall[:last_index] + [recall[last_index-1]]
+        x = errors[:last_index] + [thr]
+        aucs.append(np.trapz(y, x) / thr)
+
+    return {f'auc@{t}': auc for t, auc in zip(thresholds, aucs)}
+
+def compute_maa(pairs, thresholds=[5, 10, 20]):
+    # print("auc / mAcc on %d pairs" % (len(pairs)))
+    errors = []
+
+    for p in pairs:
+        et = p['t_err']
+        er = p['R_err']
+        errors.append(max(et, er))
+
+    d_err_auc = error_auc(errors)
+
+    # for k,v in d_err_auc.items():
+    #     print(k, ': ', '%.1f'%(v*100))
+
+    errors = np.array(errors)
+
+    for t in thresholds:
+        acc = (errors <= t).sum() / len(errors)
+        # print("mAcc@%d: %.1f "%(t, acc*100))
+
+    return d_err_auc,errors
+
+def homo_trans(coord, H):
+    kpt_num = coord.shape[0]
+    homo_coord = np.concatenate((coord, np.ones((kpt_num, 1))), axis=-1)
+    proj_coord = np.matmul(H, homo_coord.T).T
+    proj_coord = proj_coord / proj_coord[:, 2][..., None]
+    proj_coord = proj_coord[:, 0:2]
+    return proj_coord
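For a concrete feel of `error_auc`, here is a small self-contained run of the same recall-integration scheme on an invented error list: the sorted errors form the x-axis, recall the y-axis, and the curve is integrated up to each threshold and normalized by it.

```python
# Self-contained check of the AUC computation used by error_auc (toy errors).
import numpy as np

errors = [0] + sorted([1.0, 2.0, 8.0, 15.0, 30.0])
recall = list(np.linspace(0, 1, len(errors)))

for thr in [5, 10, 20]:
    last = np.searchsorted(errors, thr)
    y = recall[:last] + [recall[last - 1]]   # hold recall flat up to the threshold
    x = errors[:last] + [thr]
    print(f"auc@{thr}: {np.trapz(y, x) / thr:.3f}")
```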
imcui/third_party/LiftFeat/loss/loss.py
ADDED
@@ -0,0 +1,291 @@
+import os
+import sys
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import time
+
+
+def dual_softmax_loss(X, Y, temp = 0.2):
+    if X.size() != Y.size() or X.dim() != 2 or Y.dim() != 2:
+        raise RuntimeError('Error: X and Y shapes must match and be 2D matrices')
+
+    dist_mat = (X @ Y.t()) * temp
+    conf_matrix12 = F.log_softmax(dist_mat, dim=1)
+    conf_matrix21 = F.log_softmax(dist_mat.t(), dim=1)
+
+    with torch.no_grad():
+        conf12 = torch.exp( conf_matrix12 ).max(dim=-1)[0]
+        conf21 = torch.exp( conf_matrix21 ).max(dim=-1)[0]
+        conf = conf12 * conf21
+
+    target = torch.arange(len(X), device = X.device)
+
+    loss = F.nll_loss(conf_matrix12, target) + \
+           F.nll_loss(conf_matrix21, target)
+
+    return loss, conf
+
+
+class LiftFeatLoss(nn.Module):
+    def __init__(self,device,lam_descs=1,lam_fb_descs=1,lam_kpts=1,lam_heatmap=1,lam_normals=1,lam_coordinates=1,lam_fb_coordinates=1,depth_spvs=False):
+        super().__init__()
+
+        # loss parameters
+        self.lam_descs=lam_descs
+        self.lam_fb_descs=lam_fb_descs
+        self.lam_kpts=lam_kpts
+        self.lam_heatmap=lam_heatmap
+        self.lam_normals=lam_normals
+        self.lam_coordinates=lam_coordinates
+        self.lam_fb_coordinates=lam_fb_coordinates
+        self.depth_spvs=depth_spvs
+        self.running_descs_loss=0
+        self.running_kpts_loss=0
+        self.running_heatmaps_loss=0
+        self.loss_descs=0
+        self.loss_fb_descs=0
+        self.loss_kpts=0
+        self.loss_heatmaps=0
+        self.loss_normals=0
+        self.loss_coordinates=0
+        self.loss_fb_coordinates=0
+        self.acc_coarse=0
+        self.acc_fb_coarse=0
+        self.acc_kpt=0
+        self.acc_coordinates=0
+        self.acc_fb_coordinates=0
+
+        # device
+        self.dev=device
+
+
+    def check_accuracy(self,m1,m2,pts1=None,pts2=None,plot=False):
+        with torch.no_grad():
+            #dist_mat = torch.cdist(X,Y)
+            dist_mat = m1 @ m2.t()
+            nn = torch.argmax(dist_mat, dim=1)
+            #nn = torch.argmin(dist_mat, dim=1)
+            correct = nn == torch.arange(len(m1), device = m1.device)
+
+            if pts1 is not None and plot:
+                import matplotlib.pyplot as plt
+                canvas = torch.zeros((60, 80),device=m1.device)
+                pts1 = pts1[~correct]
+                canvas[pts1[:,1].long(), pts1[:,0].long()] = 1
+                canvas = canvas.cpu().numpy()
+                plt.imshow(canvas), plt.show()
+
+            acc = correct.sum().item() / len(m1)
+            return acc
+
+    def compute_descriptors_loss(self,descs1,descs2,pts):
+        loss=[]
+        acc=0
+        B,_,H,W=descs1.shape
+        conf_list=[]
+
+        for b in range(B):
+            pts1,pts2=pts[b][:,:2],pts[b][:,2:]
+            m1=descs1[b,:,pts1[:,1].long(),pts1[:,0].long()].permute(1,0)
+            m2=descs2[b,:,pts2[:,1].long(),pts2[:,0].long()].permute(1,0)
+
+            loss_per,conf_per=dual_softmax_loss(m1,m2)
+            loss.append(loss_per.unsqueeze(0))
+            conf_list.append(conf_per)
+
+            acc_coarse_per=self.check_accuracy(m1,m2)
+            acc += acc_coarse_per
+
+        loss=torch.cat(loss,dim=-1).mean()
+        acc /= B
+        return loss,acc,conf_list
+
+
+    def alike_distill_loss(self,kpts,alike_kpts):
+        C, H, W = kpts.shape
+        kpts = kpts.permute(1,2,0)
+        # get ALike keypoints
+        with torch.no_grad():
+            labels = torch.ones((H, W), dtype = torch.long, device = kpts.device) * 64  # -> Default is non-keypoint (bin 64)
+            offsets = (((alike_kpts/8) - (alike_kpts/8).long())*8).long()
+            offsets = offsets[:, 0] + 8*offsets[:, 1]  # Linear IDX
+            labels[(alike_kpts[:,1]/8).long(), (alike_kpts[:,0]/8).long()] = offsets
+
+        kpts = kpts.view(-1,C)
+        labels = labels.view(-1)
+
+        mask = labels < 64
+        idxs_pos = mask.nonzero().flatten()
+        idxs_neg = (~mask).nonzero().flatten()
+        perm = torch.randperm(idxs_neg.size(0))[:len(idxs_pos)//32]
+        idxs_neg = idxs_neg[perm]
+        idxs = torch.cat([idxs_pos, idxs_neg])
+
+        kpts = kpts[idxs]
+        labels = labels[idxs]
+
+        with torch.no_grad():
+            predicted = kpts.max(dim=-1)[1]
+            acc = (labels == predicted)
+            acc = acc.sum() / len(acc)
+
+        kpts = F.log_softmax(kpts,dim=-1)
+        loss = F.nll_loss(kpts, labels, reduction = 'mean')
+
+        return loss, acc
+
+
+    def compute_keypoints_loss(self,kpts1,kpts2,alike_kpts1,alike_kpts2):
+        loss=[]
+        acc=0
+        B,_,H,W=kpts1.shape
+
+        for b in range(B):
+            loss_per1,acc_per1=self.alike_distill_loss(kpts1[b],alike_kpts1[b])
+            loss_per2,acc_per2=self.alike_distill_loss(kpts2[b],alike_kpts2[b])
+            loss_per=(loss_per1+loss_per2)
+            acc_per=(acc_per1+acc_per2)/2
+            loss.append(loss_per.unsqueeze(0))
+            acc += acc_per
+
+        loss=torch.cat(loss,dim=-1).mean()
+        acc /= B
+        return loss,acc
+
+
+    def compute_heatmaps_loss(self,heatmaps1,heatmaps2,pts,conf_list):
+        loss=[]
+        B,_,H,W=heatmaps1.shape
+
+        for b in range(B):
+            pts1,pts2=pts[b][:,:2],pts[b][:,2:]
+            h1=heatmaps1[b,0,pts1[:,1].long(),pts1[:,0].long()]
+            h2=heatmaps2[b,0,pts2[:,1].long(),pts2[:,0].long()]
+
+            conf=conf_list[b]
+            loss_per1=F.l1_loss(h1,conf)
+            loss_per2=F.l1_loss(h2,conf)
+            loss_per=(loss_per1+loss_per2)
+            loss.append(loss_per.unsqueeze(0))
+
+        loss=torch.cat(loss,dim=-1).mean()
+        return loss
+
+
+    def normal_loss(self,normal,target_normal):
+        # import pdb;pdb.set_trace()
+        normal = normal.permute(1, 2, 0)
+        target_normal = target_normal.permute(1,2,0)
+        # loss = F.l1_loss(d_feat, depth_anything_normal_feat)
+        dot = torch.cosine_similarity(normal, target_normal, dim=2)
+        valid_mask = target_normal[:, :, 0].float() \
+                     * (dot.detach() < 0.999).float() \
+                     * (dot.detach() > -0.999).float()
+        valid_mask = valid_mask > 0.0
+        al = torch.acos(dot[valid_mask])
+        loss = torch.mean(al)
+        return loss
+
+
+    def compute_normals_loss(self,normals1,normals2,DA_normals1,DA_normals2,megadepth_batch_size,coco_batch_size):
+        loss=[]
+
+        # import pdb;pdb.set_trace()
+
+        # only MegaDepth images need depth-normal supervision
+        normals1=normals1[coco_batch_size:,...]
+        normals2=normals2[coco_batch_size:,...]
+        for b in range(len(DA_normals1)):
+            normal1,normal2=normals1[b],normals2[b]
+            loss_per1=self.normal_loss(normal1,DA_normals1[b].permute(2,0,1))
+            loss_per2=self.normal_loss(normal2,DA_normals2[b].permute(2,0,1))
+            loss_per=(loss_per1+loss_per2)
+            loss.append(loss_per.unsqueeze(0))
+
+        loss=torch.cat(loss,dim=-1).mean()
+        return loss
+
+
+    def coordinate_loss(self,coordinate,conf,pts1):
+        with torch.no_grad():
+            coordinate_detached = pts1 * 8
+            offset_detached = (coordinate_detached/8) - (coordinate_detached/8).long()
+            offset_detached = (offset_detached * 8).long()
+            label = offset_detached[:, 0] + 8*offset_detached[:, 1]
+
+        #pdb.set_trace()
+        coordinate_log = F.log_softmax(coordinate, dim=-1)
+
+        predicted = coordinate.max(dim=-1)[1]
+        acc = (label == predicted)
+        acc = acc[conf > 0.1]
+        acc = acc.sum() / len(acc)
+
+        loss = F.nll_loss(coordinate_log, label, reduction = 'none')
+
+        # Weight loss by confidence, giving more emphasis on reliable matches
+        conf = conf / conf.sum()
+        loss = (loss * conf).sum()
+
+        return loss*2., acc
+
+    def compute_coordinates_loss(self,coordinates,pts,conf_list):
+        loss=[]
+        acc=0
+        B,_,H,W=coordinates.shape
+
+        for b in range(B):
+            pts1,pts2=pts[b][:,:2],pts[b][:,2:]
+            coordinate=coordinates[b,:,pts1[:,1].long(),pts1[:,0].long()].permute(1,0)
+            conf=conf_list[b]
+
+            loss_per,acc_per=self.coordinate_loss(coordinate,conf,pts1)
+            loss.append(loss_per.unsqueeze(0))
+            acc += acc_per
+
+        loss=torch.cat(loss,dim=-1).mean()
+        acc /= B
+
+        return loss,acc
+
+
+    def forward(self,
+                descs1,fb_descs1,kpts1,normals1,
+                descs2,fb_descs2,kpts2,normals2,
+                pts,coordinates,fb_coordinates,
+                alike_kpts1,alike_kpts2,
+                DA_normals1,DA_normals2,
+                megadepth_batch_size,coco_batch_size
+                ):
+        # import pdb;pdb.set_trace()
+        self.loss_descs,self.acc_coarse,conf_list=self.compute_descriptors_loss(descs1,descs2,pts)
+        self.loss_fb_descs,self.acc_fb_coarse,fb_conf_list=self.compute_descriptors_loss(fb_descs1,fb_descs2,pts)
+
+        # start=time.perf_counter()
+        self.loss_kpts,self.acc_kpt=self.compute_keypoints_loss(kpts1,kpts2,alike_kpts1,alike_kpts2)
+        # end=time.perf_counter()
+        # print(f"kpts loss cost {end-start} seconds")
+
+        # start=time.perf_counter()
+        self.loss_normals=self.compute_normals_loss(normals1,normals2,DA_normals1,DA_normals2,megadepth_batch_size,coco_batch_size)
+        # end=time.perf_counter()
+        # print(f"normal loss cost {end-start} seconds")
+
+        self.loss_coordinates,self.acc_coordinates=self.compute_coordinates_loss(coordinates,pts,conf_list)
+        self.loss_fb_coordinates,self.acc_fb_coordinates=self.compute_coordinates_loss(fb_coordinates,pts,fb_conf_list)
+
+        return {
+            'loss_descs':self.lam_descs*self.loss_descs,
+            'acc_coarse':self.acc_coarse,
+            'loss_coordinates':self.lam_coordinates*self.loss_coordinates,
+            'acc_coordinates':self.acc_coordinates,
+            'loss_fb_descs':self.lam_fb_descs*self.loss_fb_descs,
+            'acc_fb_coarse':self.acc_fb_coarse,
+            'loss_fb_coordinates':self.lam_fb_coordinates*self.loss_fb_coordinates,
+            'acc_fb_coordinates':self.acc_fb_coordinates,
+            'loss_kpts':self.lam_kpts*self.loss_kpts,
+            'acc_kpt':self.acc_kpt,
+            'loss_normals':self.lam_normals*self.loss_normals,
+        }
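A minimal sketch of the dual-softmax objective defined at the top of this file: the rows of two matched descriptor sets are pushed to be each other's softmax argmax in both directions. Descriptors here are random and purely for shape illustration; the temperature factor mirrors the source's `temp = 0.2`.

```python
# Dual-softmax objective on toy matched descriptors (invented data).
import torch
import torch.nn.functional as F

X = F.normalize(torch.randn(128, 64), dim=-1)            # 128 matched pairs
Y = F.normalize(X + 0.05 * torch.randn_like(X), dim=-1)  # noisy copies of X

dist = (X @ Y.t()) * 0.2                                  # temp = 0.2, as in the source
target = torch.arange(len(X))                             # row i should match column i
loss = F.nll_loss(F.log_softmax(dist, dim=1), target) \
     + F.nll_loss(F.log_softmax(dist.t(), dim=1), target)
print(loss.item())
```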
imcui/third_party/LiftFeat/models/interpolator.py
ADDED
@@ -0,0 +1,34 @@
+"""
+"LiftFeat: 3D Geometry-Aware Local Feature Matching"
+
+This script is used to interpolate rough descriptors from LiftFeat
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class InterpolateSparse2d(nn.Module):
+    """ Efficiently interpolate tensor at given sparse 2D positions. """
+    def __init__(self, mode = 'bicubic', align_corners = False):
+        super().__init__()
+        self.mode = mode
+        self.align_corners = align_corners
+
+    def normgrid(self, x, H, W):
+        """ Normalize coords to [-1,1]. """
+        return 2. * (x/(torch.tensor([W-1, H-1], device = x.device, dtype = x.dtype))) - 1.
+
+    def forward(self, x, pos, H, W):
+        """
+        Input
+            x: [B, C, H, W] feature tensor
+            pos: [B, N, 2] tensor of positions
+            H, W: int, original resolution of input 2d positions -- used in normalization [-1,1]
+
+        Returns
+            [B, N, C] sampled channels at 2d positions
+        """
+        grid = self.normgrid(pos, H, W).unsqueeze(-2).to(x.dtype)
+        x = F.grid_sample(x, grid, mode = self.mode, align_corners = False)
+        return x.permute(0,2,3,1).squeeze(-2)
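A quick usage sketch for `InterpolateSparse2d`, with invented shapes: sample a 64-channel coarse feature map at three keypoint positions given in original image coordinates.

```python
# Sampling a coarse descriptor map at sparse keypoints (toy shapes).
import torch
from models.interpolator import InterpolateSparse2d

sampler = InterpolateSparse2d('bicubic')
feats = torch.randn(1, 64, 60, 80)                             # [B, C, H/8, W/8]-style map
pos = torch.tensor([[[10., 20.], [100., 50.], [300., 200.]]])  # [B, N, 2] pixel coords
out = sampler(feats, pos, 480, 640)                            # H, W of the original image
print(out.shape)                                               # torch.Size([1, 3, 64])
```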
imcui/third_party/LiftFeat/models/liftfeat.py
ADDED
@@ -0,0 +1,190 @@
+import os
+import sys
+import torch
+import torch.nn as nn
+import numpy as np
+import math
+import cv2
+
+os.environ['CUDA_VISIBLE_DEVICES']='1'
+
+import kornia as K
+
+sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
+
+from models.model import LiftFeatSPModel
+from models.interpolator import InterpolateSparse2d
+from utils.config import featureboost_config
+
+
+class NonMaxSuppression(torch.nn.Module):
+    def __init__(self, rep_thr=0.1, top_k=4096):
+        super(NonMaxSuppression,self).__init__()
+        self.max_filter = torch.nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
+        self.rep_thr = rep_thr
+        self.top_k=top_k
+
+
+    def NMS(self, x, threshold = 0.05, kernel_size = 5):
+        B, _, H, W = x.shape
+        pad=kernel_size//2
+        local_max = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=pad)(x)
+        pos = (x == local_max) & (x > threshold)
+        pos_batched = [k.nonzero()[..., 1:].flip(-1) for k in pos]
+
+        pad_val = max([len(x) for x in pos_batched])
+        pos = torch.zeros((B, pad_val, 2), dtype=torch.long, device=x.device)
+
+        # Pad kpts and build (B, N, 2) tensor
+        for b in range(len(pos_batched)):
+            pos[b, :len(pos_batched[b]), :] = pos_batched[b]
+
+        return pos
+
+    def forward(self, score):
+        pos = self.NMS(score,self.rep_thr)
+
+        return pos
+
+def load_model(model, weight_path):
+    pretrained_weights = torch.load(weight_path)
+
+    model_keys = set(model.state_dict().keys())
+    pretrained_keys = set(pretrained_weights.keys())
+
+    missing_keys = model_keys - pretrained_keys
+    unexpected_keys = pretrained_keys - model_keys
+
+    if missing_keys:
+        print("Missing keys in pretrained weights:", missing_keys)
+    else:
+        print("No missing keys in pretrained weights.")
+
+    if unexpected_keys:
+        print("Unexpected keys in pretrained weights:", unexpected_keys)
+    else:
+        print("No unexpected keys in pretrained weights.")
+
+    if not missing_keys and not unexpected_keys:
+        model.load_state_dict(pretrained_weights)
+        print("Pretrained weights loaded successfully.")
+    else:
+        model.load_state_dict(pretrained_weights, strict=False)
+        print("There were issues with the keys.")
+    return model
+
+
+def load_torch_image(fname, device=torch.device('cpu')):
+    img = K.image_to_tensor(cv2.imread(fname), False).float() / 255.
+    img = K.color.bgr_to_rgb(img.to(device))
+
+    image=cv2.imread(fname)
+    H,W,C=image.shape[0],image.shape[1],image.shape[2]
+
+    _H=math.ceil(H/32)*32
+    _W=math.ceil(W/32)*32
+
+    pad_h=_H-H
+    pad_w=_W-W
+
+    image=cv2.copyMakeBorder(image,0,pad_h,0,pad_w,cv2.BORDER_CONSTANT,None,(0, 0, 0))
+
+    pad_info=[0,pad_h,0,pad_w]
+
+    image = K.image_to_tensor(image, False).float() / 255.
+    image = image.to(device)
+
+    return image,pad_info
+
+
+class LiftFeat(nn.Module):
+    def __init__(self,weight,top_k=4096,detect_threshold=0.1):
+        super().__init__()
+        self.net=LiftFeatSPModel(featureboost_config)
+        self.top_k=top_k
+        self.sampler=InterpolateSparse2d('bicubic')
+        self.net=load_model(self.net,weight)
+        self.detector=NonMaxSuppression(rep_thr=detect_threshold)
+
+    @torch.inference_mode()
+    def extract(self,image,pad_info):
+        B,_,_H1,_W1=image.shape
+        M1,K1,D1=self.net.forward1(image)
+        refine_M=self.net.forward2(M1,K1,D1)
+
+        refine_M=refine_M.reshape(M1.shape[0],M1.shape[2],M1.shape[3],-1).permute(0,3,1,2)
+        refine_M=torch.nn.functional.normalize(refine_M,2,dim=1)
+
+        descs_map=refine_M
+        # descs_map=M1
+
+        scores=torch.softmax(K1,dim=1)[:,:64]
+        heatmap=scores.permute(0,2,3,1).reshape(scores.shape[0],scores.shape[2],scores.shape[3],8,8)
+        heatmap=heatmap.permute(0,1,3,2,4).reshape(scores.shape[0],1,scores.shape[2]*8,scores.shape[3]*8)
+
+        pos=self.detector(heatmap)
+        kpts=pos.squeeze(0)
+        mask_w=kpts[...,0]<(_W1-pad_info[-1])
+        kpts=kpts[mask_w]
+        mask_h=kpts[..., 1]<(_H1-pad_info[1])
+        kpts=kpts[mask_h]
+
+        descs=self.sampler(descs_map,kpts.unsqueeze(0),_H1,_W1)
+        descs=torch.nn.functional.normalize(descs,p=2,dim=1)
+        descs=descs.squeeze(0)
+
+        return {
+            'descriptors':descs,
+            'keypoints':kpts
+        }
+
+    def match_liftfeat(self, img1, pad_info1, img2, pad_info2, min_cossim=-1):
+        # import pdb;pdb.set_trace()
+        data1=self.extract(img1, pad_info1)
+        data2=self.extract(img2, pad_info2)
+
+        kpts1,feats1=data1['keypoints'],data1['descriptors']
+        kpts2,feats2=data2['keypoints'],data2['descriptors']
+
+        cossim = feats1 @ feats2.t()
+        cossim_t = feats2 @ feats1.t()
+
+        _, match12 = cossim.max(dim=1)
+        _, match21 = cossim_t.max(dim=1)
+
+        idx0 = torch.arange(len(match12), device=match12.device)
+        mutual = match21[match12] == idx0
+
+        if min_cossim > 0:
+            cossim, _ = cossim.max(dim=1)
+            good = cossim > min_cossim
+            idx0 = idx0[mutual & good]
+            idx1 = match12[mutual & good]
+        else:
+            idx0 = idx0[mutual]
+            idx1 = match12[mutual]
+
+        mkpts1,mkpts2=kpts1[idx0],kpts2[idx1]
+
+        return mkpts1, mkpts2
+
+weight=os.path.join(os.path.dirname(__file__),'../weights/LiftFeat.pth')
+
+liftfeat=LiftFeat(weight)
+
+save_file=os.path.join(os.path.dirname(__file__),'../weights/LiftFeat.pt')
+
+liftfeat_script=torch.jit.script(liftfeat)
+liftfeat_script.save(save_file)
+
+# checkpoint = {
+#     'model_name': 'LiftFeat',
+#     'model_args': {
+#         'top_k': 4096,
+#         'detect_threshold': 0.1
+#     },
+#     'state_dict': liftfeat.state_dict()
+# }
+
+# torch.save(checkpoint,os.path.join(os.path.dirname(__file__),'../weights/LiftFeat.ckpt'))
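Note that the TorchScript export at the bottom of this file sits at module level, so it runs (loading weights and writing `weights/LiftFeat.pt`) whenever `models.liftfeat` is imported. A guarded variant, as a sketch rather than part of the diff, would avoid that side effect:

```python
# Sketch (not in the diff): guard the export so plain imports have no side effects.
if __name__ == "__main__":
    weight = os.path.join(os.path.dirname(__file__), '../weights/LiftFeat.pth')
    liftfeat = LiftFeat(weight)
    save_file = os.path.join(os.path.dirname(__file__), '../weights/LiftFeat.pt')
    torch.jit.script(liftfeat).save(save_file)
```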
imcui/third_party/LiftFeat/models/liftfeat_wrapper.py
ADDED
@@ -0,0 +1,173 @@
+import os
+import sys
+import torch
+import torch.nn as nn
+import numpy as np
+import math
+import cv2
+
+from models.model import LiftFeatSPModel
+from models.interpolator import InterpolateSparse2d
+from utils.config import featureboost_config
+
+device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+
+MODEL_PATH=os.path.join(os.path.dirname(__file__),'../weights/LiftFeat.pth')
+
+
+class NonMaxSuppression(torch.nn.Module):
+    def __init__(self, rep_thr=0.1, top_k=4096):
+        super(NonMaxSuppression,self).__init__()
+        self.max_filter = torch.nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
+        self.rep_thr = rep_thr
+        self.top_k=top_k
+
+
+    def NMS(self, x, threshold = 0.05, kernel_size = 5):
+        B, _, H, W = x.shape
+        pad=kernel_size//2
+        local_max = nn.MaxPool2d(kernel_size=kernel_size, stride=1, padding=pad)(x)
+        pos = (x == local_max) & (x > threshold)
+        pos_batched = [k.nonzero()[..., 1:].flip(-1) for k in pos]
+
+        pad_val = max([len(x) for x in pos_batched])
+        pos = torch.zeros((B, pad_val, 2), dtype=torch.long, device=x.device)
+
+        # Pad kpts and build (B, N, 2) tensor
+        for b in range(len(pos_batched)):
+            pos[b, :len(pos_batched[b]), :] = pos_batched[b]
+
+        return pos
+
+    def forward(self, score):
+        pos = self.NMS(score,self.rep_thr)
+
+        return pos
+
+def load_model(model, weight_path):
+    pretrained_weights = torch.load(weight_path, map_location="cpu")
+
+    model_keys = set(model.state_dict().keys())
+    pretrained_keys = set(pretrained_weights.keys())
+
+    missing_keys = model_keys - pretrained_keys
+    unexpected_keys = pretrained_keys - model_keys
+
+    # if missing_keys:
+    #     print("Missing keys in pretrained weights:", missing_keys)
+    # else:
+    #     print("No missing keys in pretrained weights.")
+
+    # if unexpected_keys:
+    #     print("Unexpected keys in pretrained weights:", unexpected_keys)
+    # else:
+    #     print("No unexpected keys in pretrained weights.")
+
+    if not missing_keys and not unexpected_keys:
+        model.load_state_dict(pretrained_weights)
+        print("load weight successfully.")
+    else:
+        model.load_state_dict(pretrained_weights, strict=False)
+        # print("There were issues with the keys.")
+    return model
+
+
+class LiftFeat(nn.Module):
+    def __init__(self,weight=MODEL_PATH,top_k=4096,detect_threshold=0.1):
+        super().__init__()
+        self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+        self.net=LiftFeatSPModel(featureboost_config).to(self.device).eval()
+        self.top_k=top_k
+        self.sampler=InterpolateSparse2d('bicubic')
+        self.net=load_model(self.net,weight)
+        self.detector=NonMaxSuppression(rep_thr=detect_threshold)
+        self.net=self.net.to(self.device)
+        self.detector=self.detector.to(self.device)
+        self.sampler=self.sampler.to(self.device)
+
+    def image_preprocess(self,image: np.ndarray):
+        H,W,C=image.shape[0],image.shape[1],image.shape[2]
+
+        _H=math.ceil(H/32)*32
+        _W=math.ceil(W/32)*32
+
+        pad_h=_H-H
+        pad_w=_W-W
+
+        image=cv2.copyMakeBorder(image,0,pad_h,0,pad_w,cv2.BORDER_CONSTANT,None,(0, 0, 0))
+
+        pad_info=[0,pad_h,0,pad_w]
+
+        if len(image.shape)==3:
+            image=image[None,...]
+
+        image=torch.tensor(image).permute(0,3,1,2)/255
+        image=image.to(device)
+
+        return image, pad_info
+
+    @torch.inference_mode()
+    def extract(self,image: np.ndarray):
+        image,pad_info=self.image_preprocess(image)
+        B,_,_H1,_W1=image.shape
+
+        M1,K1,D1=self.net.forward1(image)
+        refine_M=self.net.forward2(M1,K1,D1)
+
+        refine_M=refine_M.reshape(M1.shape[0],M1.shape[2],M1.shape[3],-1).permute(0,3,1,2)
+        refine_M=torch.nn.functional.normalize(refine_M,2,dim=1)
+
+        descs_map=refine_M
+        # descs_map=M1
+
+        scores=torch.softmax(K1,dim=1)[:,:64]
+        heatmap=scores.permute(0,2,3,1).reshape(scores.shape[0],scores.shape[2],scores.shape[3],8,8)
+        heatmap=heatmap.permute(0,1,3,2,4).reshape(scores.shape[0],1,scores.shape[2]*8,scores.shape[3]*8)
+
+        pos=self.detector(heatmap)
+        kpts=pos.squeeze(0)
+        mask_w=kpts[...,0]<(_W1-pad_info[-1])
+        kpts=kpts[mask_w]
+        mask_h=kpts[..., 1]<(_H1-pad_info[1])
+        kpts=kpts[mask_h]
+
+        descs=self.sampler(descs_map,kpts.unsqueeze(0),_H1,_W1)
+        descs=torch.nn.functional.normalize(descs,p=2,dim=1)
+        descs=descs.squeeze(0)
+
+        return {
+            'descriptors':descs,
+            'keypoints':kpts
+        }
+
+    def match_liftfeat(self, img1, img2, min_cossim=-1):
+        # import pdb;pdb.set_trace()
+        data1=self.extract(img1)
+        data2=self.extract(img2)
+
+        kpts1,feats1=data1['keypoints'],data1['descriptors']
+        kpts2,feats2=data2['keypoints'],data2['descriptors']
+
+        cossim = feats1 @ feats2.t()
+        cossim_t = feats2 @ feats1.t()
+
+        _, match12 = cossim.max(dim=1)
+        _, match21 = cossim_t.max(dim=1)
+
+        idx0 = torch.arange(len(match12), device=match12.device)
+        mutual = match21[match12] == idx0
+
+        if min_cossim > 0:
+            cossim, _ = cossim.max(dim=1)
+            good = cossim > min_cossim
+            idx0 = idx0[mutual & good]
+            idx1 = match12[mutual & good]
+        else:
+            idx0 = idx0[mutual]
+            idx1 = match12[mutual]
+
+        mkpts1,mkpts2=kpts1[idx0],kpts2[idx1]
+        mkpts1,mkpts2=mkpts1.cpu().numpy(),mkpts2.cpu().numpy()
+
+        return mkpts1, mkpts2
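Putting the wrapper together, a minimal matching sketch mirroring demo.py (image paths are placeholders): load two BGR images, extract keypoints and descriptors for each, and keep the mutual-nearest-neighbor matches as numpy arrays.

```python
# End-to-end matching with the wrapper above (paths are placeholders).
import cv2
from models.liftfeat_wrapper import LiftFeat, MODEL_PATH

liftfeat = LiftFeat(weight=MODEL_PATH, detect_threshold=0.05)
img1 = cv2.imread('path/to/ref.jpg')
img2 = cv2.imread('path/to/query.jpg')
mkpts1, mkpts2 = liftfeat.match_liftfeat(img1, img2)
print(mkpts1.shape, mkpts2.shape)   # (N, 2) matched keypoints in each image
```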
imcui/third_party/LiftFeat/models/model.py
ADDED
@@ -0,0 +1,419 @@
+
+"""
+"LiftFeat: 3D Geometry-Aware Local Feature Matching"
+"""
+
+import numpy as np
+import os
+import torch
+from torch import nn
+import torch.nn.functional as F
+
+import tqdm
+import math
+import cv2
+
+import sys
+sys.path.append('/home/yepeng_liu/code_python/laiwenpeng/LiftFeat')
+from utils.featurebooster import FeatureBooster
+from utils.config import featureboost_config
+
+# from models.model_dfb import LiftFeatModel
+# from models.interpolator import InterpolateSparse2d
+# from third_party.config import featureboost_config
+
+"""
+foundational functions
+"""
+def simple_nms(scores, radius):
+    """Perform non maximum suppression on the heatmap using max-pooling.
+    This method does not suppress contiguous points that have the same score.
+    Args:
+        scores: the score heatmap of size `(B, H, W)`.
+        radius: an integer scalar, the radius of the NMS window.
+    """
+
+    def max_pool(x):
+        return torch.nn.functional.max_pool2d(
+            x, kernel_size=radius * 2 + 1, stride=1, padding=radius
+        )
+
+    zeros = torch.zeros_like(scores)
+    max_mask = scores == max_pool(scores)
+    for _ in range(2):
+        supp_mask = max_pool(max_mask.float()) > 0
+        supp_scores = torch.where(supp_mask, zeros, scores)
+        new_max_mask = supp_scores == max_pool(supp_scores)
+        max_mask = max_mask | (new_max_mask & (~supp_mask))
+    return torch.where(max_mask, scores, zeros)
+
+
+def top_k_keypoints(keypoints, scores, k):
+    if k >= len(keypoints):
+        return keypoints, scores
+    scores, indices = torch.topk(scores, k, dim=0, sorted=True)
+    return keypoints[indices], scores
+
+
+def sample_k_keypoints(keypoints, scores, k):
+    if k >= len(keypoints):
+        return keypoints, scores
+    indices = torch.multinomial(scores, k, replacement=False)
+    return keypoints[indices], scores[indices]
+
+
+def soft_argmax_refinement(keypoints, scores, radius: int):
+    width = 2 * radius + 1
+    sum_ = torch.nn.functional.avg_pool2d(
+        scores[:, None], width, 1, radius, divisor_override=1
+    )
+    ar = torch.arange(-radius, radius + 1).to(scores)
+    kernel_x = ar[None].expand(width, -1)[None, None]
+    dx = torch.nn.functional.conv2d(scores[:, None], kernel_x, padding=radius)
+    dy = torch.nn.functional.conv2d(
+        scores[:, None], kernel_x.transpose(2, 3), padding=radius
+    )
+    dydx = torch.stack([dy[:, 0], dx[:, 0]], -1) / sum_[:, 0, :, :, None]
+    refined_keypoints = []
+    for i, kpts in enumerate(keypoints):
+        delta = dydx[i][tuple(kpts.t())]
+        refined_keypoints.append(kpts.float() + delta)
+    return refined_keypoints
+
+
+# Legacy (broken) sampling of the descriptors
+def sample_descriptors(keypoints, descriptors, s):
+    b, c, h, w = descriptors.shape
+    keypoints = keypoints - s / 2 + 0.5
+    keypoints /= torch.tensor(
+        [(w * s - s / 2 - 0.5), (h * s - s / 2 - 0.5)],
+    ).to(
+        keypoints
+    )[None]
+    keypoints = keypoints * 2 - 1  # normalize to (-1, 1)
+    args = {"align_corners": True} if torch.__version__ >= "1.3" else {}
+    descriptors = torch.nn.functional.grid_sample(
+        descriptors, keypoints.view(b, 1, -1, 2), mode="bilinear", **args
+    )
+    descriptors = torch.nn.functional.normalize(
+        descriptors.reshape(b, c, -1), p=2, dim=1
+    )
+    return descriptors
+
+
+# The original keypoint sampling is incorrect. We patch it here but
+# keep the original one above for legacy.
+def sample_descriptors_fix_sampling(keypoints, descriptors, s: int = 8):
+    """Interpolate descriptors at keypoint locations"""
+    b, c, h, w = descriptors.shape
+    keypoints = keypoints / (keypoints.new_tensor([w, h]) * s)
+    keypoints = keypoints * 2 - 1  # normalize to (-1, 1)
+    descriptors = torch.nn.functional.grid_sample(
+        descriptors, keypoints.view(b, 1, -1, 2), mode="bilinear", align_corners=False
+    )
+    descriptors = torch.nn.functional.normalize(
+        descriptors.reshape(b, c, -1), p=2, dim=1
+    )
+    return descriptors
+
+
+class UpsampleLayer(nn.Module):
+    def __init__(self, in_channels):
+        super().__init__()
+        # Feature extraction layer: halve the channel count while strengthening feature extraction
+        self.conv = nn.Conv2d(in_channels, in_channels//2, kernel_size=3, stride=1, padding=1)
+        # Batch normalization
+        self.bn = nn.BatchNorm2d(in_channels//2)
+        # LeakyReLU activation
+        self.leaky_relu = nn.LeakyReLU(0.1)
+
+    def forward(self, x):
+        x = F.interpolate(x, scale_factor=2, mode="bilinear", align_corners=False)
+        x = self.leaky_relu(self.bn(self.conv(x)))
+
+        return x
+
+
+class KeypointHead(nn.Module):
+    def __init__(self,in_channels,out_channels):
+        super().__init__()
+        self.layer1=BaseLayer(in_channels,32)
+        self.layer2=BaseLayer(32,32)
+        self.layer3=BaseLayer(32,64)
+        self.layer4=BaseLayer(64,64)
+        self.layer5=BaseLayer(64,128)
+
+        self.conv=nn.Conv2d(128,out_channels,kernel_size=3,stride=1,padding=1)
+        self.bn=nn.BatchNorm2d(65)
+
+    def forward(self,x):
+        x=self.layer1(x)
+        x=self.layer2(x)
+        x=self.layer3(x)
+        x=self.layer4(x)
+        x=self.layer5(x)
+        x=self.bn(self.conv(x))
+        return x
+
+
+class DescriptorHead(nn.Module):
+    def __init__(self,in_channels,out_channels):
+        super().__init__()
+        self.layer=nn.Sequential(
+            BaseLayer(in_channels,32),
+            BaseLayer(32,32,activation=False),
+            BaseLayer(32,64,activation=False),
+            BaseLayer(64,out_channels,activation=False)
+        )
+
+    def forward(self,x):
+        x=self.layer(x)
+        # x=nn.functional.softmax(x,dim=1)
+        return x
+
+
+class HeatmapHead(nn.Module):
+    def __init__(self,in_channels,mid_channels,out_channels):
+        super().__init__()
+        self.convHa = nn.Conv2d(in_channels, mid_channels, kernel_size=3, stride=1, padding=1)
+        self.bnHa = nn.BatchNorm2d(mid_channels)
+        self.convHb = nn.Conv2d(mid_channels, out_channels, kernel_size=3, stride=1, padding=1)
+        self.bnHb = nn.BatchNorm2d(out_channels)
+        self.leaky_relu = nn.LeakyReLU(0.1)
+
+    def forward(self,x):
+        x = self.leaky_relu(self.bnHa(self.convHa(x)))
+        x = self.leaky_relu(self.bnHb(self.convHb(x)))
+
+        x = torch.sigmoid(x)
+        return x
+
+
+class DepthHead(nn.Module):
+    def __init__(self, in_channels):
+        super().__init__()
+        self.upsampleDa = UpsampleLayer(in_channels)
+        self.upsampleDb = UpsampleLayer(in_channels//2)
+        self.upsampleDc = UpsampleLayer(in_channels//4)
+
+        self.convDepa = nn.Conv2d(in_channels//2+in_channels, in_channels//2, kernel_size=3, stride=1, padding=1)
|
200 |
+
self.bnDepa = nn.BatchNorm2d(in_channels//2)
|
201 |
+
self.convDepb = nn.Conv2d(in_channels//4+in_channels//2, in_channels//4, kernel_size=3, stride=1, padding=1)
|
202 |
+
self.bnDepb = nn.BatchNorm2d(in_channels//4)
|
203 |
+
self.convDepc = nn.Conv2d(in_channels//8+in_channels//4, 3, kernel_size=3, stride=1, padding=1)
|
204 |
+
self.bnDepc = nn.BatchNorm2d(3)
|
205 |
+
|
206 |
+
self.leaky_relu = nn.LeakyReLU(0.1)
|
207 |
+
|
208 |
+
def forward(self, x):
|
209 |
+
x0 = F.interpolate(x, scale_factor=2,mode='bilinear',align_corners=False)
|
210 |
+
x1 = self.upsampleDa(x)
|
211 |
+
x1 = torch.cat([x0,x1],dim=1)
|
212 |
+
x1 = self.leaky_relu(self.bnDepa(self.convDepa(x1)))
|
213 |
+
|
214 |
+
x1_0 = F.interpolate(x1,scale_factor=2,mode='bilinear',align_corners=False)
|
215 |
+
x2 = self.upsampleDb(x1)
|
216 |
+
x2 = torch.cat([x1_0,x2],dim=1)
|
217 |
+
x2 = self.leaky_relu(self.bnDepb(self.convDepb(x2)))
|
218 |
+
|
219 |
+
x2_0 = F.interpolate(x2,scale_factor=2,mode='bilinear',align_corners=False)
|
220 |
+
x3 = self.upsampleDc(x2)
|
221 |
+
x3 = torch.cat([x2_0,x3],dim=1)
|
222 |
+
x = self.leaky_relu(self.bnDepc(self.convDepc(x3)))
|
223 |
+
|
224 |
+
x = F.normalize(x,p=2,dim=1)
|
225 |
+
return x
|
226 |
+
|
227 |
+
|
228 |
+
class BaseLayer(nn.Module):
|
229 |
+
def __init__(self,in_channels,out_channels,kernel_size=3,stride=1,padding=1,bias=False,activation=True):
|
230 |
+
super().__init__()
|
231 |
+
if activation:
|
232 |
+
self.layer=nn.Sequential(
|
233 |
+
nn.Conv2d(in_channels,out_channels,kernel_size,stride,padding,bias=bias),
|
234 |
+
nn.BatchNorm2d(out_channels,affine=False),
|
235 |
+
nn.ReLU(inplace=True)
|
236 |
+
)
|
237 |
+
else:
|
238 |
+
self.layer=nn.Sequential(
|
239 |
+
nn.Conv2d(in_channels,out_channels,kernel_size,stride,padding,bias=bias),
|
240 |
+
nn.BatchNorm2d(out_channels,affine=False)
|
241 |
+
)
|
242 |
+
|
243 |
+
def forward(self,x):
|
244 |
+
return self.layer(x)
|
245 |
+
|
246 |
+
|
247 |
+
class LiftFeatSPModel(nn.Module):
|
248 |
+
default_conf = {
|
249 |
+
"has_detector": True,
|
250 |
+
"has_descriptor": True,
|
251 |
+
"descriptor_dim": 64,
|
252 |
+
# Inference
|
253 |
+
"sparse_outputs": True,
|
254 |
+
"dense_outputs": False,
|
255 |
+
"nms_radius": 4,
|
256 |
+
"refinement_radius": 0,
|
257 |
+
"detection_threshold": 0.005,
|
258 |
+
"max_num_keypoints": -1,
|
259 |
+
"max_num_keypoints_val": None,
|
260 |
+
"force_num_keypoints": False,
|
261 |
+
"randomize_keypoints_training": False,
|
262 |
+
"remove_borders": 4,
|
263 |
+
"legacy_sampling": True, # True to use the old broken sampling
|
264 |
+
}
|
265 |
+
|
266 |
+
def __init__(self, featureboost_config, use_kenc=False, use_normal=True, use_cross=True):
|
267 |
+
super().__init__()
|
268 |
+
self.device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
269 |
+
self.descriptor_dim = 64
|
270 |
+
|
271 |
+
self.norm = nn.InstanceNorm2d(1)
|
272 |
+
|
273 |
+
self.relu = nn.ReLU(inplace=True)
|
274 |
+
self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
|
275 |
+
c1,c2,c3,c4,c5 = 24,24,64,64,128
|
276 |
+
|
277 |
+
self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1)
|
278 |
+
self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1)
|
279 |
+
self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1)
|
280 |
+
self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1)
|
281 |
+
self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1)
|
282 |
+
self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1)
|
283 |
+
self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1)
|
284 |
+
self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1)
|
285 |
+
self.conv5a = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1)
|
286 |
+
self.conv5b = nn.Conv2d(c5, c5, kernel_size=3, stride=1, padding=1)
|
287 |
+
|
288 |
+
self.upsample4 = UpsampleLayer(c4)
|
289 |
+
self.upsample5 = UpsampleLayer(c5)
|
290 |
+
self.conv_fusion45 = nn.Conv2d(c5//2+c4,c4,kernel_size=3,stride=1,padding=1)
|
291 |
+
self.conv_fusion34 = nn.Conv2d(c4//2+c3,c3,kernel_size=3,stride=1,padding=1)
|
292 |
+
|
293 |
+
# detector
|
294 |
+
self.keypoint_head = KeypointHead(in_channels=c3,out_channels=65)
|
295 |
+
# descriptor
|
296 |
+
self.descriptor_head = DescriptorHead(in_channels=c3,out_channels=self.descriptor_dim)
|
297 |
+
# # heatmap
|
298 |
+
# self.heatmap_head = HeatmapHead(in_channels=c3,mid_channels=c3,out_channels=1)
|
299 |
+
# depth
|
300 |
+
self.depth_head = DepthHead(c3)
|
301 |
+
|
302 |
+
self.fine_matcher = nn.Sequential(
|
303 |
+
nn.Linear(128, 512),
|
304 |
+
nn.BatchNorm1d(512, affine=False),
|
305 |
+
nn.ReLU(inplace = True),
|
306 |
+
nn.Linear(512, 512),
|
307 |
+
nn.BatchNorm1d(512, affine=False),
|
308 |
+
nn.ReLU(inplace = True),
|
309 |
+
nn.Linear(512, 512),
|
310 |
+
nn.BatchNorm1d(512, affine=False),
|
311 |
+
nn.ReLU(inplace = True),
|
312 |
+
nn.Linear(512, 512),
|
313 |
+
nn.BatchNorm1d(512, affine=False),
|
314 |
+
nn.ReLU(inplace = True),
|
315 |
+
nn.Linear(512, 64),
|
316 |
+
)
|
317 |
+
|
318 |
+
# feature_booster
|
319 |
+
self.feature_boost = FeatureBooster(featureboost_config, use_kenc=use_kenc, use_cross=use_cross, use_normal=use_normal)
|
320 |
+
|
321 |
+
def feature_extract(self, x):
|
322 |
+
x1 = self.relu(self.conv1a(x))
|
323 |
+
x1 = self.relu(self.conv1b(x1))
|
324 |
+
x1 = self.pool(x1)
|
325 |
+
x2 = self.relu(self.conv2a(x1))
|
326 |
+
x2 = self.relu(self.conv2b(x2))
|
327 |
+
x2 = self.pool(x2)
|
328 |
+
x3 = self.relu(self.conv3a(x2))
|
329 |
+
x3 = self.relu(self.conv3b(x3))
|
330 |
+
x3 = self.pool(x3)
|
331 |
+
x4 = self.relu(self.conv4a(x3))
|
332 |
+
x4 = self.relu(self.conv4b(x4))
|
333 |
+
x4 = self.pool(x4)
|
334 |
+
x5 = self.relu(self.conv5a(x4))
|
335 |
+
x5 = self.relu(self.conv5b(x5))
|
336 |
+
x5 = self.pool(x5)
|
337 |
+
return x3,x4,x5
|
338 |
+
|
339 |
+
def fuse_multi_features(self,x3,x4,x5):
|
340 |
+
# upsample x5 feature
|
341 |
+
x5 = self.upsample5(x5)
|
342 |
+
x4 = torch.cat([x4,x5],dim=1)
|
343 |
+
x4 = self.conv_fusion45(x4)
|
344 |
+
|
345 |
+
# upsample x4 feature
|
346 |
+
x4 = self.upsample4(x4)
|
347 |
+
x3 = torch.cat([x3,x4],dim=1)
|
348 |
+
x = self.conv_fusion34(x3)
|
349 |
+
return x
|
350 |
+
|
351 |
+
def _unfold2d(self, x, ws = 2):
|
352 |
+
"""
|
353 |
+
Unfolds tensor in 2D with desired ws (window size) and concat the channels
|
354 |
+
"""
|
355 |
+
B, C, H, W = x.shape
|
356 |
+
x = x.unfold(2, ws , ws).unfold(3, ws,ws).reshape(B, C, H//ws, W//ws, ws**2)
|
357 |
+
return x.permute(0, 1, 4, 2, 3).reshape(B, -1, H//ws, W//ws)
|
358 |
+
|
359 |
+
|
360 |
+
def forward1(self, x):
|
361 |
+
"""
|
362 |
+
input:
|
363 |
+
x -> torch.Tensor(B, C, H, W) grayscale or rgb images
|
364 |
+
return:
|
365 |
+
feats -> torch.Tensor(B, 64, H/8, W/8) dense local features
|
366 |
+
keypoints -> torch.Tensor(B, 65, H/8, W/8) keypoint logit map
|
367 |
+
heatmap -> torch.Tensor(B, 1, H/8, W/8) reliability map
|
368 |
+
|
369 |
+
"""
|
370 |
+
with torch.no_grad():
|
371 |
+
x = x.mean(dim=1, keepdim = True)
|
372 |
+
x = self.norm(x)
|
373 |
+
|
374 |
+
x3,x4,x5 = self.feature_extract(x)
|
375 |
+
|
376 |
+
# features fusion
|
377 |
+
x = self.fuse_multi_features(x3,x4,x5)
|
378 |
+
|
379 |
+
# keypoint
|
380 |
+
keypoint_map = self.keypoint_head(x)
|
381 |
+
# descriptor
|
382 |
+
des_map = self.descriptor_head(x)
|
383 |
+
# # heatmap
|
384 |
+
# heatmap = self.heatmap_head(x)
|
385 |
+
|
386 |
+
# import pdb;pdb.set_trace()
|
387 |
+
# depth
|
388 |
+
d_feats = self.depth_head(x)
|
389 |
+
|
390 |
+
return des_map, keypoint_map, d_feats
|
391 |
+
# return des_map, keypoint_map, heatmap, d_feats
|
392 |
+
|
393 |
+
def forward2(self, descs, kpts, normals):
|
394 |
+
# import pdb;pdb.set_trace()
|
395 |
+
normals_feat=self._unfold2d(normals, ws=8)
|
396 |
+
normals_v=normals_feat.squeeze(0).permute(1,2,0).reshape(-1,normals_feat.shape[1])
|
397 |
+
descs_v=descs.squeeze(0).permute(1,2,0).reshape(-1,descs.shape[1])
|
398 |
+
kpts_v=kpts.squeeze(0).permute(1,2,0).reshape(-1,kpts.shape[1])
|
399 |
+
descs_refine = self.feature_boost(descs_v, kpts_v, normals_v)
|
400 |
+
return descs_refine
|
401 |
+
|
402 |
+
def forward(self,x):
|
403 |
+
M1,K1,D1=self.forward1(x)
|
404 |
+
descs_refine=self.forward2(M1,K1,D1)
|
405 |
+
return descs_refine,M1,K1,D1
|
406 |
+
|
407 |
+
|
408 |
+
if __name__ == "__main__":
|
409 |
+
img_path=os.path.join(os.path.dirname(__file__),'../assert/ref.jpg')
|
410 |
+
img=cv2.imread(img_path,cv2.IMREAD_GRAYSCALE)
|
411 |
+
img=cv2.resize(img,(800,608))
|
412 |
+
import pdb;pdb.set_trace()
|
413 |
+
img=torch.from_numpy(img).unsqueeze(0).unsqueeze(0).float()/255.0
|
414 |
+
img=img.cuda() if torch.cuda.is_available() else img
|
415 |
+
liftfeat_sp=LiftFeatSPModel(featureboost_config).to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
|
416 |
+
des_map, keypoint_map, d_feats=liftfeat_sp.forward1(img)
|
417 |
+
des_fine=liftfeat_sp.forward2(des_map,keypoint_map,d_feats)
|
418 |
+
print(des_map.shape)
|
419 |
+
|
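
For reference, a minimal inference sketch of the model defined above (illustrative only, not part of the commit; it assumes the LiftFeat repo root is on PYTHONPATH so that `models.model` and `utils.config` resolve, and an input whose sides are divisible by 8):

import torch

from models.model import LiftFeatSPModel
from utils.config import featureboost_config

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = LiftFeatSPModel(featureboost_config).to(device).eval()

# Grayscale batch at the training resolution (H=608, W=800)
img = torch.rand(1, 1, 608, 800, device=device)
with torch.no_grad():
    des_map, keypoint_map, d_feats = model.forward1(img)        # dense maps
    des_fine = model.forward2(des_map, keypoint_map, d_feats)   # boosted descriptors

print(des_map.shape)       # torch.Size([1, 64, 76, 100])  descriptors at 1/8 resolution
print(keypoint_map.shape)  # torch.Size([1, 65, 76, 100])  keypoint logits
print(d_feats.shape)       # torch.Size([1, 3, 608, 800])  normal features at full resolution
print(des_fine.shape)      # torch.Size([7600, 64])        one refined descriptor per 8x8 cell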
imcui/third_party/LiftFeat/requirements.txt
ADDED
@@ -0,0 +1,18 @@
torch==1.13.1
torchvision==0.14.1
einops==0.8.0
kornia==0.7.3
timm==1.0.15
albumentations==1.4.12
imgaug==0.4.0
opencv-python==4.10.0.84
matplotlib==3.7.5
numpy==1.24.4
scikit-image==0.21.0
scipy==1.10.1
pillow==10.3.0
tensorboard==2.14.0
tqdm==4.66.4
omegaconf==2.3.0
thop==0.1.1.post2209072238
poselib
imcui/third_party/LiftFeat/train.py
ADDED
@@ -0,0 +1,365 @@
"""
"LiftFeat: 3D Geometry-Aware Local Feature Matching"
training script
"""

import argparse
import os
import time
import sys
sys.path.append(os.path.dirname(__file__))

def parse_arguments():
    parser = argparse.ArgumentParser(description="LiftFeat training script.")
    parser.add_argument('--name', type=str, default='LiftFeat', help='set process name')

    # MegaDepth dataset setting
    parser.add_argument('--use_megadepth', action='store_true')
    parser.add_argument('--megadepth_root_path', type=str,
                        default='/home/yepeng_liu/code_python/dataset/MegaDepth/phoenix/S6/zl548',
                        help='Path to the MegaDepth dataset root directory.')
    parser.add_argument('--megadepth_batch_size', type=int, default=6)

    # COCO20k dataset setting
    parser.add_argument('--use_coco', action='store_true')
    parser.add_argument('--coco_root_path', type=str, default='/home/yepeng_liu/code_python/dataset/coco_20k',
                        help='Path to the COCO20k dataset root directory.')
    parser.add_argument('--coco_batch_size', type=int, default=4)

    parser.add_argument('--ckpt_save_path', type=str, default='/home/yepeng_liu/code_python/LiftFeat/trained_weights/test',
                        help='Path to save the checkpoints.')
    parser.add_argument('--n_steps', type=int, default=160_000,
                        help='Number of training steps. Default is 160000.')
    parser.add_argument('--lr', type=float, default=3e-4,
                        help='Learning rate. Default is 0.0003.')
    parser.add_argument('--gamma_steplr', type=float, default=0.5,
                        help='Gamma value for StepLR scheduler. Default is 0.5.')
    parser.add_argument('--training_res', type=lambda s: tuple(map(int, s.split(','))),
                        default=(800, 608), help='Training resolution as width,height. Default is (800, 608).')
    parser.add_argument('--device_num', type=str, default='0',
                        help='Device number to use for training. Default is "0".')
    parser.add_argument('--dry_run', action='store_true',
                        help='If set, perform a dry run training with a mini-batch for sanity check.')
    parser.add_argument('--save_ckpt_every', type=int, default=500,
                        help='Save checkpoints every N steps. Default is 500.')

    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.device_num

    return args

args = parse_arguments()

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset, DataLoader

import numpy as np
import tqdm
import glob

from models.model import LiftFeatSPModel
from loss.loss import LiftFeatLoss
from utils.config import featureboost_config
from models.interpolator import InterpolateSparse2d
from utils.depth_anything_wrapper import DepthAnythingExtractor
from utils.alike_wrapper import ALikeExtractor

from dataset import megadepth_wrapper
from dataset import coco_wrapper
from dataset.megadepth import MegaDepthDataset
from dataset.coco_augmentor import COCOAugmentor

import setproctitle


class Trainer():
    def __init__(self, megadepth_root_path, use_megadepth, megadepth_batch_size,
                 coco_root_path, use_coco, coco_batch_size,
                 ckpt_save_path,
                 model_name='LiftFeat',
                 n_steps=160_000, lr=3e-4, gamma_steplr=0.5,
                 training_res=(800, 608), device_num="0", dry_run=False,
                 save_ckpt_every=500):
        print(f'MegaDepth: {use_megadepth}-{megadepth_batch_size}')
        print(f'COCO20k: {use_coco}-{coco_batch_size}')
        self.dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # training model
        self.net = LiftFeatSPModel(featureboost_config, use_kenc=False, use_normal=True, use_cross=True).to(self.dev)
        self.loss_fn = LiftFeatLoss(self.dev, lam_descs=1, lam_kpts=2, lam_heatmap=1)

        # depth-anything model
        self.depth_net = DepthAnythingExtractor('vits', self.dev, 256)

        # alike model
        self.alike_net = ALikeExtractor('alike-t', self.dev)

        # Setup optimizer
        self.steps = n_steps
        self.opt = optim.Adam(filter(lambda x: x.requires_grad, self.net.parameters()), lr=lr)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.opt, step_size=10_000, gamma=gamma_steplr)

        ##################### COCO INIT ##########################
        self.use_coco = use_coco
        self.coco_batch_size = coco_batch_size
        if self.use_coco:
            self.augmentor = COCOAugmentor(
                img_dir=coco_root_path,
                device=self.dev, load_dataset=True,
                batch_size=self.coco_batch_size,
                out_resolution=training_res,
                warp_resolution=training_res,
                sides_crop=0.1,
                max_num_imgs=3000,
                num_test_imgs=5,
                photometric=True,
                geometric=True,
                reload_step=4000
            )
        ##################### COCO END #######################

        ##################### MEGADEPTH INIT ##########################
        self.use_megadepth = use_megadepth
        self.megadepth_batch_size = megadepth_batch_size
        if self.use_megadepth:
            TRAIN_BASE_PATH = f"{megadepth_root_path}/train_data/megadepth_indices"
            TRAINVAL_DATA_SOURCE = f"{megadepth_root_path}/MegaDepth_v1"

            TRAIN_NPZ_ROOT = f"{TRAIN_BASE_PATH}/scene_info_0.1_0.7"

            npz_paths = glob.glob(TRAIN_NPZ_ROOT + '/*.npz')[:]
            megadepth_dataset = torch.utils.data.ConcatDataset(
                [MegaDepthDataset(root_dir=TRAINVAL_DATA_SOURCE, npz_path=path)
                 for path in tqdm.tqdm(npz_paths, desc="[MegaDepth] Loading metadata")])

            self.megadepth_dataloader = DataLoader(megadepth_dataset, batch_size=megadepth_batch_size, shuffle=True)
            self.megadepth_data_iter = iter(self.megadepth_dataloader)
        ##################### MEGADEPTH INIT END #######################

        os.makedirs(ckpt_save_path, exist_ok=True)
        os.makedirs(ckpt_save_path + '/logdir', exist_ok=True)

        self.dry_run = dry_run
        self.save_ckpt_every = save_ckpt_every
        self.ckpt_save_path = ckpt_save_path
        self.writer = SummaryWriter(ckpt_save_path + f'/logdir/{model_name}_' + time.strftime("%Y_%m_%d-%H_%M_%S"))
        self.model_name = model_name

    def generate_train_data(self):
        imgs1_t, imgs2_t = [], []
        imgs1_np, imgs2_np = [], []
        positives_coarse = []

        if self.use_coco:
            coco_imgs1, coco_imgs2, H1, H2 = coco_wrapper.make_batch(self.augmentor, 0.1)
            h_coarse, w_coarse = coco_imgs1[0].shape[-2] // 8, coco_imgs1[0].shape[-1] // 8
            _, positives_coco_coarse = coco_wrapper.get_corresponding_pts(coco_imgs1, coco_imgs2, H1, H2, self.augmentor, h_coarse, w_coarse)
            coco_imgs1 = coco_imgs1.mean(1, keepdim=True)
            coco_imgs2 = coco_imgs2.mean(1, keepdim=True)
            imgs1_t.append(coco_imgs1)
            imgs2_t.append(coco_imgs2)
            positives_coarse += positives_coco_coarse

        if self.use_megadepth:
            try:
                megadepth_data = next(self.megadepth_data_iter)
            except StopIteration:
                print('End of MD DATASET')
                self.megadepth_data_iter = iter(self.megadepth_dataloader)
                megadepth_data = next(self.megadepth_data_iter)
            if megadepth_data is not None:
                for k in megadepth_data.keys():
                    if isinstance(megadepth_data[k], torch.Tensor):
                        megadepth_data[k] = megadepth_data[k].to(self.dev)
                megadepth_imgs1_t, megadepth_imgs2_t = megadepth_data['image0'], megadepth_data['image1']
                megadepth_imgs1_t = megadepth_imgs1_t.mean(1, keepdim=True)
                megadepth_imgs2_t = megadepth_imgs2_t.mean(1, keepdim=True)
                imgs1_t.append(megadepth_imgs1_t)
                imgs2_t.append(megadepth_imgs2_t)
                megadepth_imgs1_np, megadepth_imgs2_np = megadepth_data['image0_np'], megadepth_data['image1_np']
                for np_idx in range(megadepth_imgs1_np.shape[0]):
                    img1_np = megadepth_imgs1_np[np_idx].squeeze(0).cpu().numpy()
                    img2_np = megadepth_imgs2_np[np_idx].squeeze(0).cpu().numpy()
                    imgs1_np.append(img1_np)
                    imgs2_np.append(img2_np)
                positives_megadepth_coarse = megadepth_wrapper.spvs_coarse(megadepth_data, 8)
                positives_coarse += positives_megadepth_coarse

        with torch.no_grad():
            imgs1_t = torch.cat(imgs1_t, dim=0)
            imgs2_t = torch.cat(imgs2_t, dim=0)

        return imgs1_t, imgs2_t, imgs1_np, imgs2_np, positives_coarse

    def train(self):
        self.net.train()

        with tqdm.tqdm(total=self.steps) as pbar:
            for i in range(self.steps):
                imgs1_t, imgs2_t, imgs1_np, imgs2_np, positives_coarse = self.generate_train_data()

                # Check if batch is corrupted with too few correspondences
                is_corrupted = False
                for p in positives_coarse:
                    if len(p) < 30:
                        is_corrupted = True

                if is_corrupted:
                    continue

                # Forward pass
                feats1, kpts1, normals1 = self.net.forward1(imgs1_t)
                feats2, kpts2, normals2 = self.net.forward1(imgs2_t)

                coordinates, fb_coordinates = [], []
                alike_kpts1, alike_kpts2 = [], []
                DA_normals1, DA_normals2 = [], []

                fb_feats1, fb_feats2 = [], []
                for b in range(feats1.shape[0]):
                    feat1 = feats1[b].permute(1, 2, 0).reshape(-1, feats1.shape[1])
                    feat2 = feats2[b].permute(1, 2, 0).reshape(-1, feats2.shape[1])

                    coordinate = self.net.fine_matcher(torch.cat([feat1, feat2], dim=-1))
                    coordinates.append(coordinate)

                    fb_feat1 = self.net.forward2(feats1[b].unsqueeze(0), kpts1[b].unsqueeze(0), normals1[b].unsqueeze(0))
                    fb_feat2 = self.net.forward2(feats2[b].unsqueeze(0), kpts2[b].unsqueeze(0), normals2[b].unsqueeze(0))

                    fb_coordinate = self.net.fine_matcher(torch.cat([fb_feat1, fb_feat2], dim=-1))
                    fb_coordinates.append(fb_coordinate)

                    fb_feats1.append(fb_feat1.unsqueeze(0))
                    fb_feats2.append(fb_feat2.unsqueeze(0))

                    img1, img2 = imgs1_t[b], imgs2_t[b]
                    img1 = img1.permute(1, 2, 0).expand(-1, -1, 3).cpu().numpy() * 255
                    img2 = img2.permute(1, 2, 0).expand(-1, -1, 3).cpu().numpy() * 255
                    alike_kpt1 = torch.tensor(self.alike_net.extract_alike_kpts(img1), device=self.dev)
                    alike_kpt2 = torch.tensor(self.alike_net.extract_alike_kpts(img2), device=self.dev)
                    alike_kpts1.append(alike_kpt1)
                    alike_kpts2.append(alike_kpt2)

                for b in range(len(imgs1_np)):
                    megadepth_depth1, megadepth_norm1 = self.depth_net.extract(imgs1_np[b])
                    megadepth_depth2, megadepth_norm2 = self.depth_net.extract(imgs2_np[b])
                    DA_normals1.append(megadepth_norm1)
                    DA_normals2.append(megadepth_norm2)

                fb_feats1 = torch.cat(fb_feats1, dim=0)
                fb_feats2 = torch.cat(fb_feats2, dim=0)
                fb_feats1 = fb_feats1.reshape(feats1.shape[0], feats1.shape[2], feats1.shape[3], -1).permute(0, 3, 1, 2)
                fb_feats2 = fb_feats2.reshape(feats2.shape[0], feats2.shape[2], feats2.shape[3], -1).permute(0, 3, 1, 2)

                coordinates = torch.cat(coordinates, dim=0)
                coordinates = coordinates.reshape(feats1.shape[0], feats1.shape[2], feats1.shape[3], -1).permute(0, 3, 1, 2)

                fb_coordinates = torch.cat(fb_coordinates, dim=0)
                fb_coordinates = fb_coordinates.reshape(feats1.shape[0], feats1.shape[2], feats1.shape[3], -1).permute(0, 3, 1, 2)

                loss_items = []

                loss_info = self.loss_fn(
                    feats1, fb_feats1, kpts1, normals1,
                    feats2, fb_feats2, kpts2, normals2,
                    positives_coarse,
                    coordinates, fb_coordinates,
                    alike_kpts1, alike_kpts2,
                    DA_normals1, DA_normals2,
                    self.megadepth_batch_size, self.coco_batch_size)

                loss_descs, acc_coarse = loss_info['loss_descs'], loss_info['acc_coarse']
                loss_coordinates, acc_coordinates = loss_info['loss_coordinates'], loss_info['acc_coordinates']
                loss_fb_descs, acc_fb_coarse = loss_info['loss_fb_descs'], loss_info['acc_fb_coarse']
                loss_fb_coordinates, acc_fb_coordinates = loss_info['loss_fb_coordinates'], loss_info['acc_fb_coordinates']
                loss_kpts, acc_kpt = loss_info['loss_kpts'], loss_info['acc_kpt']
                loss_normals = loss_info['loss_normals']

                # loss_items.append(loss_descs.unsqueeze(0))
                # loss_items.append(loss_coordinates.unsqueeze(0))
                loss_items.append(loss_fb_descs.unsqueeze(0))
                loss_items.append(loss_fb_coordinates.unsqueeze(0))
                loss_items.append(loss_kpts.unsqueeze(0))
                loss_items.append(loss_normals.unsqueeze(0))

                loss = torch.cat(loss_items, -1).mean()

                # Compute backward pass
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.net.parameters(), 1.)
                self.opt.step()
                self.opt.zero_grad()
                self.scheduler.step()

                if (i + 1) % self.save_ckpt_every == 0:
                    print('saving iter ', i + 1)
                    torch.save(self.net.state_dict(), self.ckpt_save_path + f'/{self.model_name}_{i+1}.pth')

                pbar.set_description(
                    'Loss: {:.4f} '
                    'loss_descs: {:.3f} acc_coarse: {:.3f} '
                    'loss_coordinates: {:.3f} acc_coordinates: {:.3f} '
                    'loss_fb_descs: {:.3f} acc_fb_coarse: {:.3f} '
                    'loss_fb_coordinates: {:.3f} acc_fb_coordinates: {:.3f} '
                    'loss_kpts: {:.3f} acc_kpts: {:.3f} '
                    'loss_normals: {:.3f}'.format(
                        loss.item(),
                        loss_descs.item(), acc_coarse,
                        loss_coordinates.item(), acc_coordinates,
                        loss_fb_descs.item(), acc_fb_coarse,
                        loss_fb_coordinates.item(), acc_fb_coordinates,
                        loss_kpts.item(), acc_kpt,
                        loss_normals.item()))
                pbar.update(1)

                # Log metrics
                self.writer.add_scalar('Loss/total', loss.item(), i)
                self.writer.add_scalar('Accuracy/acc_coarse', acc_coarse, i)
                self.writer.add_scalar('Accuracy/acc_coordinates', acc_coordinates, i)
                self.writer.add_scalar('Accuracy/acc_fb_coarse', acc_fb_coarse, i)
                self.writer.add_scalar('Accuracy/acc_fb_coordinates', acc_fb_coordinates, i)
                self.writer.add_scalar('Loss/descs', loss_descs.item(), i)
                self.writer.add_scalar('Loss/coordinates', loss_coordinates.item(), i)
                self.writer.add_scalar('Loss/fb_descs', loss_fb_descs.item(), i)
                self.writer.add_scalar('Loss/fb_coordinates', loss_fb_coordinates.item(), i)
                self.writer.add_scalar('Loss/kpts', loss_kpts.item(), i)
                self.writer.add_scalar('Loss/normals', loss_normals.item(), i)


if __name__ == '__main__':

    setproctitle.setproctitle(args.name)

    trainer = Trainer(
        megadepth_root_path=args.megadepth_root_path,
        use_megadepth=args.use_megadepth,
        megadepth_batch_size=args.megadepth_batch_size,
        coco_root_path=args.coco_root_path,
        use_coco=args.use_coco,
        coco_batch_size=args.coco_batch_size,
        ckpt_save_path=args.ckpt_save_path,
        n_steps=args.n_steps,
        lr=args.lr,
        gamma_steplr=args.gamma_steplr,
        training_res=args.training_res,
        device_num=args.device_num,
        dry_run=args.dry_run,
        save_ckpt_every=args.save_ckpt_every
    )

    # The most fun part
    trainer.train()
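
A note on the optimization schedule wired above: Adam starts at 3e-4 and StepLR(step_size=10_000, gamma=0.5) halves the learning rate every 10k steps across the 160k-step run, i.e. lr(i) = 3e-4 * 0.5 ** (i // 10_000). A quick, illustrative sanity check:

def lr_at_step(i, base_lr=3e-4, step_size=10_000, gamma=0.5):
    # Mirrors torch.optim.lr_scheduler.StepLR: decay by `gamma` once per `step_size` steps
    return base_lr * gamma ** (i // step_size)

assert lr_at_step(0) == 3e-4                    # first 10k steps run at the base rate
assert lr_at_step(159_999) == 3e-4 * 0.5 ** 15  # rate during the final 10k steps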
imcui/third_party/LiftFeat/train.sh
ADDED
@@ -0,0 +1,11 @@
# default training
nohup python /home/yepeng_liu/code_python/LiftFeat/train.py \
    --name LiftFeat_test \
    --ckpt_save_path /home/yepeng_liu/code_python/LiftFeat/trained_weights/test \
    --device_num 1 \
    --use_megadepth \
    --megadepth_batch_size 8 \
    --use_coco \
    --coco_batch_size 4 \
    --save_ckpt_every 1000 \
    > /home/yepeng_liu/code_python/LiftFeat/trained_weights/test/training.log 2>&1 &
imcui/third_party/LiftFeat/utils/__init__.py
ADDED
File without changes
imcui/third_party/LiftFeat/utils/alike_wrapper.py
ADDED
@@ -0,0 +1,45 @@
"""
"LiftFeat: 3D Geometry-Aware Local Feature Matching"
"""


import sys
import os

ALIKE_PATH = '/home/yepeng_liu/code_python/multimodal_remote/ALIKE'
sys.path.append(ALIKE_PATH)

import torch
import torch.nn as nn
from alike import ALike
import cv2
import numpy as np

dev = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

configs = {
    'alike-t': {'c1': 8, 'c2': 16, 'c3': 32, 'c4': 64, 'dim': 64, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(ALIKE_PATH, 'models', 'alike-t.pth')},
    'alike-s': {'c1': 8, 'c2': 16, 'c3': 48, 'c4': 96, 'dim': 96, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(ALIKE_PATH, 'models', 'alike-s.pth')},
    'alike-n': {'c1': 16, 'c2': 32, 'c3': 64, 'c4': 128, 'dim': 128, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(ALIKE_PATH, 'models', 'alike-n.pth')},
    'alike-l': {'c1': 32, 'c2': 64, 'c3': 128, 'c4': 128, 'dim': 128, 'single_head': False, 'radius': 2,
                'model_path': os.path.join(ALIKE_PATH, 'models', 'alike-l.pth')},
}


class ALikeExtractor(nn.Module):
    def __init__(self, model_type, device) -> None:
        super().__init__()
        self.net = ALike(**configs[model_type], device=device, top_k=4096, scores_th=0.1, n_limit=8000)

    @torch.inference_mode()
    def extract_alike_kpts(self, img):
        pred0 = self.net(img, sub_pixel=True)
        return pred0['keypoints']
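
A hedged usage sketch for the wrapper above (illustrative; it assumes the ALIKE repository and its pretrained weights are present at ALIKE_PATH, and follows the HxWx3 image convention used when this wrapper is called from train.py):

import cv2
import torch

extractor = ALikeExtractor('alike-t', torch.device('cpu'))
img = cv2.imread('assert/ref.jpg')          # HxWx3 image, as in the train.py call site
kpts = extractor.extract_alike_kpts(img)    # keypoint coordinates returned by ALike
print(len(kpts))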
imcui/third_party/LiftFeat/utils/config.py
ADDED
@@ -0,0 +1,16 @@
import os
import sys
import numpy as np

featureboost_config = {
    "keypoint_dim": 65,
    "keypoint_encoder": [128, 64, 64],
    "normal_dim": 192,
    "normal_encoder": [128, 64, 64],
    "descriptor_encoder": [64, 64],
    "descriptor_dim": 64,
    "Attentional_layers": 3,
    "last_activation": None,
    "l2_normalization": None,
    "output_dim": 64,
}
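
These dimensions are tied to the model heads: keypoint_dim 65 matches the 65-channel keypoint logit map (presumably 8x8 cell bins plus a dustbin, SuperPoint-style), and normal_dim 192 corresponds to the 3 normal components unfolded over an 8x8 window in LiftFeatSPModel.forward2 (3 * 64 = 192). An illustrative consistency check:

assert featureboost_config["keypoint_dim"] == 8 * 8 + 1  # keypoint head channels
assert featureboost_config["normal_dim"] == 3 * 8 * 8    # normals unfolded with ws=8
assert featureboost_config["descriptor_dim"] == featureboost_config["output_dim"] == 64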
imcui/third_party/LiftFeat/utils/depth_anything_wrapper.py
ADDED
@@ -0,0 +1,150 @@
import argparse
import cv2
import glob
import matplotlib
import numpy as np
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import Compose
import sys

sys.path.append("/home/yepeng_liu/code_python/third_repos/Depth-Anything-V2")
from depth_anything_v2.dpt_opt import DepthAnythingV2
from depth_anything_v2.util.transform import Resize, NormalizeImage, PrepareForNet

import time

VITS_MODEL_PATH = "/home/yepeng_liu/code_python/third_repos/Depth-Anything-V2/checkpoints/depth_anything_v2_vits.pth"
VITB_MODEL_PATH = "/home/yepeng_liu/code_python/third_repos/Depth-Anything-V2/checkpoints/depth_anything_v2_vitb.pth"
VITL_MODEL_PATH = "/home/yepeng_liu/code_python/third_repos/Depth-Anything-V2/checkpoints/depth_anything_v2_vitl.pth"

model_configs = {
    "vits": {"encoder": "vits", "features": 64, "out_channels": [48, 96, 192, 384]},
    "vitb": {
        "encoder": "vitb",
        "features": 128,
        "out_channels": [96, 192, 384, 768],
    },
    "vitl": {
        "encoder": "vitl",
        "features": 256,
        "out_channels": [256, 512, 1024, 1024],
    },
    "vitg": {
        "encoder": "vitg",
        "features": 384,
        "out_channels": [1536, 1536, 1536, 1536],
    },
}

class DepthAnythingExtractor(nn.Module):
    def __init__(self, encoder_type, device, input_size, process_size=(608, 800)):
        super().__init__()
        self.net = DepthAnythingV2(**model_configs[encoder_type])
        self.device = device
        if encoder_type == "vits":
            print(f"loading {VITS_MODEL_PATH}")
            self.net.load_state_dict(torch.load(VITS_MODEL_PATH, map_location="cpu"))
        elif encoder_type == "vitb":
            print(f"loading {VITB_MODEL_PATH}")
            self.net.load_state_dict(torch.load(VITB_MODEL_PATH, map_location="cpu"))
        elif encoder_type == "vitl":
            print(f"loading {VITL_MODEL_PATH}")
            self.net.load_state_dict(torch.load(VITL_MODEL_PATH, map_location="cpu"))
        else:
            raise RuntimeError("unsupported encoder type")
        self.net.to(self.device).eval()
        self.transform = Compose([
            Resize(
                width=input_size,
                height=input_size,
                resize_target=False,
                keep_aspect_ratio=True,
                ensure_multiple_of=14,
                resize_method='lower_bound',
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
        ])
        self.process_size = process_size
        self.input_size = input_size

    @torch.inference_mode()
    def infer_image(self, img):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

        img = self.transform({'image': img})['image']

        img = torch.from_numpy(img).unsqueeze(0)

        img = img.to(self.device)

        with torch.no_grad():
            depth = self.net.forward(img)

        depth = F.interpolate(depth[:, None], self.process_size, mode="bilinear", align_corners=True)[0, 0]

        return depth.cpu().numpy()

    @torch.inference_mode()
    def compute_normal_map_torch(self, depth_map, scale=1.0):
        """
        Compute surface normals from a depth map (PyTorch implementation).

        Args:
            depth_map (torch.Tensor): depth map of shape (H, W)
            scale (float): scale factor applied to the depth gradients

        Returns:
            torch.Tensor: normal map of shape (H, W, 3)
        """
        if depth_map.ndim != 2:
            raise ValueError("depth_map must be a 2D tensor.")

        # Depth gradients along x and y
        dzdx = torch.diff(depth_map, dim=1, append=depth_map[:, -1:]) * scale
        dzdy = torch.diff(depth_map, dim=0, append=depth_map[-1:, :]) * scale

        # Initialize the normal map
        H, W = depth_map.shape
        normal_map = torch.zeros((H, W, 3), dtype=depth_map.dtype, device=depth_map.device)
        normal_map[:, :, 0] = -dzdx  # x component
        normal_map[:, :, 1] = -dzdy  # y component
        normal_map[:, :, 2] = 1.0    # z component

        # Normalize the normal vectors
        norm = torch.linalg.norm(normal_map, dim=2, keepdim=True)
        norm = torch.where(norm == 0, torch.tensor(1.0, device=depth_map.device), norm)  # avoid division by zero
        normal_map /= norm

        return normal_map

    @torch.inference_mode()
    def extract(self, img):
        depth = self.infer_image(img)
        depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
        depth_t = torch.from_numpy(depth).float().to(self.device)
        normal_map = self.compute_normal_map_torch(depth_t, 1.0)
        return depth_t, normal_map


if __name__ == "__main__":
    img_path = os.path.join(os.path.dirname(__file__), '../assert/ref.jpg')
    img = cv2.imread(img_path)
    img = cv2.resize(img, (800, 608))
    DAExtractor = DepthAnythingExtractor('vitb', torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'), 256)
    depth_t, norm = DAExtractor.extract(img)
    norm = norm.cpu().numpy()
    norm = (norm + 1) / 2 * 255
    norm = norm.astype(np.uint8)
    cv2.imwrite(os.path.join(os.path.dirname(__file__), "norm.png"), norm)
    start = time.perf_counter()
    for i in range(20):
        depth_t, norm = DAExtractor.extract(img)
    end = time.perf_counter()
    print(f"cost {end-start} seconds")
imcui/third_party/LiftFeat/utils/featurebooster.py
ADDED
@@ -0,0 +1,247 @@
from typing import List

import torch
import torch.nn as nn
import torch.nn.functional as F


def MLP(channels: List[int], do_bn: bool = False) -> nn.Module:
    """ Multi-layer perceptron """
    n = len(channels)
    layers = []
    for i in range(1, n):
        layers.append(nn.Linear(channels[i - 1], channels[i]))
        if i < (n - 1):
            if do_bn:
                layers.append(nn.BatchNorm1d(channels[i]))
            layers.append(nn.ReLU())
    return nn.Sequential(*layers)

def MLP_no_ReLU(channels: List[int], do_bn: bool = False) -> nn.Module:
    """ Multi-layer perceptron without ReLU activations """
    n = len(channels)
    layers = []
    for i in range(1, n):
        layers.append(nn.Linear(channels[i - 1], channels[i]))
        if i < (n - 1):
            if do_bn:
                layers.append(nn.BatchNorm1d(channels[i]))
    return nn.Sequential(*layers)


class KeypointEncoder(nn.Module):
    """ Encoding of geometric properties using MLP """
    def __init__(self, keypoint_dim: int, feature_dim: int, layers: List[int], dropout: bool = False, p: float = 0.1) -> None:
        super().__init__()
        self.encoder = MLP([keypoint_dim] + layers + [feature_dim])
        self.use_dropout = dropout
        self.dropout = nn.Dropout(p=p)

    def forward(self, kpts):
        if self.use_dropout:
            return self.dropout(self.encoder(kpts))
        return self.encoder(kpts)

class NormalEncoder(nn.Module):
    """ Encoding of geometric properties using MLP """
    def __init__(self, normal_dim: int, feature_dim: int, layers: List[int], dropout: bool = False, p: float = 0.1) -> None:
        super().__init__()
        self.encoder = MLP_no_ReLU([normal_dim] + layers + [feature_dim])
        self.use_dropout = dropout
        self.dropout = nn.Dropout(p=p)

    def forward(self, kpts):
        if self.use_dropout:
            return self.dropout(self.encoder(kpts))
        return self.encoder(kpts)


class DescriptorEncoder(nn.Module):
    """ Encoding of visual descriptor using MLP """
    def __init__(self, feature_dim: int, layers: List[int], dropout: bool = False, p: float = 0.1) -> None:
        super().__init__()
        self.encoder = MLP([feature_dim] + layers + [feature_dim])
        self.use_dropout = dropout
        self.dropout = nn.Dropout(p=p)

    def forward(self, descs):
        residual = descs
        if self.use_dropout:
            return residual + self.dropout(self.encoder(descs))
        return residual + self.encoder(descs)


class AFTAttention(nn.Module):
    """ Attention-free attention """
    def __init__(self, d_model: int, dropout: bool = False, p: float = 0.1) -> None:
        super().__init__()
        self.dim = d_model
        self.query = nn.Linear(d_model, d_model)
        self.key = nn.Linear(d_model, d_model)
        self.value = nn.Linear(d_model, d_model)
        self.proj = nn.Linear(d_model, d_model)
        # self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.use_dropout = dropout
        self.dropout = nn.Dropout(p=p)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        residual = x
        q = self.query(x)
        k = self.key(x)
        v = self.value(x)
        # q = torch.sigmoid(q)
        k = k.T
        k = torch.softmax(k, dim=-1)
        k = k.T
        kv = (k * v).sum(dim=-2, keepdim=True)
        x = q * kv
        x = self.proj(x)
        if self.use_dropout:
            x = self.dropout(x)
        x += residual
        # x = self.layer_norm(x)
        return x


class PositionwiseFeedForward(nn.Module):
    def __init__(self, feature_dim: int, dropout: bool = False, p: float = 0.1) -> None:
        super().__init__()
        self.mlp = MLP([feature_dim, feature_dim * 2, feature_dim])
        # self.layer_norm = nn.LayerNorm(feature_dim, eps=1e-6)
        self.use_dropout = dropout
        self.dropout = nn.Dropout(p=p)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        residual = x
        x = self.mlp(x)
        if self.use_dropout:
            x = self.dropout(x)
        x += residual
        # x = self.layer_norm(x)
        return x


class AttentionalLayer(nn.Module):
    def __init__(self, feature_dim: int, dropout: bool = False, p: float = 0.1):
        super().__init__()
        self.attn = AFTAttention(feature_dim, dropout=dropout, p=p)
        self.ffn = PositionwiseFeedForward(feature_dim, dropout=dropout, p=p)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.attn(x)
        x = self.ffn(x)
        return x


class AttentionalNN(nn.Module):
    def __init__(self, feature_dim: int, layer_num: int, dropout: bool = False, p: float = 0.1) -> None:
        super().__init__()
        self.layers = nn.ModuleList([
            AttentionalLayer(feature_dim, dropout=dropout, p=p)
            for _ in range(layer_num)])

    def forward(self, desc: torch.Tensor) -> torch.Tensor:
        for layer in self.layers:
            desc = layer(desc)
        return desc


class FeatureBooster(nn.Module):
    default_config = {
        'descriptor_dim': 128,
        'keypoint_encoder': [32, 64, 128],
        'Attentional_layers': 3,
        'last_activation': 'relu',
        'l2_normalization': True,
        'output_dim': 128
    }

    def __init__(self, config, dropout=False, p=0.1, use_kenc=True, use_normal=True, use_cross=True):
        super().__init__()
        self.config = {**self.default_config, **config}
        self.use_kenc = use_kenc
        self.use_cross = use_cross
        self.use_normal = use_normal

        if use_kenc:
            self.kenc = KeypointEncoder(self.config['keypoint_dim'], self.config['descriptor_dim'], self.config['keypoint_encoder'], dropout=dropout)

        if use_normal:
            self.nenc = NormalEncoder(self.config['normal_dim'], self.config['descriptor_dim'], self.config['normal_encoder'], dropout=dropout)

        if self.config.get('descriptor_encoder', False):
            self.denc = DescriptorEncoder(self.config['descriptor_dim'], self.config['descriptor_encoder'], dropout=dropout)
        else:
            self.denc = None

        if self.use_cross:
            self.attn_proj = AttentionalNN(feature_dim=self.config['descriptor_dim'], layer_num=self.config['Attentional_layers'], dropout=dropout)

        # self.final_proj = nn.Linear(self.config['descriptor_dim'], self.config['output_dim'])

        self.use_dropout = dropout
        self.dropout = nn.Dropout(p=p)

        # self.layer_norm = nn.LayerNorm(self.config['descriptor_dim'], eps=1e-6)

        if self.config.get('last_activation', False):
            if self.config['last_activation'].lower() == 'relu':
                self.last_activation = nn.ReLU()
            elif self.config['last_activation'].lower() == 'sigmoid':
                self.last_activation = nn.Sigmoid()
            elif self.config['last_activation'].lower() == 'tanh':
                self.last_activation = nn.Tanh()
            else:
                raise Exception('Not supported activation "%s".' % self.config['last_activation'])
        else:
            self.last_activation = None

    def forward(self, desc, kpts, normals):
        ## Self boosting
        # Descriptor MLP encoder
        if self.denc is not None:
            desc = self.denc(desc)
        # Geometric MLP encoder
        if self.use_kenc:
            desc = desc + self.kenc(kpts)
            if self.use_dropout:
                desc = self.dropout(desc)

        # Surface-normal feature encoder
        if self.use_normal:
            desc = desc + self.nenc(normals)
            if self.use_dropout:
                desc = self.dropout(desc)

        ## Cross boosting
        # Multi-layer Transformer network.
        if self.use_cross:
            # desc = self.attn_proj(self.layer_norm(desc))
            desc = self.attn_proj(desc)

        ## Post processing
        # Final MLP projection
        # desc = self.final_proj(desc)
        if self.last_activation is not None:
            desc = self.last_activation(desc)
        # L2 normalization
        if self.config['l2_normalization']:
            desc = F.normalize(desc, dim=-1)

        return desc

if __name__ == "__main__":
    # config.py defines `featureboost_config`; with it, normals must be 192-dim
    # (3 components unfolded over an 8x8 window) to match the normal encoder.
    from config import featureboost_config
    fb_net = FeatureBooster(featureboost_config)

    descs = torch.randn([1900, 64])
    kpts = torch.randn([1900, 65])
    normals = torch.randn([1900, 192])

    descs_refine = fb_net(descs, kpts, normals)

    print(descs_refine.shape)
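
The AFTAttention block above follows the AFT-simple variant of the Attention Free Transformer: keys are softmax-normalized over the token dimension, pooled against the values into a single global context vector, and that context is gated element-wise by the queries (the sigmoid gate on Q from the original formulation is commented out here):

Y_i = Q_i \odot \sum_{j} \operatorname{softmax}_j(K)_j \odot V_j

followed by a linear projection and a residual connection, so the cost is linear in the number of descriptors rather than quadratic.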
imcui/third_party/LiftFeat/weights/LiftFeat.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0da33b2216bde964989f3d13e9b9c9cbdd65c98fb05fb4d4771b7d2f3a807c8b
size 8086947